Bug Summary

File: tools/polly/lib/Transform/ScheduleOptimizer.cpp
Warning: line 1261, column 7
Value stored to 'NewK' during its initialization is never read

Annotated Source Code

1//===- Schedule.cpp - Calculate an optimized schedule ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass generates an entirely new schedule tree from the data dependences
11// and iteration domains. The new schedule tree is computed in two steps:
12//
13// 1) The isl scheduling optimizer is run
14//
15// The isl scheduling optimizer creates a new schedule tree that maximizes
16// parallelism and tileability and minimizes data-dependence distances. The
17// algorithm used is a modified version of the ``Pluto'' algorithm:
18//
19// U. Bondhugula, A. Hartono, J. Ramanujam, and P. Sadayappan.
20// A Practical Automatic Polyhedral Parallelizer and Locality Optimizer.
21// In Proceedings of the 2008 ACM SIGPLAN Conference On Programming Language
22// Design and Implementation, PLDI ’08, pages 101–113. ACM, 2008.
23//
24// 2) A set of post-scheduling transformations is applied on the schedule tree.
25//
26// These optimizations include:
27//
28// - Tiling of the innermost tilable bands
29// - Prevectorization - The choice of a possible outer loop that is strip-mined
30// to the innermost level to enable inner-loop
31// vectorization.
32// - Some optimizations for spatial locality are also planned.
33//
34// For a detailed description of the schedule tree itself please see section 6
35// of:
36//
37// Polyhedral AST generation is more than scanning polyhedra
38// Tobias Grosser, Sven Verdoolaege, Albert Cohen
39// ACM Transactions on Programming Languages and Systems (TOPLAS),
40// 37(4), July 2015
41// http://www.grosser.es/#pub-polyhedral-AST-generation
42//
43// This publication also contains a detailed discussion of the different options
44// for polyhedral loop unrolling, full/partial tile separation and other uses
45// of the schedule tree.
46//
47//===----------------------------------------------------------------------===//
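// Illustrative sketch (editorial note, not part of the original file): the
// "prevectorization" step mentioned above strip-mines a chosen loop by the
// prevectorization width (default 4, see -polly-prevect-width below), which
// conceptually rewrites
//
//   for (int i = 0; i < N; i++)
//     Body(i);
//
// into an outer loop over chunks plus an innermost loop of at most 4
// iterations that is a natural candidate for inner-loop vectorization:
//
//   for (int it = 0; it < N; it += 4)
//     for (int ip = it; ip < std::min(it + 4, N); ip++)
//       Body(ip);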
48
49#include "polly/ScheduleOptimizer.h"
50#include "polly/CodeGen/CodeGeneration.h"
51#include "polly/DependenceInfo.h"
52#include "polly/LinkAllPasses.h"
53#include "polly/Options.h"
54#include "polly/ScopInfo.h"
55#include "polly/Support/GICHelper.h"
56#include "llvm/Analysis/TargetTransformInfo.h"
57#include "llvm/Support/Debug.h"
58#include "isl/aff.h"
59#include "isl/band.h"
60#include "isl/constraint.h"
61#include "isl/map.h"
62#include "isl/options.h"
63#include "isl/printer.h"
64#include "isl/schedule.h"
65#include "isl/schedule_node.h"
66#include "isl/space.h"
67#include "isl/union_map.h"
68#include "isl/union_set.h"
69
70using namespace llvm;
71using namespace polly;
72
73#define DEBUG_TYPE"polly-opt-isl" "polly-opt-isl"
74
75static cl::opt<std::string>
76 OptimizeDeps("polly-opt-optimize-only",
77 cl::desc("Only a certain kind of dependences (all/raw)"),
78 cl::Hidden, cl::init("all"), cl::ZeroOrMore,
79 cl::cat(PollyCategory));
80
81static cl::opt<std::string>
82 SimplifyDeps("polly-opt-simplify-deps",
83 cl::desc("Dependences should be simplified (yes/no)"),
84 cl::Hidden, cl::init("yes"), cl::ZeroOrMore,
85 cl::cat(PollyCategory));
86
87static cl::opt<int> MaxConstantTerm(
88 "polly-opt-max-constant-term",
89 cl::desc("The maximal constant term allowed (-1 is unlimited)"), cl::Hidden,
90 cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory));
91
92static cl::opt<int> MaxCoefficient(
93 "polly-opt-max-coefficient",
94 cl::desc("The maximal coefficient allowed (-1 is unlimited)"), cl::Hidden,
95 cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory));
96
97static cl::opt<std::string> FusionStrategy(
98 "polly-opt-fusion", cl::desc("The fusion strategy to choose (min/max)"),
99 cl::Hidden, cl::init("min"), cl::ZeroOrMore, cl::cat(PollyCategory));
100
101static cl::opt<std::string>
102 MaximizeBandDepth("polly-opt-maximize-bands",
103 cl::desc("Maximize the band depth (yes/no)"), cl::Hidden,
104 cl::init("yes"), cl::ZeroOrMore, cl::cat(PollyCategory));
105
106static cl::opt<std::string> OuterCoincidence(
107 "polly-opt-outer-coincidence",
108 cl::desc("Try to construct schedules where the outer member of each band "
109 "satisfies the coincidence constraints (yes/no)"),
110 cl::Hidden, cl::init("no"), cl::ZeroOrMore, cl::cat(PollyCategory));
111
112static cl::opt<int> PrevectorWidth(
113 "polly-prevect-width",
114 cl::desc(
115 "The number of loop iterations to strip-mine for pre-vectorization"),
116 cl::Hidden, cl::init(4), cl::ZeroOrMore, cl::cat(PollyCategory));
117
118static cl::opt<bool> FirstLevelTiling("polly-tiling",
119 cl::desc("Enable loop tiling"),
120 cl::init(true), cl::ZeroOrMore,
121 cl::cat(PollyCategory));
122
123static cl::opt<int> LatencyVectorFma(
124 "polly-target-latency-vector-fma",
125 cl::desc("The minimal number of cycles between issuing two "
126 "dependent consecutive vector fused multiply-add "
127 "instructions."),
128 cl::Hidden, cl::init(8), cl::ZeroOrMore, cl::cat(PollyCategory));
129
130static cl::opt<int> ThroughputVectorFma(
131 "polly-target-throughput-vector-fma",
132 cl::desc("A throughput of the processor floating-point arithmetic units "
133 "expressed in the number of vector fused multiply-add "
134 "instructions per clock cycle."),
135 cl::Hidden, cl::init(1), cl::ZeroOrMore, cl::cat(PollyCategory));
136
137// This option, along with --polly-target-2nd-cache-level-associativity,
138// --polly-target-1st-cache-level-size, and --polly-target-2nd-cache-level-size
139// represent the parameters of the target cache, which do not have typical
140// values that can be used by default. However, to apply the pattern matching
141// optimizations, we use the values of the parameters of Intel Core i7-3820
142// SandyBridge in case the parameters are not specified. Such an approach also
143// helps to attain high performance on IBM POWER System S822 and IBM Power
144// 730 Express servers.
145static cl::opt<int> FirstCacheLevelAssociativity(
146 "polly-target-1st-cache-level-associativity",
147 cl::desc("The associativity of the first cache level."), cl::Hidden,
148 cl::init(8), cl::ZeroOrMore, cl::cat(PollyCategory));
149
150static cl::opt<int> SecondCacheLevelAssociativity(
151 "polly-target-2nd-cache-level-associativity",
152 cl::desc("The associativity of the second cache level."), cl::Hidden,
153 cl::init(8), cl::ZeroOrMore, cl::cat(PollyCategory));
154
155static cl::opt<int> FirstCacheLevelSize(
156 "polly-target-1st-cache-level-size",
157 cl::desc("The size of the first cache level specified in bytes."),
158 cl::Hidden, cl::init(32768), cl::ZeroOrMore, cl::cat(PollyCategory));
159
160static cl::opt<int> SecondCacheLevelSize(
161 "polly-target-2nd-cache-level-size",
162 cl::desc("The size of the second level specified in bytes."), cl::Hidden,
163 cl::init(262144), cl::ZeroOrMore, cl::cat(PollyCategory));
164
165static cl::opt<int> VectorRegisterBitwidth(
166 "polly-target-vector-register-bitwidth",
167 cl::desc("The size in bits of a vector register (if not set, this "
168 "information is taken from LLVM's target information."),
169 cl::Hidden, cl::init(-1), cl::ZeroOrMore, cl::cat(PollyCategory));
170
171static cl::opt<int> FirstLevelDefaultTileSize(
172 "polly-default-tile-size",
173 cl::desc("The default tile size (if not enough were provided by"
174 " --polly-tile-sizes)"),
175 cl::Hidden, cl::init(32), cl::ZeroOrMore, cl::cat(PollyCategory));
176
177static cl::list<int>
178 FirstLevelTileSizes("polly-tile-sizes",
179 cl::desc("A tile size for each loop dimension, filled "
180 "with --polly-default-tile-size"),
181 cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
182 cl::cat(PollyCategory));
183
184static cl::opt<bool>
185 SecondLevelTiling("polly-2nd-level-tiling",
186 cl::desc("Enable a 2nd level loop of loop tiling"),
187 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
188
189static cl::opt<int> SecondLevelDefaultTileSize(
190 "polly-2nd-level-default-tile-size",
191 cl::desc("The default 2nd-level tile size (if not enough were provided by"
192 " --polly-2nd-level-tile-sizes)"),
193 cl::Hidden, cl::init(16), cl::ZeroOrMore, cl::cat(PollyCategory));
194
195static cl::list<int>
196 SecondLevelTileSizes("polly-2nd-level-tile-sizes",
197 cl::desc("A tile size for each loop dimension, filled "
198 "with --polly-default-tile-size"),
199 cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
200 cl::cat(PollyCategory));
201
202static cl::opt<bool> RegisterTiling("polly-register-tiling",
203 cl::desc("Enable register tiling"),
204 cl::init(false), cl::ZeroOrMore,
205 cl::cat(PollyCategory));
206
207static cl::opt<int> RegisterDefaultTileSize(
208 "polly-register-tiling-default-tile-size",
209 cl::desc("The default register tile size (if not enough were provided by"
210 " --polly-register-tile-sizes)"),
211 cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::cat(PollyCategory));
212
213static cl::opt<int> PollyPatternMatchingNcQuotient(
214 "polly-pattern-matching-nc-quotient",
215 cl::desc("Quotient that is obtained by dividing Nc, the parameter of the"
216 "macro-kernel, by Nr, the parameter of the micro-kernel"),
217 cl::Hidden, cl::init(256), cl::ZeroOrMore, cl::cat(PollyCategory));
218
219static cl::list<int>
220 RegisterTileSizes("polly-register-tile-sizes",
221 cl::desc("A tile size for each loop dimension, filled "
222 "with --polly-register-tile-size"),
223 cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
224 cl::cat(PollyCategory));
225
226static cl::opt<bool>
227 PMBasedOpts("polly-pattern-matching-based-opts",
228 cl::desc("Perform optimizations based on pattern matching"),
229 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
230
231static cl::opt<bool> OptimizedScops(
232 "polly-optimized-scops",
233 cl::desc("Polly - Dump polyhedral description of Scops optimized with "
234 "the isl scheduling optimizer and the set of post-scheduling "
235 "transformations is applied on the schedule tree"),
236 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
237
238/// Create an isl_union_set, which describes the isolate option based on
239/// IsolateDomain.
240///
241/// @param IsolateDomain An isl_set whose @p OutDimsNum last dimensions should
242/// belong to the current band node.
243/// @param OutDimsNum A number of dimensions that should belong to
244/// the current band node.
245static __isl_give isl_union_set *
246getIsolateOptions(__isl_take isl_set *IsolateDomain, unsigned OutDimsNum) {
247 auto Dims = isl_set_dim(IsolateDomain, isl_dim_set);
248 assert(OutDimsNum <= Dims &&
249 "The isl_set IsolateDomain is used to describe the range of schedule "
250 "dimensions values, which should be isolated. Consequently, the "
251 "number of its dimensions should be greater than or equal to the "
252 "number of the schedule dimensions.");
253 auto *IsolateRelation = isl_map_from_domain(IsolateDomain);
254 IsolateRelation =
255 isl_map_move_dims(IsolateRelation, isl_dim_out, 0, isl_dim_in,
256 Dims - OutDimsNum, OutDimsNum);
257 auto *IsolateOption = isl_map_wrap(IsolateRelation);
258 auto *Id = isl_id_alloc(isl_set_get_ctx(IsolateOption), "isolate", nullptr);
259 return isl_union_set_from_set(isl_set_set_tuple_id(IsolateOption, Id));
260}
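// Worked example (editorial, illustrative): for
// IsolateDomain = { [i0, i1] : 0 <= i0 <= 3 and 0 <= i1 <= 3 } and
// OutDimsNum = 1, the last schedule dimension is moved to the output side,
// giving the relation { [i0] -> [i1] : ... }, which is then wrapped and
// tagged, so the returned option set is
// { isolate[[i0] -> [i1]] : 0 <= i0 <= 3 and 0 <= i1 <= 3 }.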
261
262/// Create an isl_union_set, which describes the atomic option for the dimension
263/// of the current node.
264///
265/// It may help to reduce the size of generated code.
266///
267/// @param Ctx An isl_ctx, which is used to create the isl_union_set.
268static __isl_give isl_union_set *getAtomicOptions(isl_ctx *Ctx) {
269 auto *Space = isl_space_set_alloc(Ctx, 0, 1);
270 auto *AtomicOption = isl_set_universe(Space);
271 auto *Id = isl_id_alloc(Ctx, "atomic", nullptr);
272 return isl_union_set_from_set(isl_set_set_tuple_id(AtomicOption, Id));
273}
274
275/// Create an isl_union_set, which describes the option of the form
276/// [isolate[] -> unroll[x]].
277///
278/// @param Ctx An isl_ctx, which is used to create the isl_union_set.
279static __isl_give isl_union_set *getUnrollIsolatedSetOptions(isl_ctx *Ctx) {
280 auto *Space = isl_space_alloc(Ctx, 0, 0, 1);
281 auto *UnrollIsolatedSetOption = isl_map_universe(Space);
282 auto *DimInId = isl_id_alloc(Ctx, "isolate", nullptr);
283 auto *DimOutId = isl_id_alloc(Ctx, "unroll", nullptr);
284 UnrollIsolatedSetOption =
285 isl_map_set_tuple_id(UnrollIsolatedSetOption, isl_dim_in, DimInId);
286 UnrollIsolatedSetOption =
287 isl_map_set_tuple_id(UnrollIsolatedSetOption, isl_dim_out, DimOutId);
288 return isl_union_set_from_set(isl_map_wrap(UnrollIsolatedSetOption));
289}
290
291/// Make the last dimension of Set take values from 0 to VectorWidth - 1.
292///
293/// @param Set A set, which should be modified.
294/// @param VectorWidth A parameter, which determines the constraint.
295static __isl_give isl_set *addExtentConstraints(__isl_take isl_set *Set,
296 int VectorWidth) {
297 auto Dims = isl_set_dim(Set, isl_dim_set);
298 auto Space = isl_set_get_space(Set);
299 auto *LocalSpace = isl_local_space_from_space(Space);
300 auto *ExtConstr =
301 isl_constraint_alloc_inequality(isl_local_space_copy(LocalSpace));
302 ExtConstr = isl_constraint_set_constant_si(ExtConstr, 0);
303 ExtConstr =
304 isl_constraint_set_coefficient_si(ExtConstr, isl_dim_set, Dims - 1, 1);
305 Set = isl_set_add_constraint(Set, ExtConstr);
306 ExtConstr = isl_constraint_alloc_inequality(LocalSpace);
307 ExtConstr = isl_constraint_set_constant_si(ExtConstr, VectorWidth - 1);
308 ExtConstr =
309 isl_constraint_set_coefficient_si(ExtConstr, isl_dim_set, Dims - 1, -1);
310 return isl_set_add_constraint(Set, ExtConstr);
311}
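// Worked example (editorial, illustrative): for VectorWidth = 4 and a set with
// dimensions [i, v], the two inequality constraints added above are v >= 0 and
// -v + 3 >= 0, i.e. the last dimension is restricted to 0 <= v <= 3.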
312
313/// Build the desired set of partial tile prefixes.
314///
315/// We build a set of partial tile prefixes, which are prefixes of the vector
316/// loop that have exactly VectorWidth iterations.
317///
318/// 1. Get all prefixes of the vector loop.
319/// 2. Extend it to a set, which has exactly VectorWidth iterations for
320/// any prefix from the set that was built on the previous step.
321/// 3. Subtract loop domain from it, project out the vector loop dimension and
322/// get a set of prefixes, which don't have exactly VectorWidth iterations.
323/// 4. Subtract it from all prefixes of the vector loop and get the desired
324/// set.
325///
326/// @param ScheduleRange A range of a map, which describes a prefix schedule
327/// relation.
328static __isl_give isl_set *
329getPartialTilePrefixes(__isl_take isl_set *ScheduleRange, int VectorWidth) {
330 auto Dims = isl_set_dim(ScheduleRange, isl_dim_set);
331 auto *LoopPrefixes = isl_set_project_out(isl_set_copy(ScheduleRange),
332 isl_dim_set, Dims - 1, 1);
333 auto *ExtentPrefixes =
334 isl_set_add_dims(isl_set_copy(LoopPrefixes), isl_dim_set, 1);
335 ExtentPrefixes = addExtentConstraints(ExtentPrefixes, VectorWidth);
336 auto *BadPrefixes = isl_set_subtract(ExtentPrefixes, ScheduleRange);
337 BadPrefixes = isl_set_project_out(BadPrefixes, isl_dim_set, Dims - 1, 1);
338 return isl_set_subtract(LoopPrefixes, BadPrefixes);
339}
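// Worked example (editorial, illustrative): let VectorWidth = 4 and
// ScheduleRange = { [i, j] : 0 <= i < 2 and 0 <= j < 4 - 2i }, so the vector
// loop has 4 iterations for i = 0 but only 2 for i = 1. Then
//   LoopPrefixes   = { [i] : 0 <= i < 2 }
//   ExtentPrefixes = { [i, j] : 0 <= i < 2 and 0 <= j <= 3 }
//   BadPrefixes    = { [1, j] : 2 <= j <= 3 }, projected to { [1] }
// and the returned set is { [0] }, the only prefix with a full vector of
// iterations.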
340
341__isl_give isl_schedule_node *ScheduleTreeOptimizer::isolateFullPartialTiles(
342 __isl_take isl_schedule_node *Node, int VectorWidth) {
343 assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band);
344 Node = isl_schedule_node_child(Node, 0);
345 Node = isl_schedule_node_child(Node, 0);
346 auto *SchedRelUMap = isl_schedule_node_get_prefix_schedule_relation(Node);
347 auto *ScheduleRelation = isl_map_from_union_map(SchedRelUMap);
348 auto *ScheduleRange = isl_map_range(ScheduleRelation);
349 auto *IsolateDomain = getPartialTilePrefixes(ScheduleRange, VectorWidth);
350 auto *AtomicOption = getAtomicOptions(isl_set_get_ctx(IsolateDomain));
351 auto *IsolateOption = getIsolateOptions(IsolateDomain, 1);
352 Node = isl_schedule_node_parent(Node);
353 Node = isl_schedule_node_parent(Node);
354 auto *Options = isl_union_set_union(IsolateOption, AtomicOption);
355 Node = isl_schedule_node_band_set_ast_build_options(Node, Options);
356 return Node;
357}
358
359__isl_give isl_schedule_node *
360ScheduleTreeOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node,
361 unsigned DimToVectorize,
362 int VectorWidth) {
363 assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band);
364
365 auto Space = isl_schedule_node_band_get_space(Node);
366 auto ScheduleDimensions = isl_space_dim(Space, isl_dim_set);
367 isl_space_free(Space);
368 assert(DimToVectorize < ScheduleDimensions);
369
370 if (DimToVectorize > 0) {
371 Node = isl_schedule_node_band_split(Node, DimToVectorize);
372 Node = isl_schedule_node_child(Node, 0);
373 }
374 if (DimToVectorize < ScheduleDimensions - 1)
375 Node = isl_schedule_node_band_split(Node, 1);
376 Space = isl_schedule_node_band_get_space(Node);
377 auto Sizes = isl_multi_val_zero(Space);
378 auto Ctx = isl_schedule_node_get_ctx(Node);
379 Sizes =
380 isl_multi_val_set_val(Sizes, 0, isl_val_int_from_si(Ctx, VectorWidth));
381 Node = isl_schedule_node_band_tile(Node, Sizes);
382 Node = isolateFullPartialTiles(Node, VectorWidth);
383 Node = isl_schedule_node_child(Node, 0);
384 // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
385 // we will have trouble matching it in the backend.
386 Node = isl_schedule_node_band_set_ast_build_options(
387 Node, isl_union_set_read_from_str(Ctx, "{ unroll[x]: 1 = 0 }"));
388 Node = isl_schedule_node_band_sink(Node);
389 Node = isl_schedule_node_child(Node, 0);
390 if (isl_schedule_node_get_type(Node) == isl_schedule_node_leaf)
391 Node = isl_schedule_node_parent(Node);
392 isl_id *LoopMarker = isl_id_alloc(Ctx, "SIMD", nullptr);
393 Node = isl_schedule_node_insert_mark(Node, LoopMarker);
394 return Node;
395}
396
397__isl_give isl_schedule_node *
398ScheduleTreeOptimizer::tileNode(__isl_take isl_schedule_node *Node,
399 const char *Identifier, ArrayRef<int> TileSizes,
400 int DefaultTileSize) {
401 auto Ctx = isl_schedule_node_get_ctx(Node);
402 auto Space = isl_schedule_node_band_get_space(Node);
403 auto Dims = isl_space_dim(Space, isl_dim_set);
404 auto Sizes = isl_multi_val_zero(Space);
405 std::string IdentifierString(Identifier);
406 for (unsigned i = 0; i < Dims; i++) {
407 auto tileSize = i < TileSizes.size() ? TileSizes[i] : DefaultTileSize;
408 Sizes = isl_multi_val_set_val(Sizes, i, isl_val_int_from_si(Ctx, tileSize));
409 }
410 auto TileLoopMarkerStr = IdentifierString + " - Tiles";
411 isl_id *TileLoopMarker =
412 isl_id_alloc(Ctx, TileLoopMarkerStr.c_str(), nullptr);
413 Node = isl_schedule_node_insert_mark(Node, TileLoopMarker);
414 Node = isl_schedule_node_child(Node, 0);
415 Node = isl_schedule_node_band_tile(Node, Sizes);
416 Node = isl_schedule_node_child(Node, 0);
417 auto PointLoopMarkerStr = IdentifierString + " - Points";
418 isl_id *PointLoopMarker =
419 isl_id_alloc(Ctx, PointLoopMarkerStr.c_str(), nullptr);
420 Node = isl_schedule_node_insert_mark(Node, PointLoopMarker);
421 Node = isl_schedule_node_child(Node, 0);
422 return Node;
423}
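// Illustrative example (editorial): tiling a 3-dimensional band with
// -polly-tile-sizes=64,64 and the default size 32 uses the sizes {64, 64, 32}
// and produces the subtree
//   mark "1st level tiling - Tiles"
//     band (tile loops)
//       mark "1st level tiling - Points"
//         band (point loops)
// with the returned node positioned at the point-loop band.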
424
425__isl_give isl_schedule_node *
426ScheduleTreeOptimizer::applyRegisterTiling(__isl_take isl_schedule_node *Node,
427 llvm::ArrayRef<int> TileSizes,
428 int DefaultTileSize) {
429 auto *Ctx = isl_schedule_node_get_ctx(Node);
430 Node = tileNode(Node, "Register tiling", TileSizes, DefaultTileSize);
431 Node = isl_schedule_node_band_set_ast_build_options(
432 Node, isl_union_set_read_from_str(Ctx, "{unroll[x]}"));
433 return Node;
434}
435
436namespace {
437bool isSimpleInnermostBand(const isl::schedule_node &Node) {
438 assert(isl_schedule_node_get_type(Node.keep()) == isl_schedule_node_band);
439 assert(isl_schedule_node_n_children(Node.keep()) == 1);
440
441 auto ChildType = isl_schedule_node_get_type(Node.child(0).keep());
442
443 if (ChildType == isl_schedule_node_leaf)
444 return true;
445
446 if (ChildType != isl_schedule_node_sequence)
447 return false;
448
449 auto Sequence = Node.child(0);
450
451 for (int c = 0, nc = isl_schedule_node_n_children(Sequence.keep()); c < nc;
452 ++c) {
453 auto Child = Sequence.child(c);
454 if (isl_schedule_node_get_type(Child.keep()) != isl_schedule_node_filter)
455 return false;
456 if (isl_schedule_node_get_type(Child.child(0).keep()) !=
457 isl_schedule_node_leaf)
458 return false;
459 }
460 return true;
461}
462} // namespace
463
464bool ScheduleTreeOptimizer::isTileableBandNode(
465 __isl_keep isl_schedule_node *Node) {
466 if (isl_schedule_node_get_type(Node) != isl_schedule_node_band)
467 return false;
468
469 if (isl_schedule_node_n_children(Node) != 1)
470 return false;
471
472 if (!isl_schedule_node_band_get_permutable(Node))
473 return false;
474
475 auto Space = isl_schedule_node_band_get_space(Node);
476 auto Dims = isl_space_dim(Space, isl_dim_set);
477 isl_space_free(Space);
478
479 if (Dims <= 1)
480 return false;
481
482 auto ManagedNode = isl::manage(isl_schedule_node_copy(Node));
483 return isSimpleInnermostBand(ManagedNode);
484}
485
486__isl_give isl_schedule_node *
487ScheduleTreeOptimizer::standardBandOpts(__isl_take isl_schedule_node *Node,
488 void *User) {
489 if (FirstLevelTiling)
490 Node = tileNode(Node, "1st level tiling", FirstLevelTileSizes,
491 FirstLevelDefaultTileSize);
492
493 if (SecondLevelTiling)
494 Node = tileNode(Node, "2nd level tiling", SecondLevelTileSizes,
495 SecondLevelDefaultTileSize);
496
497 if (RegisterTiling)
498 Node =
499 applyRegisterTiling(Node, RegisterTileSizes, RegisterDefaultTileSize);
500
501 if (PollyVectorizerChoice == VECTORIZER_NONE)
502 return Node;
503
504 auto Space = isl_schedule_node_band_get_space(Node);
505 auto Dims = isl_space_dim(Space, isl_dim_set);
506 isl_space_free(Space);
507
508 for (int i = Dims - 1; i >= 0; i--)
509 if (isl_schedule_node_band_member_get_coincident(Node, i)) {
510 Node = prevectSchedBand(Node, i, PrevectorWidth);
511 break;
512 }
513
514 return Node;
515}
516
517/// Get the position of a dimension with a non-zero coefficient.
518///
519/// Check that isl constraint @p Constraint has only one non-zero
520/// coefficient for dimensions that have type @p DimType. If this is true,
521/// return the position of the dimension corresponding to the non-zero
522/// coefficient, or a negative value otherwise.
523///
524/// @param Constraint The isl constraint to be checked.
525/// @param DimType The type of the dimensions.
526/// @return The position of the dimension in case the isl
527/// constraint satisfies the requirements, a negative
528/// value, otherwise.
529static int getMatMulConstraintDim(__isl_keep isl_constraint *Constraint,
530 enum isl_dim_type DimType) {
531 int DimPos = -1;
532 auto *LocalSpace = isl_constraint_get_local_space(Constraint);
533 int LocalSpaceDimNum = isl_local_space_dim(LocalSpace, DimType);
534 for (int i = 0; i < LocalSpaceDimNum; i++) {
535 auto *Val = isl_constraint_get_coefficient_val(Constraint, DimType, i);
536 if (isl_val_is_zero(Val)) {
537 isl_val_free(Val);
538 continue;
539 }
540 if (DimPos >= 0 || (DimType == isl_dim_out && !isl_val_is_one(Val)) ||
541 (DimType == isl_dim_in && !isl_val_is_negone(Val))) {
542 isl_val_free(Val);
543 isl_local_space_free(LocalSpace);
544 return -1;
545 }
546 DimPos = i;
547 isl_val_free(Val);
548 }
549 isl_local_space_free(LocalSpace);
550 return DimPos;
551}
552
553/// Check the form of the isl constraint.
554///
555/// Check that the @p DimInPos input dimension of the isl constraint
556/// @p Constraint has a coefficient equal to negative one, that the
557/// @p DimOutPos output dimension has a coefficient equal to one, and that
558/// all other dimensions have coefficients equal to zero.
559///
560/// @param Constraint The isl constraint to be checked.
561/// @param DimInPos The input dimension of the isl constraint.
562/// @param DimOutPos The output dimension of the isl constraint.
563/// @return isl_stat_ok in case the isl constraint satisfies
564/// the requirements, isl_stat_error otherwise.
565static isl_stat isMatMulOperandConstraint(__isl_keep isl_constraint *Constraint,
566 int &DimInPos, int &DimOutPos) {
567 auto *Val = isl_constraint_get_constant_val(Constraint);
568 if (!isl_constraint_is_equality(Constraint) || !isl_val_is_zero(Val)) {
569 isl_val_free(Val);
570 return isl_stat_error;
571 }
572 isl_val_free(Val);
573 DimInPos = getMatMulConstraintDim(Constraint, isl_dim_in);
574 if (DimInPos < 0)
575 return isl_stat_error;
576 DimOutPos = getMatMulConstraintDim(Constraint, isl_dim_out);
577 if (DimOutPos < 0)
578 return isl_stat_error;
579 return isl_stat_ok;
580}
581
582/// Check that the access relation corresponds to a non-constant operand
583/// of the matrix multiplication.
584///
585/// Access relations that correspond to non-constant operands of the matrix
586/// multiplication depend only on two input dimensions and have two output
587/// dimensions. The function checks that the isl basic map @p bmap satisfies
588/// the requirements. The two input dimensions can be specified via @p user
589/// array.
590///
591/// @param bmap The isl basic map to be checked.
592/// @param user The input dimensions of @p bmap.
593/// @return isl_stat_ok in case isl basic map satisfies the requirements,
594/// isl_stat_error otherwise.
595static isl_stat isMatMulOperandBasicMap(__isl_take isl_basic_map *bmap,
596 void *user) {
597 auto *Constraints = isl_basic_map_get_constraint_list(bmap);
598 isl_basic_map_free(bmap);
599 if (isl_constraint_list_n_constraint(Constraints) != 2) {
600 isl_constraint_list_free(Constraints);
601 return isl_stat_error;
602 }
603 int InPosPair[] = {-1, -1};
604 auto DimInPos = user ? static_cast<int *>(user) : InPosPair;
605 for (int i = 0; i < 2; i++) {
606 auto *Constraint = isl_constraint_list_get_constraint(Constraints, i);
607 int InPos, OutPos;
608 if (isMatMulOperandConstraint(Constraint, InPos, OutPos) ==
609 isl_stat_error ||
610 OutPos > 1 || (DimInPos[OutPos] >= 0 && DimInPos[OutPos] != InPos)) {
611 isl_constraint_free(Constraint);
612 isl_constraint_list_free(Constraints);
613 return isl_stat_error;
614 }
615 DimInPos[OutPos] = InPos;
616 isl_constraint_free(Constraint);
617 }
618 isl_constraint_list_free(Constraints);
619 return isl_stat_ok;
620}
621
622/// Permute the two dimensions of the isl map.
623///
624/// Permute @p DstPos and @p SrcPos dimensions of the isl map @p Map that
625/// have type @p DimType.
626///
627/// @param Map The isl map to be modified.
628/// @param DimType The type of the dimensions.
629/// @param DstPos The first dimension.
630/// @param SrcPos The second dimension.
631/// @return The modified map.
632__isl_give isl_map *permuteDimensions(__isl_take isl_map *Map,
633 enum isl_dim_type DimType,
634 unsigned DstPos, unsigned SrcPos) {
635 assert(DstPos < isl_map_dim(Map, DimType) &&
636 SrcPos < isl_map_dim(Map, DimType));
637 if (DstPos == SrcPos)
638 return Map;
639 isl_id *DimId = nullptr;
640 if (isl_map_has_tuple_id(Map, DimType))
641 DimId = isl_map_get_tuple_id(Map, DimType);
642 auto FreeDim = DimType == isl_dim_in ? isl_dim_out : isl_dim_in;
643 isl_id *FreeDimId = nullptr;
644 if (isl_map_has_tuple_id(Map, FreeDim))
645 FreeDimId = isl_map_get_tuple_id(Map, FreeDim);
646 auto MaxDim = std::max(DstPos, SrcPos);
647 auto MinDim = std::min(DstPos, SrcPos);
648 Map = isl_map_move_dims(Map, FreeDim, 0, DimType, MaxDim, 1);
649 Map = isl_map_move_dims(Map, FreeDim, 0, DimType, MinDim, 1);
650 Map = isl_map_move_dims(Map, DimType, MinDim, FreeDim, 1, 1);
651 Map = isl_map_move_dims(Map, DimType, MaxDim, FreeDim, 0, 1);
652 if (DimId)
653 Map = isl_map_set_tuple_id(Map, DimType, DimId);
654 if (FreeDimId)
655 Map = isl_map_set_tuple_id(Map, FreeDim, FreeDimId);
656 return Map;
657}
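// Illustrative example (editorial): permuting output dimensions 0 and 2 of the
// map { S[i, j, k] -> [i, j, k] } with DimType = isl_dim_out yields
// { S[i, j, k] -> [k, j, i] }; the tuple ids of both sides are preserved.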
658
659/// Check the form of the access relation.
660///
661/// Check that the access relation @p AccMap has the form M[i][j], where i
662/// is a @p FirstPos and j is a @p SecondPos.
663///
664/// @param AccMap The access relation to be checked.
665/// @param FirstPos The index of the input dimension that is mapped to
666/// the first output dimension.
667/// @param SecondPos The index of the input dimension that is mapped to the
668/// second output dimension.
669/// @return True in case @p AccMap has the expected form and false,
670/// otherwise.
671static bool isMatMulOperandAcc(__isl_keep isl_map *AccMap, int &FirstPos,
672 int &SecondPos) {
673 int DimInPos[] = {FirstPos, SecondPos};
674 if (isl_map_foreach_basic_map(AccMap, isMatMulOperandBasicMap,
675 static_cast<void *>(DimInPos)) != isl_stat_ok ||
676 DimInPos[0] < 0 || DimInPos[1] < 0)
677 return false;
678 FirstPos = DimInPos[0];
679 SecondPos = DimInPos[1];
680 return true;
681}
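// Illustrative example (editorial, assuming FirstPos and SecondPos start out
// negative, i.e. unconstrained): for the access relation
// { S[i, j, k] -> A[i, k] }, isMatMulOperandAcc succeeds with FirstPos = 0
// (the input dimension mapped to the first subscript) and SecondPos = 2
// (the input dimension mapped to the second subscript).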
682
683/// Does the memory access represent a non-scalar operand of the matrix
684/// multiplication.
685///
686/// Check that the memory access @p MemAccess is the read access to a non-scalar
687/// operand of the matrix multiplication or its result.
688///
689/// @param MemAccess The memory access to be checked.
690/// @param MMI Parameters of the matrix multiplication operands.
691/// @return True in case the memory access represents the read access
692/// to a non-scalar operand of the matrix multiplication and
693/// false, otherwise.
694static bool isMatMulNonScalarReadAccess(MemoryAccess *MemAccess,
695 MatMulInfoTy &MMI) {
696 if (!MemAccess->isArrayKind() || !MemAccess->isRead())
697 return false;
698 isl_map *AccMap = MemAccess->getAccessRelation();
699 if (isMatMulOperandAcc(AccMap, MMI.i, MMI.j) && !MMI.ReadFromC &&
700 isl_map_n_basic_map(AccMap) == 1) {
701 MMI.ReadFromC = MemAccess;
702 isl_map_free(AccMap);
703 return true;
704 }
705 if (isMatMulOperandAcc(AccMap, MMI.i, MMI.k) && !MMI.A &&
706 isl_map_n_basic_map(AccMap) == 1) {
707 MMI.A = MemAccess;
708 isl_map_free(AccMap);
709 return true;
710 }
711 if (isMatMulOperandAcc(AccMap, MMI.k, MMI.j) && !MMI.B &&
712 isl_map_n_basic_map(AccMap) == 1) {
713 MMI.B = MemAccess;
714 isl_map_free(AccMap);
715 return true;
716 }
717 isl_map_free(AccMap);
718 return false;
719}
720
721/// Check accesses to operands of the matrix multiplication.
722///
723/// Check that accesses of the SCoP statement, which corresponds to
724/// the partial schedule @p PartialSchedule, are scalar in terms of loops
725/// containing the matrix multiplication, in case they do not represent
726/// accesses to the non-scalar operands of the matrix multiplication or
727/// its result.
728///
729/// @param PartialSchedule The partial schedule of the SCoP statement.
730/// @param MMI Parameters of the matrix multiplication operands.
731/// @return True in case the corresponding SCoP statement
732/// represents matrix multiplication and false,
733/// otherwise.
734static bool containsOnlyMatrMultAcc(__isl_keep isl_map *PartialSchedule,
735 MatMulInfoTy &MMI) {
736 auto *InputDimId = isl_map_get_tuple_id(PartialSchedule, isl_dim_in);
737 auto *Stmt = static_cast<ScopStmt *>(isl_id_get_user(InputDimId));
738 isl_id_free(InputDimId);
739 unsigned OutDimNum = isl_map_dim(PartialSchedule, isl_dim_out);
740 assert(OutDimNum > 2 && "In case of the matrix multiplication the loop nest "
741 "and, consequently, the corresponding scheduling "
742 "functions have at least three dimensions.");
743 auto *MapI = permuteDimensions(isl_map_copy(PartialSchedule), isl_dim_out,
744 MMI.i, OutDimNum - 1);
745 auto *MapJ = permuteDimensions(isl_map_copy(PartialSchedule), isl_dim_out,
746 MMI.j, OutDimNum - 1);
747 auto *MapK = permuteDimensions(isl_map_copy(PartialSchedule), isl_dim_out,
748 MMI.k, OutDimNum - 1);
749 for (auto *MemA = Stmt->begin(); MemA != Stmt->end() - 1; MemA++) {
750 auto *MemAccessPtr = *MemA;
751 if (MemAccessPtr->isArrayKind() && MemAccessPtr != MMI.WriteToC &&
752 !isMatMulNonScalarReadAccess(MemAccessPtr, MMI) &&
753 !(MemAccessPtr->isStrideZero(isl_map_copy(MapI)) &&
754 MemAccessPtr->isStrideZero(isl_map_copy(MapJ)) &&
755 MemAccessPtr->isStrideZero(isl_map_copy(MapK)))) {
756 isl_map_free(MapI);
757 isl_map_free(MapJ);
758 isl_map_free(MapK);
759 return false;
760 }
761 }
762 isl_map_free(MapI);
763 isl_map_free(MapJ);
764 isl_map_free(MapK);
765 return true;
766}
767
768/// Check for dependencies corresponding to the matrix multiplication.
769///
770/// Check that there is only true dependence of the form
771/// S(..., k, ...) -> S(..., k + 1, …), where S is the SCoP statement
772/// represented by @p Schedule and k is @p Pos. Such a dependence corresponds
773/// to the dependency produced by the matrix multiplication.
774///
775/// @param Schedule The schedule of the SCoP statement.
776/// @param D The SCoP dependencies.
777/// @param Pos The parameter to describe an acceptable true dependence.
778/// In case it has a negative value, try to determine its
779/// acceptable value.
780/// @return True in case dependencies correspond to the matrix multiplication
781/// and false, otherwise.
782static bool containsOnlyMatMulDep(__isl_keep isl_map *Schedule,
783 const Dependences *D, int &Pos) {
784 auto *WAR = D->getDependences(Dependences::TYPE_WAR);
785 if (!isl_union_map_is_empty(WAR)) {
786 isl_union_map_free(WAR);
787 return false;
788 }
789 isl_union_map_free(WAR);
790 auto *Dep = D->getDependences(Dependences::TYPE_RAW);
791 auto *Red = D->getDependences(Dependences::TYPE_RED);
792 if (Red)
793 Dep = isl_union_map_union(Dep, Red);
794 auto *DomainSpace = isl_space_domain(isl_map_get_space(Schedule));
795 auto *Space = isl_space_map_from_domain_and_range(isl_space_copy(DomainSpace),
796 DomainSpace);
797 auto *Deltas = isl_map_deltas(isl_union_map_extract_map(Dep, Space));
798 isl_union_map_free(Dep);
799 int DeltasDimNum = isl_set_dim(Deltas, isl_dim_set);
800 for (int i = 0; i < DeltasDimNum; i++) {
801 auto *Val = isl_set_plain_get_val_if_fixed(Deltas, isl_dim_set, i);
802 Pos = Pos < 0 && isl_val_is_one(Val) ? i : Pos;
803 if (isl_val_is_nan(Val) ||
804 !(isl_val_is_zero(Val) || (i == Pos && isl_val_is_one(Val)))) {
805 isl_val_free(Val);
806 isl_set_free(Deltas);
807 return false;
808 }
809 isl_val_free(Val);
810 }
811 isl_set_free(Deltas);
812 if (DeltasDimNum == 0 || Pos < 0)
813 return false;
814 return true;
815}
816
817/// Check if the SCoP statement could probably be optimized with analytical
818/// modeling.
819///
820/// containsMatrMult tries to determine whether the following conditions
821/// are true:
822/// 1. The last memory access modeling an array, MA1, represents writing to
823/// memory and has the form S(..., i1, ..., i2, ...) -> M(i1, i2) or
824/// S(..., i2, ..., i1, ...) -> M(i1, i2), where S is the SCoP statement
825/// under consideration.
826/// 2. There is only one loop-carried true dependency, and it has the
827/// form S(..., i3, ...) -> S(..., i3 + 1, ...), and there are no other
828/// loop-carried or anti dependencies.
829/// 3. SCoP contains three access relations, MA2, MA3, and MA4 that represent
830/// reading from memory and have the form S(..., i3, ...) -> M(i1, i3),
831/// S(..., i3, ...) -> M(i3, i2), S(...) -> M(i1, i2), respectively,
832/// and all memory accesses of the SCoP that are different from MA1, MA2,
833/// MA3, and MA4 have stride 0, if the innermost loop is exchanged with any
834/// of loops i1, i2 and i3.
835///
836/// @param PartialSchedule The PartialSchedule that contains a SCoP statement
837/// to check.
838/// @param D The SCoP dependencies.
839/// @param MMI Parameters of the matrix multiplication operands.
840static bool containsMatrMult(__isl_keep isl_map *PartialSchedule,
841 const Dependences *D, MatMulInfoTy &MMI) {
842 auto *InputDimsId = isl_map_get_tuple_id(PartialSchedule, isl_dim_in);
843 auto *Stmt = static_cast<ScopStmt *>(isl_id_get_user(InputDimsId));
844 isl_id_free(InputDimsId);
845 if (Stmt->size() <= 1)
846 return false;
847 for (auto *MemA = Stmt->end() - 1; MemA != Stmt->begin(); MemA--) {
848 auto *MemAccessPtr = *MemA;
849 if (!MemAccessPtr->isArrayKind())
850 continue;
851 if (!MemAccessPtr->isWrite())
852 return false;
853 auto *AccMap = MemAccessPtr->getAccessRelation();
854 if (isl_map_n_basic_map(AccMap) != 1 ||
855 !isMatMulOperandAcc(AccMap, MMI.i, MMI.j)) {
856 isl_map_free(AccMap);
857 return false;
858 }
859 isl_map_free(AccMap);
860 MMI.WriteToC = MemAccessPtr;
861 break;
862 }
863
864 if (!containsOnlyMatMulDep(PartialSchedule, D, MMI.k))
865 return false;
866
867 if (!MMI.WriteToC || !containsOnlyMatrMultAcc(PartialSchedule, MMI))
868 return false;
869
870 if (!MMI.A || !MMI.B || !MMI.ReadFromC)
871 return false;
872 return true;
873}
874
875/// Permute two dimensions of the band node.
876///
877/// Permute FirstDim and SecondDim dimensions of the Node.
878///
879/// @param Node The band node to be modified.
880/// @param FirstDim The first dimension to be permuted.
881/// @param SecondDim The second dimension to be permuted.
882static __isl_give isl_schedule_node *
883permuteBandNodeDimensions(__isl_take isl_schedule_node *Node, unsigned FirstDim,
884 unsigned SecondDim) {
885 assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band &&
886 isl_schedule_node_band_n_member(Node) > std::max(FirstDim, SecondDim));
887 auto PartialSchedule = isl_schedule_node_band_get_partial_schedule(Node);
888 auto PartialScheduleFirstDim =
889 isl_multi_union_pw_aff_get_union_pw_aff(PartialSchedule, FirstDim);
890 auto PartialScheduleSecondDim =
891 isl_multi_union_pw_aff_get_union_pw_aff(PartialSchedule, SecondDim);
892 PartialSchedule = isl_multi_union_pw_aff_set_union_pw_aff(
893 PartialSchedule, SecondDim, PartialScheduleFirstDim);
894 PartialSchedule = isl_multi_union_pw_aff_set_union_pw_aff(
895 PartialSchedule, FirstDim, PartialScheduleSecondDim);
896 Node = isl_schedule_node_delete(Node);
897 Node = isl_schedule_node_insert_partial_schedule(Node, PartialSchedule);
898 return Node;
899}
900
901__isl_give isl_schedule_node *ScheduleTreeOptimizer::createMicroKernel(
902 __isl_take isl_schedule_node *Node, MicroKernelParamsTy MicroKernelParams) {
903 applyRegisterTiling(Node, {MicroKernelParams.Mr, MicroKernelParams.Nr}, 1);
904 Node = isl_schedule_node_parent(isl_schedule_node_parent(Node));
905 Node = permuteBandNodeDimensions(Node, 0, 1);
906 return isl_schedule_node_child(isl_schedule_node_child(Node, 0), 0);
907}
908
909__isl_give isl_schedule_node *ScheduleTreeOptimizer::createMacroKernel(
910 __isl_take isl_schedule_node *Node, MacroKernelParamsTy MacroKernelParams) {
911 assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band);
912 if (MacroKernelParams.Mc == 1 && MacroKernelParams.Nc == 1 &&
913 MacroKernelParams.Kc == 1)
914 return Node;
915 int DimOutNum = isl_schedule_node_band_n_member(Node);
916 std::vector<int> TileSizes(DimOutNum, 1);
917 TileSizes[DimOutNum - 3] = MacroKernelParams.Mc;
918 TileSizes[DimOutNum - 2] = MacroKernelParams.Nc;
919 TileSizes[DimOutNum - 1] = MacroKernelParams.Kc;
920 Node = tileNode(Node, "1st level tiling", TileSizes, 1);
921 Node = isl_schedule_node_parent(isl_schedule_node_parent(Node));
922 Node = permuteBandNodeDimensions(Node, DimOutNum - 2, DimOutNum - 1);
923 Node = permuteBandNodeDimensions(Node, DimOutNum - 3, DimOutNum - 1);
924 return isl_schedule_node_child(isl_schedule_node_child(Node, 0), 0);
925}
926
927/// Get the size of the widest type of the matrix multiplication operands
928/// in bytes, including alignment padding.
929///
930/// @param MMI Parameters of the matrix multiplication operands.
931/// @return The size of the widest type of the matrix multiplication operands
932/// in bytes, including alignment padding.
933static uint64_t getMatMulAlignTypeSize(MatMulInfoTy MMI) {
934 auto *S = MMI.A->getStatement()->getParent();
935 auto &DL = S->getFunction().getParent()->getDataLayout();
936 auto ElementSizeA = DL.getTypeAllocSize(MMI.A->getElementType());
937 auto ElementSizeB = DL.getTypeAllocSize(MMI.B->getElementType());
938 auto ElementSizeC = DL.getTypeAllocSize(MMI.WriteToC->getElementType());
939 return std::max({ElementSizeA, ElementSizeB, ElementSizeC});
940}
941
942/// Get the size of the widest type of the matrix multiplication operands
943/// in bits.
944///
945/// @param MMI Parameters of the matrix multiplication operands.
946/// @return The size of the widest type of the matrix multiplication operands
947/// in bits.
948static uint64_t getMatMulTypeSize(MatMulInfoTy MMI) {
949 auto *S = MMI.A->getStatement()->getParent();
950 auto &DL = S->getFunction().getParent()->getDataLayout();
951 auto ElementSizeA = DL.getTypeSizeInBits(MMI.A->getElementType());
952 auto ElementSizeB = DL.getTypeSizeInBits(MMI.B->getElementType());
953 auto ElementSizeC = DL.getTypeSizeInBits(MMI.WriteToC->getElementType());
954 return std::max({ElementSizeA, ElementSizeB, ElementSizeC});
955}
956
957/// Get parameters of the BLIS micro kernel.
958///
959/// We choose the Mr and Nr parameters of the micro kernel to be large enough
960/// such that no stalls caused by the combination of latencies and dependencies
961/// are introduced during the updates of the resulting matrix of the matrix
962/// multiplication. However, they should also be as small as possible to
963/// release more registers for entries of multiplied matrices.
964///
965/// @param TTI Target Transform Info.
966/// @param MMI Parameters of the matrix multiplication operands.
967/// @return The structure of type MicroKernelParamsTy.
968/// @see MicroKernelParamsTy
969static struct MicroKernelParamsTy
970getMicroKernelParams(const llvm::TargetTransformInfo *TTI, MatMulInfoTy MMI) {
971 assert(TTI && "The target transform info should be provided.");
972
973 // Nvec - Number of double-precision floating-point numbers that can be held
974 // by a vector register. Use 2 by default.
975 long RegisterBitwidth = VectorRegisterBitwidth;
976
977 if (RegisterBitwidth == -1)
978 RegisterBitwidth = TTI->getRegisterBitWidth(true);
979 auto ElementSize = getMatMulTypeSize(MMI);
980 assert(ElementSize > 0 && "The element size of the matrix multiplication "
981 "operands should be greater than zero.");
982 auto Nvec = RegisterBitwidth / ElementSize;
983 if (Nvec == 0)
984 Nvec = 2;
985 int Nr =
986 ceil(sqrt(Nvec * LatencyVectorFma * ThroughputVectorFma) / Nvec) * Nvec;
987 int Mr = ceil(Nvec * LatencyVectorFma * ThroughputVectorFma / Nr);
988 return {Mr, Nr};
989}
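// Worked example (editorial, assuming 256-bit vector registers and
// double-precision operands, with the default -polly-target-latency-vector-fma=8
// and -polly-target-throughput-vector-fma=1):
//   Nvec = 256 / 64 = 4
//   Nr   = ceil(sqrt(4 * 8 * 1) / 4) * 4 = ceil(1.42) * 4 = 8
//   Mr   = ceil(4 * 8 * 1 / 8)           = 4
// so the micro kernel updates a 4 x 8 block of the result matrix.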
990
991/// Get parameters of the BLIS macro kernel.
992///
993/// During the computation of matrix multiplication, blocks of partitioned
994/// matrices are mapped to different layers of the memory hierarchy.
995/// To optimize data reuse, blocks should be ideally kept in cache between
996/// iterations. Since parameters of the macro kernel determine sizes of these
997/// blocks, there are upper and lower bounds on these parameters.
998///
999/// @param MicroKernelParams Parameters of the micro-kernel
1000/// to be taken into account.
1001/// @param MMI Parameters of the matrix multiplication operands.
1002/// @return The structure of type MacroKernelParamsTy.
1003/// @see MacroKernelParamsTy
1004/// @see MicroKernelParamsTy
1005static struct MacroKernelParamsTy
1006getMacroKernelParams(const MicroKernelParamsTy &MicroKernelParams,
1007 MatMulInfoTy MMI) {
1008 // According to www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf,
1009 // information about the first two cache levels is required to determine all
1010 // the parameters of a macro-kernel. The associativity degree of each cache
1011 // level must also be greater than two; otherwise, another algorithm for
1012 // determining the parameters should be used.
1013 if (!(MicroKernelParams.Mr > 0 && MicroKernelParams.Nr > 0 &&
1014 FirstCacheLevelSize > 0 && SecondCacheLevelSize > 0 &&
1015 FirstCacheLevelAssociativity > 2 && SecondCacheLevelAssociativity > 2))
1016 return {1, 1, 1};
1017 // The quotient should be greater than zero.
1018 if (PollyPatternMatchingNcQuotient <= 0)
1019 return {1, 1, 1};
1020 int Car = floor(
1021 (FirstCacheLevelAssociativity - 1) /
1022 (1 + static_cast<double>(MicroKernelParams.Nr) / MicroKernelParams.Mr));
1023 auto ElementSize = getMatMulAlignTypeSize(MMI);
1024 assert(ElementSize > 0 && "The element size of the matrix multiplication "
1025 "operands should be greater than zero.");
1026 int Kc = (Car * FirstCacheLevelSize) /
1027 (MicroKernelParams.Mr * FirstCacheLevelAssociativity * ElementSize);
1028 double Cac =
1029 static_cast<double>(Kc * ElementSize * SecondCacheLevelAssociativity) /
1030 SecondCacheLevelSize;
1031 int Mc = floor((SecondCacheLevelAssociativity - 2) / Cac);
1032 int Nc = PollyPatternMatchingNcQuotient * MicroKernelParams.Nr;
1033 return {Mc, Nc, Kc};
1034}
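// Worked example (editorial, continuing the numbers above: Mr = 4, Nr = 8,
// 8-byte double elements, and the default cache parameters declared earlier):
//   Car = floor(7 / (1 + 8/4))       = 2
//   Kc  = (2 * 32768) / (4 * 8 * 8)  = 256
//   Cac = (256 * 8 * 8) / 262144     = 0.0625
//   Mc  = floor((8 - 2) / 0.0625)    = 96
//   Nc  = 256 * 8                    = 2048
// i.e. the macro kernel operates on Mc x Kc and Kc x Nc blocks.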
1035
1036/// Create an access relation that is specific to
1037/// the matrix multiplication pattern.
1038///
1039/// Create an access relation of the following form:
1040/// [O0, O1, O2, O3, O4, O5, O6, O7, O8] -> [OI, O5, OJ]
1041/// where I is @p FirstDim, J is @p SecondDim.
1042///
1043/// It can be used, for example, to create relations that help to access
1044/// elements of operands of a matrix multiplication after the creation of
1045/// the BLIS micro and macro kernels.
1046///
1047/// @see ScheduleTreeOptimizer::createMicroKernel
1048/// @see ScheduleTreeOptimizer::createMacroKernel
1049///
1050/// Subsequently, the described access relation is applied to the range of
1051/// @p MapOldIndVar, which is used to map original induction variables to
1052/// the ones produced by schedule transformations. It helps to
1053/// define relations using a new space and, at the same time, keep them
1054/// in the original one.
1055///
1056/// @param MapOldIndVar The relation, which maps original induction variables
1057/// to the ones, which are produced by schedule
1058/// transformations.
1059/// @param FirstDim, SecondDim The input dimensions that are used to define
1060/// the specified access relation.
1061/// @return The specified access relation.
1062__isl_give isl_map *getMatMulAccRel(__isl_take isl_map *MapOldIndVar,
1063 unsigned FirstDim, unsigned SecondDim) {
1064 auto *Ctx = isl_map_get_ctx(MapOldIndVar);
1065 auto *AccessRelSpace = isl_space_alloc(Ctx, 0, 9, 3);
1066 auto *AccessRel = isl_map_universe(AccessRelSpace);
1067 AccessRel = isl_map_equate(AccessRel, isl_dim_in, FirstDim, isl_dim_out, 0);
1068 AccessRel = isl_map_equate(AccessRel, isl_dim_in, 5, isl_dim_out, 1);
1069 AccessRel = isl_map_equate(AccessRel, isl_dim_in, SecondDim, isl_dim_out, 2);
1070 return isl_map_apply_range(MapOldIndVar, AccessRel);
1071}
1072
1073__isl_give isl_schedule_node *
1074createExtensionNode(__isl_take isl_schedule_node *Node,
1075 __isl_take isl_map *ExtensionMap) {
1076 auto *Extension = isl_union_map_from_map(ExtensionMap);
1077 auto *NewNode = isl_schedule_node_from_extension(Extension);
1078 return isl_schedule_node_graft_before(Node, NewNode);
1079}
1080
1081/// Apply the packing transformation.
1082///
1083/// The packing transformation can be described as a data-layout
1084/// transformation that requires to introduce a new array, copy data
1085/// to the array, and change memory access locations to reference the array.
1086/// It can be used to ensure that elements of the new array are read with
1087/// in-stride accesses, aligned to cache line boundaries, and preloaded into
1088/// certain cache levels.
1089///
1090/// As an example, let us consider the packing of the array A that would help
1091/// to read its elements with in-stride access. An access to the array A
1092/// is represented by an access relation that has the form
1093/// S[i, j, k] -> A[i, k]. The scheduling function of the SCoP statement S has
1094/// the form S[i, j, k] -> [floor((j mod Nc) / Nr), floor((i mod Mc) / Mr),
1095/// k mod Kc, j mod Nr, i mod Mr].
1096///
1097/// To ensure that elements of the array A are read with in-stride access, we add
1098/// a new array Packed_A[Mc/Mr][Kc][Mr] to the SCoP, using
1099/// Scop::createScopArrayInfo, change the access relation
1100/// S[i, j, k] -> A[i, k] to
1101/// S[i, j, k] -> Packed_A[floor((i mod Mc) / Mr), k mod Kc, i mod Mr], using
1102/// MemoryAccess::setNewAccessRelation, and copy the data to the array, using
1103/// the copy statement created by Scop::addScopStmt.
1104///
1105/// @param Node The schedule node to be optimized.
1106/// @param MapOldIndVar The relation that maps the original induction
1107/// variables to the ones produced by schedule
1108/// transformations.
1109/// @param MicroParams, MacroParams Parameters of the BLIS kernel
1110/// to be taken into account.
1111/// @param MMI Parameters of the matrix multiplication operands.
1112/// @return The optimized schedule node.
1113static __isl_give isl_schedule_node *optimizeDataLayoutMatrMulPattern(
1114 __isl_take isl_schedule_node *Node, __isl_take isl_map *MapOldIndVar,
1115 MicroKernelParamsTy MicroParams, MacroKernelParamsTy MacroParams,
1116 MatMulInfoTy &MMI) {
1117 auto InputDimsId = isl_map_get_tuple_id(MapOldIndVar, isl_dim_in);
1118 auto *Stmt = static_cast<ScopStmt *>(isl_id_get_user(InputDimsId));
1119 isl_id_free(InputDimsId);
1120
1121 // Create a copy statement that corresponds to the memory access to the
1122 // matrix B, the second operand of the matrix multiplication.
1123 Node = isl_schedule_node_parent(isl_schedule_node_parent(Node));
1124 Node = isl_schedule_node_parent(isl_schedule_node_parent(Node));
1125 Node = isl_schedule_node_parent(Node);
1126 Node = isl_schedule_node_child(isl_schedule_node_band_split(Node, 2), 0);
1127 auto *AccRel = getMatMulAccRel(isl_map_copy(MapOldIndVar), 3, 7);
1128 unsigned FirstDimSize = MacroParams.Nc / MicroParams.Nr;
1129 unsigned SecondDimSize = MacroParams.Kc;
1130 unsigned ThirdDimSize = MicroParams.Nr;
1131 auto *SAI = Stmt->getParent()->createScopArrayInfo(
1132 MMI.B->getElementType(), "Packed_B",
1133 {FirstDimSize, SecondDimSize, ThirdDimSize});
1134 AccRel = isl_map_set_tuple_id(AccRel, isl_dim_out, SAI->getBasePtrId());
1135 auto *OldAcc = MMI.B->getAccessRelation();
1136 MMI.B->setNewAccessRelation(AccRel);
1137 auto *ExtMap =
1138 isl_map_project_out(isl_map_copy(MapOldIndVar), isl_dim_out, 2,
1139 isl_map_dim(MapOldIndVar, isl_dim_out) - 2);
1140 ExtMap = isl_map_reverse(ExtMap);
1141 ExtMap = isl_map_fix_si(ExtMap, isl_dim_out, MMI.i, 0);
1142 auto *Domain = Stmt->getDomain();
1143
1144 // Restrict the domain of the copy statement so that it is executed only
1145 // when its originating statement is executed.
1146 auto *DomainId = isl_set_get_tuple_id(Domain);
1147 auto *NewStmt = Stmt->getParent()->addScopStmt(
1148 OldAcc, MMI.B->getAccessRelation(), isl_set_copy(Domain));
1149 ExtMap = isl_map_set_tuple_id(ExtMap, isl_dim_out, isl_id_copy(DomainId));
1150 ExtMap = isl_map_intersect_range(ExtMap, isl_set_copy(Domain));
1151 ExtMap = isl_map_set_tuple_id(ExtMap, isl_dim_out, NewStmt->getDomainId());
1152 Node = createExtensionNode(Node, ExtMap);
1153
1154 // Create a copy statement that corresponds to the memory access
1155 // to the matrix A, the first operand of the matrix multiplication.
1156 Node = isl_schedule_node_child(Node, 0);
1157 AccRel = getMatMulAccRel(isl_map_copy(MapOldIndVar), 4, 6);
1158 FirstDimSize = MacroParams.Mc / MicroParams.Mr;
1159 ThirdDimSize = MicroParams.Mr;
1160 SAI = Stmt->getParent()->createScopArrayInfo(
1161 MMI.A->getElementType(), "Packed_A",
1162 {FirstDimSize, SecondDimSize, ThirdDimSize});
1163 AccRel = isl_map_set_tuple_id(AccRel, isl_dim_out, SAI->getBasePtrId());
1164 OldAcc = MMI.A->getAccessRelation();
1165 MMI.A->setNewAccessRelation(AccRel);
1166 ExtMap = isl_map_project_out(MapOldIndVar, isl_dim_out, 3,
1167 isl_map_dim(MapOldIndVar, isl_dim_out) - 3);
1168 ExtMap = isl_map_reverse(ExtMap);
1169 ExtMap = isl_map_fix_si(ExtMap, isl_dim_out, MMI.j, 0);
1170 NewStmt = Stmt->getParent()->addScopStmt(OldAcc, MMI.A->getAccessRelation(),
1171 isl_set_copy(Domain));
1172
1173 // Restrict the domain of the copy statement so that it is executed only
1174 // when its originating statement is executed.
1175 ExtMap = isl_map_set_tuple_id(ExtMap, isl_dim_out, DomainId);
1176 ExtMap = isl_map_intersect_range(ExtMap, Domain);
1177 ExtMap = isl_map_set_tuple_id(ExtMap, isl_dim_out, NewStmt->getDomainId());
1178 Node = createExtensionNode(Node, ExtMap);
1179 Node = isl_schedule_node_child(isl_schedule_node_child(Node, 0), 0);
1180 return isl_schedule_node_child(isl_schedule_node_child(Node, 0), 0);
1181}
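
The packing described in the comment above is easiest to picture as an explicit copy loop. The following standalone sketch (not part of ScheduleOptimizer.cpp; Polly instead emits a copy statement via an extension node) shows what packing a block of A into Packed_A[Mc/Mr][Kc][Mr] amounts to at the C++ level. The row-major layout, the leading dimension LDA, the value type double, and the assumption that Mc is a multiple of Mr are illustrative choices.

#include <vector>

// Copy the Mc x Kc block of A starting at row I0 and column K0 into PackedA,
// laid out as [Mc/Mr][Kc][Mr], so that the micro-kernel later reads
// PackedA[i / Mr][k][i % Mr] with stride-one accesses (i, k are block-local).
void packA(const std::vector<double> &A, int LDA, int I0, int K0, int Mc,
           int Kc, int Mr, std::vector<double> &PackedA) {
  for (int i = 0; i < Mc; ++i)
    for (int k = 0; k < Kc; ++k)
      PackedA[((i / Mr) * Kc + k) * Mr + (i % Mr)] =
          A[(I0 + i) * LDA + (K0 + k)];
}
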
1182
1183/// Get a relation mapping the original induction variables to the ones
1184/// produced by schedule transformations.
1185///
1186/// @param Node The schedule node produced as the result of creation
1187/// of the BLIS kernels.
1188/// @param MicroKernelParams, MacroKernelParams Parameters of the BLIS kernel
1189/// to be taken into account.
1190/// @return The relation mapping original induction variables to the ones
1191/// produced by schedule transformations.
1192/// @see ScheduleTreeOptimizer::createMicroKernel
1193/// @see ScheduleTreeOptimizer::createMacroKernel
1194/// @see getMacroKernelParams
1195__isl_give isl_map *
1196getInductionVariablesSubstitution(__isl_take isl_schedule_node *Node,
1197 MicroKernelParamsTy MicroKernelParams,
1198 MacroKernelParamsTy MacroKernelParams) {
1199 auto *Child = isl_schedule_node_get_child(Node, 0);
1200 auto *UnMapOldIndVar = isl_schedule_node_get_prefix_schedule_union_map(Child);
1201 isl_schedule_node_free(Child);
1202 auto *MapOldIndVar = isl_map_from_union_map(UnMapOldIndVar);
1203 if (isl_map_dim(MapOldIndVar, isl_dim_out) > 9)
1204 MapOldIndVar =
1205 isl_map_project_out(MapOldIndVar, isl_dim_out, 0,
1206 isl_map_dim(MapOldIndVar, isl_dim_out) - 9);
1207 return MapOldIndVar;
1208}
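
The projection above keeps only the innermost nine schedule dimensions. As a small, self-contained reminder of how isl_map_project_out behaves (not part of ScheduleOptimizer.cpp), the toy 3-output map below has its leading output dimensions dropped in the same way; the map string is made up for illustration.

#include <cstdio>
#include <cstdlib>
#include <isl/ctx.h>
#include <isl/map.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();
  isl_map *M = isl_map_read_from_str(
      Ctx, "{ S[i] -> [o0, o1, o2] : o0 = 0 and o1 = i and o2 = 2i }");
  // Drop all but the last output dimension, analogous to keeping the last 9.
  M = isl_map_project_out(M, isl_dim_out, 0, isl_map_dim(M, isl_dim_out) - 1);
  char *Str = isl_map_to_str(M); // roughly: { S[i] -> [2i] }
  std::printf("%s\n", Str);
  free(Str);
  isl_map_free(M);
  isl_ctx_free(Ctx);
  return 0;
}
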
1209
1210/// Isolate a set of partial tile prefixes and unroll the isolated part.
1211///
1212/// The isolated set should contain only partial tile prefixes that have
1213/// exactly Mr x Nr iterations of the two innermost loops produced by
1214/// the optimization of the matrix multiplication. Mr and Nr are parameters of
1215/// the micro-kernel.
1216///
1217/// In case of parametric bounds, this helps to auto-vectorize the unrolled
1218/// innermost loops, using the SLP vectorizer.
1219///
1220/// @param Node The schedule node to be modified.
1221/// @param MicroKernelParams Parameters of the micro-kernel
1222/// to be taken into account.
1223/// @return The modified isl_schedule_node.
1224static __isl_give isl_schedule_node *
1225isolateAndUnrollMatMulInnerLoops(__isl_take isl_schedule_node *Node,
1226 struct MicroKernelParamsTy MicroKernelParams) {
1227 auto *Child = isl_schedule_node_get_child(Node, 0);
1228 auto *UnMapOldIndVar = isl_schedule_node_get_prefix_schedule_relation(Child);
1229 isl_schedule_node_free(Child);
1230 auto *Prefix = isl_map_range(isl_map_from_union_map(UnMapOldIndVar));
1231 auto Dims = isl_set_dim(Prefix, isl_dim_set);
1232 Prefix = isl_set_project_out(Prefix, isl_dim_set, Dims - 1, 1);
1233 Prefix = getPartialTilePrefixes(Prefix, MicroKernelParams.Nr);
1234 Prefix = getPartialTilePrefixes(Prefix, MicroKernelParams.Mr);
1235 auto *IsolateOption = getIsolateOptions(
1236 isl_set_add_dims(isl_set_copy(Prefix), isl_dim_set, 3), 3);
1237 auto *Ctx = isl_schedule_node_get_ctx(Node);
1238 auto *AtomicOption = getAtomicOptions(Ctx);
1239 auto *Options =
1240 isl_union_set_union(IsolateOption, isl_union_set_copy(AtomicOption));
1241 Options = isl_union_set_union(Options, getUnrollIsolatedSetOptions(Ctx));
1242 Node = isl_schedule_node_band_set_ast_build_options(Node, Options);
1243 Node = isl_schedule_node_parent(isl_schedule_node_parent(Node));
1244 IsolateOption = getIsolateOptions(Prefix, 3);
1245 Options = isl_union_set_union(IsolateOption, AtomicOption);
1246 Node = isl_schedule_node_band_set_ast_build_options(Node, Options);
1247 Node = isl_schedule_node_child(isl_schedule_node_child(Node, 0), 0);
1248 return Node;
1249}
1250
1251__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern(
1252 __isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI,
1253 MatMulInfoTy &MMI) {
1254 assert(TTI && "The target transform info should be provided.");
1255 int DimOutNum = isl_schedule_node_band_n_member(Node);
1256 assert(DimOutNum > 2 && "In case of the matrix multiplication the loop nest "
1257 "and, consequently, the corresponding scheduling "
1258 "functions have at least three dimensions.");
1259 Node = permuteBandNodeDimensions(Node, MMI.i, DimOutNum - 3);
1260 int NewJ = MMI.j == DimOutNum - 3 ? MMI.i : MMI.j;
1261 int NewK = MMI.k == DimOutNum - 3 ? MMI.i : MMI.k;
Value stored to 'NewK' during its initialization is never read
1262 Node = permuteBandNodeDimensions(Node, NewJ, DimOutNum - 2);
1263 NewK = MMI.k == DimOutNum - 2 ? MMI.j : MMI.k;
1264 Node = permuteBandNodeDimensions(Node, NewK, DimOutNum - 1);
1265 auto MicroKernelParams = getMicroKernelParams(TTI, MMI);
1266 auto MacroKernelParams = getMacroKernelParams(MicroKernelParams, MMI);
1267 Node = createMacroKernel(Node, MacroKernelParams);
1268 Node = createMicroKernel(Node, MicroKernelParams);
1269 if (MacroKernelParams.Mc == 1 || MacroKernelParams.Nc == 1 ||
1270 MacroKernelParams.Kc == 1)
1271 return Node;
1272 auto *MapOldIndVar = getInductionVariablesSubstitution(
1273 Node, MicroKernelParams, MacroKernelParams);
1274 if (!MapOldIndVar)
1275 return Node;
1276 Node = isolateAndUnrollMatMulInnerLoops(Node, MicroKernelParams);
1277 return optimizeDataLayoutMatrMulPattern(Node, MapOldIndVar, MicroKernelParams,
1278 MacroKernelParams, MMI);
1279}
1280
1281bool ScheduleTreeOptimizer::isMatrMultPattern(
1282 __isl_keep isl_schedule_node *Node, const Dependences *D,
1283 MatMulInfoTy &MMI) {
1284 auto *PartialSchedule =
1285 isl_schedule_node_band_get_partial_schedule_union_map(Node);
1286 if (isl_schedule_node_band_n_member(Node) < 3 ||
1287 isl_union_map_n_map(PartialSchedule) != 1) {
1288 isl_union_map_free(PartialSchedule);
1289 return false;
1290 }
1291 auto *NewPartialSchedule = isl_map_from_union_map(PartialSchedule);
1292 if (containsMatrMult(NewPartialSchedule, D, MMI)) {
1293 isl_map_free(NewPartialSchedule);
1294 return true;
1295 }
1296 isl_map_free(NewPartialSchedule);
1297 return false;
1298}
1299
1300__isl_give isl_schedule_node *
1301ScheduleTreeOptimizer::optimizeBand(__isl_take isl_schedule_node *Node,
1302 void *User) {
1303 if (!isTileableBandNode(Node))
1304 return Node;
1305
1306 const OptimizerAdditionalInfoTy *OAI =
1307 static_cast<const OptimizerAdditionalInfoTy *>(User);
1308
1309 MatMulInfoTy MMI;
1310 if (PMBasedOpts && User && isMatrMultPattern(Node, OAI->D, MMI)) {
1311 DEBUG(dbgs() << "The matrix multiplication pattern was detected\n");
1312 return optimizeMatMulPattern(Node, OAI->TTI, MMI);
1313 }
1314
1315 return standardBandOpts(Node, User);
1316}
1317
1318__isl_give isl_schedule *
1319ScheduleTreeOptimizer::optimizeSchedule(__isl_take isl_schedule *Schedule,
1320 const OptimizerAdditionalInfoTy *OAI) {
1321 isl_schedule_node *Root = isl_schedule_get_root(Schedule);
1322 Root = optimizeScheduleNode(Root, OAI);
1323 isl_schedule_free(Schedule);
1324 auto S = isl_schedule_node_get_schedule(Root);
1325 isl_schedule_node_free(Root);
1326 return S;
1327}
1328
1329__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeScheduleNode(
1330 __isl_take isl_schedule_node *Node, const OptimizerAdditionalInfoTy *OAI) {
1331 Node = isl_schedule_node_map_descendant_bottom_up(
1332 Node, optimizeBand, const_cast<void *>(static_cast<const void *>(OAI)));
1333 return Node;
1334}
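
optimizeScheduleNode relies on isl_schedule_node_map_descendant_bottom_up to visit every node of the schedule tree and let the callback rewrite it in place. The standalone sketch below (not part of ScheduleOptimizer.cpp) demonstrates the traversal with a callback that merely counts nodes; the toy domain string is an assumption for illustration.

#include <cstdio>
#include <isl/ctx.h>
#include <isl/schedule.h>
#include <isl/schedule_node.h>
#include <isl/union_set.h>

// A callback with the same shape as optimizeBand: it receives each node and
// returns the (possibly rewritten) node. Here it only counts visits.
static isl_schedule_node *countNode(isl_schedule_node *Node, void *User) {
  ++*static_cast<int *>(User);
  return Node;
}

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();
  isl_schedule *S = isl_schedule_from_domain(
      isl_union_set_read_from_str(Ctx, "{ A[i] : 0 <= i < 10 }"));
  isl_schedule_node *Root = isl_schedule_get_root(S);
  int Count = 0;
  Root = isl_schedule_node_map_descendant_bottom_up(Root, countNode, &Count);
  std::printf("visited %d schedule tree nodes\n", Count);
  isl_schedule_node_free(Root);
  isl_schedule_free(S);
  isl_ctx_free(Ctx);
  return 0;
}
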
1335
1336bool ScheduleTreeOptimizer::isProfitableSchedule(
1337 Scop &S, __isl_keep isl_schedule *NewSchedule) {
1338 // To determine whether the schedule has been optimized, we check whether it
1339 // has changed at all.
1340 // TODO: We can improve this by tracking whether any necessarily beneficial
1341 // transformations have been performed (this can e.g. be tiling or loop
1342 // interchange). We can track this either at the place where the
1343 // transformation has been performed or, in the case of automatic ILP-based
1344 // optimizations, by comparing (yet to be defined) performance metrics
1345 // before/after the scheduling optimizer
1346 // (e.g., the number of stride-one accesses).
1347 if (S.containsExtensionNode(NewSchedule))
1348 return true;
1349 auto *NewScheduleMap = isl_schedule_get_map(NewSchedule);
1350 isl_union_map *OldSchedule = S.getSchedule();
1351 assert(OldSchedule && "Only IslScheduleOptimizer can insert extension nodes "
1352 "that make Scop::getSchedule() return nullptr.");
1353 bool changed = !isl_union_map_is_equal(OldSchedule, NewScheduleMap);
1354 isl_union_map_free(OldSchedule);
1355 isl_union_map_free(NewScheduleMap);
1356 return changed;
1357}
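
The profitability check above boils down to an isl_union_map equality test between the old and the new schedule. The standalone sketch below (not part of ScheduleOptimizer.cpp) shows that test on two made-up schedule maps; the strings are assumptions for illustration only.

#include <cstdio>
#include <isl/ctx.h>
#include <isl/union_map.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();
  isl_union_map *Old =
      isl_union_map_read_from_str(Ctx, "{ S[i] -> [i] : 0 <= i < 8 }");
  isl_union_map *New =
      isl_union_map_read_from_str(Ctx, "{ S[i] -> [7 - i] : 0 <= i < 8 }");
  // isl_union_map_is_equal returns an isl_bool; the two maps above differ,
  // so a pass using this check would treat the new schedule as "changed".
  isl_bool Equal = isl_union_map_is_equal(Old, New);
  std::printf("changed = %d\n", Equal == isl_bool_false);
  isl_union_map_free(Old);
  isl_union_map_free(New);
  isl_ctx_free(Ctx);
  return 0;
}
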
1358
1359namespace {
1360class IslScheduleOptimizer : public ScopPass {
1361public:
1362 static char ID;
1363 explicit IslScheduleOptimizer() : ScopPass(ID) { LastSchedule = nullptr; }
1364
1365 ~IslScheduleOptimizer() { isl_schedule_free(LastSchedule); }
1366
1367 /// Optimize the schedule of the SCoP @p S.
1368 bool runOnScop(Scop &S) override;
1369
1370 /// Print the new schedule for the SCoP @p S.
1371 void printScop(raw_ostream &OS, Scop &S) const override;
1372
1373 /// Register all analyses and transformations required.
1374 void getAnalysisUsage(AnalysisUsage &AU) const override;
1375
1376 /// Release the internal memory.
1377 void releaseMemory() override {
1378 isl_schedule_free(LastSchedule);
1379 LastSchedule = nullptr;
1380 }
1381
1382private:
1383 isl_schedule *LastSchedule;
1384};
1385} // namespace
1386
1387char IslScheduleOptimizer::ID = 0;
1388
1389bool IslScheduleOptimizer::runOnScop(Scop &S) {
1390
1391 // Skip empty SCoPs but still allow code generation, as it will delete the
1392 // loops that are present but not needed.
1393 if (S.getSize() == 0) {
1394 S.markAsOptimized();
1395 return false;
1396 }
1397
1398 const Dependences &D =
1399 getAnalysis<DependenceInfo>().getDependences(Dependences::AL_Statement);
1400
1401 if (!D.hasValidDependences())
1402 return false;
1403
1404 isl_schedule_free(LastSchedule);
1405 LastSchedule = nullptr;
1406
1407 // Build input data.
1408 int ValidityKinds =
1409 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
1410 int ProximityKinds;
1411
1412 if (OptimizeDeps == "all")
1413 ProximityKinds =
1414 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
1415 else if (OptimizeDeps == "raw")
1416 ProximityKinds = Dependences::TYPE_RAW;
1417 else {
1418 errs() << "Do not know how to optimize for '" << OptimizeDeps << "'"
1419 << " Falling back to optimizing all dependences.\n";
1420 ProximityKinds =
1421 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
1422 }
1423
1424 isl_union_set *Domain = S.getDomains();
1425
1426 if (!Domain)
1427 return false;
1428
1429 isl_union_map *Validity = D.getDependences(ValidityKinds);
1430 isl_union_map *Proximity = D.getDependences(ProximityKinds);
1431
1432 // Simplify the dependences by removing the constraints introduced by the
1433 // domains. This can speed up the scheduling time significantly, as large
1434 // constant coefficients will be removed from the dependences. The
1435 // introduction of some additional dependences reduces the possible
1436 // transformations, but in most cases such transformations do not seem to be
1437 // interesting anyway. In some cases this option may prevent the scheduler
1438 // from finding any schedule.
1439 if (SimplifyDeps == "yes") {
1440 Validity = isl_union_map_gist_domain(Validity, isl_union_set_copy(Domain));
1441 Validity = isl_union_map_gist_range(Validity, isl_union_set_copy(Domain));
1442 Proximity =
1443 isl_union_map_gist_domain(Proximity, isl_union_set_copy(Domain));
1444 Proximity = isl_union_map_gist_range(Proximity, isl_union_set_copy(Domain));
1445 } else if (SimplifyDeps != "no") {
1446 errs() << "warning: Option -polly-opt-simplify-deps should either be 'yes' "
1447 "or 'no'. Falling back to default: 'yes'\n";
1448 }
1449
1450 DEBUG(dbgs() << "\n\nCompute schedule from: ");
1451 DEBUG(dbgs() << "Domain := " << stringFromIslObj(Domain) << ";\n");
1452 DEBUG(dbgs() << "Proximity := " << stringFromIslObj(Proximity) << ";\n");
1453 DEBUG(dbgs() << "Validity := " << stringFromIslObj(Validity) << ";\n");
1454
1455 unsigned IslSerializeSCCs;
1456
1457 if (FusionStrategy == "max") {
1458 IslSerializeSCCs = 0;
1459 } else if (FusionStrategy == "min") {
1460 IslSerializeSCCs = 1;
1461 } else {
1462 errs() << "warning: Unknown fusion strategy. Falling back to maximal "
1463 "fusion.\n";
1464 IslSerializeSCCs = 0;
1465 }
1466
1467 int IslMaximizeBands;
1468
1469 if (MaximizeBandDepth == "yes") {
1470 IslMaximizeBands = 1;
1471 } else if (MaximizeBandDepth == "no") {
1472 IslMaximizeBands = 0;
1473 } else {
1474 errs() << "warning: Option -polly-opt-maximize-bands should either be 'yes'"
1475 " or 'no'. Falling back to default: 'yes'\n";
1476 IslMaximizeBands = 1;
1477 }
1478
1479 int IslOuterCoincidence;
1480
1481 if (OuterCoincidence == "yes") {
1482 IslOuterCoincidence = 1;
1483 } else if (OuterCoincidence == "no") {
1484 IslOuterCoincidence = 0;
1485 } else {
1486 errs() << "warning: Option -polly-opt-outer-coincidence should either be "
1487 "'yes' or 'no'. Falling back to default: 'no'\n";
1488 IslOuterCoincidence = 0;
1489 }
1490
1491 isl_ctx *Ctx = S.getIslCtx();
1492
1493 isl_options_set_schedule_outer_coincidence(Ctx, IslOuterCoincidence);
1494 isl_options_set_schedule_serialize_sccs(Ctx, IslSerializeSCCs);
1495 isl_options_set_schedule_maximize_band_depth(Ctx, IslMaximizeBands);
1496 isl_options_set_schedule_max_constant_term(Ctx, MaxConstantTerm);
1497 isl_options_set_schedule_max_coefficient(Ctx, MaxCoefficient);
1498 isl_options_set_tile_scale_tile_loops(Ctx, 0);
1499
1500 auto OnErrorStatus = isl_options_get_on_error(Ctx);
1501 isl_options_set_on_error(Ctx, ISL_ON_ERROR_CONTINUE);
1502
1503 isl_schedule_constraints *ScheduleConstraints;
1504 ScheduleConstraints = isl_schedule_constraints_on_domain(Domain);
1505 ScheduleConstraints =
1506 isl_schedule_constraints_set_proximity(ScheduleConstraints, Proximity);
1507 ScheduleConstraints = isl_schedule_constraints_set_validity(
1508 ScheduleConstraints, isl_union_map_copy(Validity));
1509 ScheduleConstraints =
1510 isl_schedule_constraints_set_coincidence(ScheduleConstraints, Validity);
1511 isl_schedule *Schedule;
1512 Schedule = isl_schedule_constraints_compute_schedule(ScheduleConstraints);
1513 isl_options_set_on_error(Ctx, OnErrorStatus);
1514
1515 // In case the scheduler is not able to optimize the code, we just do not
1516 // touch the schedule.
1517 if (!Schedule)
1518 return false;
1519
1520 DEBUG({
1521 auto *P = isl_printer_to_str(Ctx);
1522 P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
1523 P = isl_printer_print_schedule(P, Schedule);
1524 auto *str = isl_printer_get_str(P);
1525 dbgs() << "NewScheduleTree: \n" << str << "\n";
1526 free(str);
1527 isl_printer_free(P);
1528 });
1529
1530 Function &F = S.getFunction();
1531 auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1532 const OptimizerAdditionalInfoTy OAI = {TTI, const_cast<Dependences *>(&D)};
1533 isl_schedule *NewSchedule =
1534 ScheduleTreeOptimizer::optimizeSchedule(Schedule, &OAI);
1535
1536 if (!ScheduleTreeOptimizer::isProfitableSchedule(S, NewSchedule)) {
1537 isl_schedule_free(NewSchedule);
1538 return false;
1539 }
1540
1541 S.setScheduleTree(NewSchedule);
1542 S.markAsOptimized();
1543
1544 if (OptimizedScops)
1545 S.dump();
1546
1547 return false;
1548}
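
Stripped of option handling and Polly data structures, the heart of runOnScop is the isl scheduling pipeline: build constraints from a domain, add validity/proximity dependences, and compute a schedule. The standalone sketch below (not part of ScheduleOptimizer.cpp) runs that pipeline on a toy two-statement SCoP; the domain and dependence strings are made up for illustration and an isl installation is assumed.

#include <cstdio>
#include <cstdlib>
#include <isl/ctx.h>
#include <isl/printer.h>
#include <isl/schedule.h>
#include <isl/union_map.h>
#include <isl/union_set.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();
  isl_union_set *Domain = isl_union_set_read_from_str(
      Ctx, "{ S[i] : 0 <= i < 100; T[i] : 0 <= i < 100 }");
  isl_union_map *Deps =
      isl_union_map_read_from_str(Ctx, "{ S[i] -> T[i] }"); // toy RAW deps
  isl_schedule_constraints *SC = isl_schedule_constraints_on_domain(Domain);
  SC = isl_schedule_constraints_set_validity(SC, isl_union_map_copy(Deps));
  SC = isl_schedule_constraints_set_proximity(SC, Deps);
  isl_schedule *Schedule = isl_schedule_constraints_compute_schedule(SC);

  // Print the resulting schedule tree, as the DEBUG block above does.
  isl_printer *P = isl_printer_to_str(Ctx);
  P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
  P = isl_printer_print_schedule(P, Schedule);
  char *Str = isl_printer_get_str(P);
  std::printf("%s\n", Str);
  free(Str);
  isl_printer_free(P);
  isl_schedule_free(Schedule);
  isl_ctx_free(Ctx);
  return 0;
}
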
1549
1550void IslScheduleOptimizer::printScop(raw_ostream &OS, Scop &) const {
1551 isl_printer *p;
1552 char *ScheduleStr;
1553
1554 OS << "Calculated schedule:\n";
1555
1556 if (!LastSchedule) {
1557 OS << "n/a\n";
1558 return;
1559 }
1560
1561 p = isl_printer_to_str(isl_schedule_get_ctx(LastSchedule));
1562 p = isl_printer_print_schedule(p, LastSchedule);
1563 ScheduleStr = isl_printer_get_str(p);
1564 isl_printer_free(p);
1565
1566 OS << ScheduleStr << "\n";
1567}
1568
1569void IslScheduleOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
1570 ScopPass::getAnalysisUsage(AU);
1571 AU.addRequired<DependenceInfo>();
1572 AU.addRequired<TargetTransformInfoWrapperPass>();
1573}
1574
1575Pass *polly::createIslScheduleOptimizerPass() {
1576 return new IslScheduleOptimizer();
1577}
1578
1579INITIALIZE_PASS_BEGIN(IslScheduleOptimizer, "polly-opt-isl",
1580 "Polly - Optimize schedule of SCoP", false, false)
1581INITIALIZE_PASS_DEPENDENCY(DependenceInfo)
1582INITIALIZE_PASS_DEPENDENCY(ScopInfoRegionPass)
1583INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1584INITIALIZE_PASS_END(IslScheduleOptimizer, "polly-opt-isl",
1585 "Polly - Optimize schedule of SCoP", false, false)