File: | polly/lib/External/isl/isl_scheduler.c |
Warning: | line 3063, column 2 Value stored to 'nrow' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* |
2 | * Copyright 2011 INRIA Saclay |
3 | * Copyright 2012-2014 Ecole Normale Superieure |
4 | * Copyright 2015-2016 Sven Verdoolaege |
5 | * Copyright 2016 INRIA Paris |
6 | * Copyright 2017 Sven Verdoolaege |
7 | * |
8 | * Use of this software is governed by the MIT license |
9 | * |
10 | * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France, |
11 | * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod, |
12 | * 91893 Orsay, France |
13 | * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France |
14 | * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12, |
15 | * CS 42112, 75589 Paris Cedex 12, France |
16 | */ |
17 | |
18 | #include <isl_ctx_private.h> |
19 | #include <isl_map_private.h> |
20 | #include <isl_space_private.h> |
21 | #include <isl_aff_private.h> |
22 | #include <isl/hash.h> |
23 | #include <isl/id.h> |
24 | #include <isl/constraint.h> |
25 | #include <isl/schedule.h> |
26 | #include <isl_schedule_constraints.h> |
27 | #include <isl/schedule_node.h> |
28 | #include <isl_mat_private.h> |
29 | #include <isl_vec_private.h> |
30 | #include <isl/set.h> |
31 | #include <isl_union_set_private.h> |
32 | #include <isl_seq.h> |
33 | #include <isl_tab.h> |
34 | #include <isl_dim_map.h> |
35 | #include <isl/map_to_basic_set.h> |
36 | #include <isl_sort.h> |
37 | #include <isl_options_private.h> |
38 | #include <isl_tarjan.h> |
39 | #include <isl_morph.h> |
40 | #include <isl/ilp.h> |
41 | #include <isl_val_private.h> |
42 | |
43 | /* |
44 | * The scheduling algorithm implemented in this file was inspired by |
45 | * Bondhugula et al., "Automatic Transformations for Communication-Minimized |
46 | * Parallelization and Locality Optimization in the Polyhedral Model". |
47 | * |
48 | * For a detailed description of the variant implemented in isl, |
49 | * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017). |
50 | */ |
51 | |
52 | |
53 | /* Internal information about a node that is used during the construction |
54 | * of a schedule. |
55 | * space represents the original space in which the domain lives; |
56 | * that is, the space is not affected by compression |
57 | * sched is a matrix representation of the schedule being constructed |
58 | * for this node; if compressed is set, then this schedule is |
59 | * defined over the compressed domain space |
60 | * sched_map is an isl_map representation of the same (partial) schedule |
61 | * sched_map may be NULL; if compressed is set, then this map |
62 | * is defined over the uncompressed domain space |
63 | * rank is the number of linearly independent rows in the linear part |
64 | * of sched |
65 | * the rows of "vmap" represent a change of basis for the node |
66 | * variables; the first rank rows span the linear part of |
67 | * the schedule rows; the remaining rows are linearly independent |
68 | * the rows of "indep" represent linear combinations of the schedule |
69 | * coefficients that are non-zero when the schedule coefficients are |
70 | * linearly independent of previously computed schedule rows. |
71 | * start is the first variable in the LP problem in the sequences that |
72 | * represents the schedule coefficients of this node |
73 | * nvar is the dimension of the (compressed) domain |
74 | * nparam is the number of parameters or 0 if we are not constructing |
75 | * a parametric schedule |
76 | * |
77 | * If compressed is set, then hull represents the constraints |
78 | * that were used to derive the compression, while compress and |
79 | * decompress map the original space to the compressed space and |
80 | * vice versa. |
81 | * |
82 | * scc is the index of SCC (or WCC) this node belongs to |
83 | * |
84 | * "cluster" is only used inside extract_clusters and identifies |
85 | * the cluster of SCCs that the node belongs to. |
86 | * |
87 | * coincident contains a boolean for each of the rows of the schedule, |
88 | * indicating whether the corresponding scheduling dimension satisfies |
89 | * the coincidence constraints in the sense that the corresponding |
90 | * dependence distances are zero. |
91 | * |
92 | * If the schedule_treat_coalescing option is set, then |
93 | * "sizes" contains the sizes of the (compressed) instance set |
94 | * in each direction. If there is no fixed size in a given direction, |
95 | * then the corresponding size value is set to infinity. |
96 | * If the schedule_treat_coalescing option or the schedule_max_coefficient |
97 | * option is set, then "max" contains the maximal values for |
98 | * schedule coefficients of the (compressed) variables. If no bound |
99 | * needs to be imposed on a particular variable, then the corresponding |
100 | * value is negative. |
101 | * If not NULL, then "bounds" contains a non-parametric set |
102 | * in the compressed space that is bounded by the size in each direction. |
103 | */ |
104 | struct isl_sched_node { |
105 | isl_space *space; |
106 | int compressed; |
107 | isl_setisl_map *hull; |
108 | isl_multi_aff *compress; |
109 | isl_multi_aff *decompress; |
110 | isl_mat *sched; |
111 | isl_map *sched_map; |
112 | int rank; |
113 | isl_mat *indep; |
114 | isl_mat *vmap; |
115 | int start; |
116 | int nvar; |
117 | int nparam; |
118 | |
119 | int scc; |
120 | int cluster; |
121 | |
122 | int *coincident; |
123 | |
124 | isl_multi_val *sizes; |
125 | isl_basic_setisl_basic_map *bounds; |
126 | isl_vec *max; |
127 | }; |
128 | |
129 | static int node_has_tuples(const void *entry, const void *val) |
130 | { |
131 | struct isl_sched_node *node = (struct isl_sched_node *)entry; |
132 | isl_space *space = (isl_space *) val; |
133 | |
134 | return isl_space_has_equal_tuples(node->space, space); |
135 | } |
136 | |
137 | static int node_scc_exactly(struct isl_sched_node *node, int scc) |
138 | { |
139 | return node->scc == scc; |
140 | } |
141 | |
142 | static int node_scc_at_most(struct isl_sched_node *node, int scc) |
143 | { |
144 | return node->scc <= scc; |
145 | } |
146 | |
147 | static int node_scc_at_least(struct isl_sched_node *node, int scc) |
148 | { |
149 | return node->scc >= scc; |
150 | } |
151 | |
152 | /* An edge in the dependence graph. An edge may be used to |
153 | * ensure validity of the generated schedule, to minimize the dependence |
154 | * distance or both |
155 | * |
156 | * map is the dependence relation, with i -> j in the map if j depends on i |
157 | * tagged_condition and tagged_validity contain the union of all tagged |
158 | * condition or conditional validity dependence relations that |
159 | * specialize the dependence relation "map"; that is, |
160 | * if (i -> a) -> (j -> b) is an element of "tagged_condition" |
161 | * or "tagged_validity", then i -> j is an element of "map". |
162 | * If these fields are NULL, then they represent the empty relation. |
163 | * src is the source node |
164 | * dst is the sink node |
165 | * |
166 | * types is a bit vector containing the types of this edge. |
167 | * validity is set if the edge is used to ensure correctness |
168 | * coincidence is used to enforce zero dependence distances |
169 | * proximity is set if the edge is used to minimize dependence distances |
170 | * condition is set if the edge represents a condition |
171 | * for a conditional validity schedule constraint |
172 | * local can only be set for condition edges and indicates that |
173 | * the dependence distance over the edge should be zero |
174 | * conditional_validity is set if the edge is used to conditionally |
175 | * ensure correctness |
176 | * |
177 | * For validity edges, start and end mark the sequence of inequality |
178 | * constraints in the LP problem that encode the validity constraint |
179 | * corresponding to this edge. |
180 | * |
181 | * During clustering, an edge may be marked "no_merge" if it should |
182 | * not be used to merge clusters. |
183 | * The weight is also only used during clustering and it is |
184 | * an indication of how many schedule dimensions on either side |
185 | * of the schedule constraints can be aligned. |
186 | * If the weight is negative, then this means that this edge was postponed |
187 | * by has_bounded_distances or any_no_merge. The original weight can |
188 | * be retrieved by adding 1 + graph->max_weight, with "graph" |
189 | * the graph containing this edge. |
190 | */ |
191 | struct isl_sched_edge { |
192 | isl_map *map; |
193 | isl_union_map *tagged_condition; |
194 | isl_union_map *tagged_validity; |
195 | |
196 | struct isl_sched_node *src; |
197 | struct isl_sched_node *dst; |
198 | |
199 | unsigned types; |
200 | |
201 | int start; |
202 | int end; |
203 | |
204 | int no_merge; |
205 | int weight; |
206 | }; |
207 | |
208 | /* Is "edge" marked as being of type "type"? |
209 | */ |
210 | static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type) |
211 | { |
212 | return ISL_FL_ISSET(edge->types, 1 << type)(!!((edge->types) & (1 << type))); |
213 | } |
214 | |
215 | /* Mark "edge" as being of type "type". |
216 | */ |
217 | static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type) |
218 | { |
219 | ISL_FL_SET(edge->types, 1 << type)((edge->types) |= (1 << type)); |
220 | } |
221 | |
222 | /* No longer mark "edge" as being of type "type"? |
223 | */ |
224 | static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type) |
225 | { |
226 | ISL_FL_CLR(edge->types, 1 << type)((edge->types) &= ~(1 << type)); |
227 | } |
228 | |
229 | /* Is "edge" marked as a validity edge? |
230 | */ |
231 | static int is_validity(struct isl_sched_edge *edge) |
232 | { |
233 | return is_type(edge, isl_edge_validity); |
234 | } |
235 | |
236 | /* Mark "edge" as a validity edge. |
237 | */ |
238 | static void set_validity(struct isl_sched_edge *edge) |
239 | { |
240 | set_type(edge, isl_edge_validity); |
241 | } |
242 | |
243 | /* Is "edge" marked as a proximity edge? |
244 | */ |
245 | static int is_proximity(struct isl_sched_edge *edge) |
246 | { |
247 | return is_type(edge, isl_edge_proximity); |
248 | } |
249 | |
250 | /* Is "edge" marked as a local edge? |
251 | */ |
252 | static int is_local(struct isl_sched_edge *edge) |
253 | { |
254 | return is_type(edge, isl_edge_local); |
255 | } |
256 | |
257 | /* Mark "edge" as a local edge. |
258 | */ |
259 | static void set_local(struct isl_sched_edge *edge) |
260 | { |
261 | set_type(edge, isl_edge_local); |
262 | } |
263 | |
264 | /* No longer mark "edge" as a local edge. |
265 | */ |
266 | static void clear_local(struct isl_sched_edge *edge) |
267 | { |
268 | clear_type(edge, isl_edge_local); |
269 | } |
270 | |
271 | /* Is "edge" marked as a coincidence edge? |
272 | */ |
273 | static int is_coincidence(struct isl_sched_edge *edge) |
274 | { |
275 | return is_type(edge, isl_edge_coincidence); |
276 | } |
277 | |
278 | /* Is "edge" marked as a condition edge? |
279 | */ |
280 | static int is_condition(struct isl_sched_edge *edge) |
281 | { |
282 | return is_type(edge, isl_edge_condition); |
283 | } |
284 | |
285 | /* Is "edge" marked as a conditional validity edge? |
286 | */ |
287 | static int is_conditional_validity(struct isl_sched_edge *edge) |
288 | { |
289 | return is_type(edge, isl_edge_conditional_validity); |
290 | } |
291 | |
/* Can more than one edge of the type(s) of "edge" connect
 * the same pair of nodes?
 *
 * This is the case for condition edges and conditional validity edges
 * since they may have tagged dependence relations, in which case
 * an edge is added for each pair of tags.
 */
static int is_multi_edge_type(struct isl_sched_edge *edge)
{
	if (is_condition(edge))
		return 1;

	return is_conditional_validity(edge) != 0;
}
303 | |
304 | /* Internal information about the dependence graph used during |
305 | * the construction of the schedule. |
306 | * |
307 | * intra_hmap is a cache, mapping dependence relations to their dual, |
308 | * for dependences from a node to itself, possibly without |
309 | * coefficients for the parameters |
310 | * intra_hmap_param is a cache, mapping dependence relations to their dual, |
311 | * for dependences from a node to itself, including coefficients |
312 | * for the parameters |
313 | * inter_hmap is a cache, mapping dependence relations to their dual, |
314 | * for dependences between distinct nodes |
315 | * if compression is involved then the key for these maps |
316 | * is the original, uncompressed dependence relation, while |
317 | * the value is the dual of the compressed dependence relation. |
318 | * |
319 | * n is the number of nodes |
320 | * node is the list of nodes |
321 | * maxvar is the maximal number of variables over all nodes |
322 | * max_row is the allocated number of rows in the schedule |
323 | * n_row is the current (maximal) number of linearly independent |
324 | * rows in the node schedules |
325 | * n_total_row is the current number of rows in the node schedules |
326 | * band_start is the starting row in the node schedules of the current band |
327 | * root is set to the original dependence graph from which this graph |
328 | * is derived through splitting. If this graph is not the result of |
329 | * splitting, then the root field points to the graph itself. |
330 | * |
331 | * sorted contains a list of node indices sorted according to the |
332 | * SCC to which a node belongs |
333 | * |
334 | * n_edge is the number of edges |
335 | * edge is the list of edges |
336 | * max_edge contains the maximal number of edges of each type; |
337 | * in particular, it contains the number of edges in the inital graph. |
338 | * edge_table contains pointers into the edge array, hashed on the source |
339 | * and sink spaces; there is one such table for each type; |
340 | * a given edge may be referenced from more than one table |
341 | * if the corresponding relation appears in more than one of the |
342 | * sets of dependences; however, for each type there is only |
343 | * a single edge between a given pair of source and sink space |
344 | * in the entire graph |
345 | * |
346 | * node_table contains pointers into the node array, hashed on the space tuples |
347 | * |
348 | * region contains a list of variable sequences that should be non-trivial |
349 | * |
350 | * lp contains the (I)LP problem used to obtain new schedule rows |
351 | * |
352 | * src_scc and dst_scc are the source and sink SCCs of an edge with |
353 | * conflicting constraints |
354 | * |
355 | * scc represents the number of components |
356 | * weak is set if the components are weakly connected |
357 | * |
358 | * max_weight is used during clustering and represents the maximal |
359 | * weight of the relevant proximity edges. |
360 | */ |
361 | struct isl_sched_graph { |
362 | isl_map_to_basic_set *intra_hmap; |
363 | isl_map_to_basic_set *intra_hmap_param; |
364 | isl_map_to_basic_set *inter_hmap; |
365 | |
366 | struct isl_sched_node *node; |
367 | int n; |
368 | int maxvar; |
369 | int max_row; |
370 | int n_row; |
371 | |
372 | int *sorted; |
373 | |
374 | int n_total_row; |
375 | int band_start; |
376 | |
377 | struct isl_sched_graph *root; |
378 | |
379 | struct isl_sched_edge *edge; |
380 | int n_edge; |
381 | int max_edge[isl_edge_last + 1]; |
382 | struct isl_hash_table *edge_table[isl_edge_last + 1]; |
383 | |
384 | struct isl_hash_table *node_table; |
385 | struct isl_trivial_region *region; |
386 | |
387 | isl_basic_setisl_basic_map *lp; |
388 | |
389 | int src_scc; |
390 | int dst_scc; |
391 | |
392 | int scc; |
393 | int weak; |
394 | |
395 | int max_weight; |
396 | }; |
397 | |
398 | /* Initialize node_table based on the list of nodes. |
399 | */ |
400 | static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph) |
401 | { |
402 | int i; |
403 | |
404 | graph->node_table = isl_hash_table_alloc(ctx, graph->n); |
405 | if (!graph->node_table) |
406 | return -1; |
407 | |
408 | for (i = 0; i < graph->n; ++i) { |
409 | struct isl_hash_table_entry *entry; |
410 | uint32_t hash; |
411 | |
412 | hash = isl_space_get_tuple_hash(graph->node[i].space); |
413 | entry = isl_hash_table_find(ctx, graph->node_table, hash, |
414 | &node_has_tuples, |
415 | graph->node[i].space, 1); |
416 | if (!entry) |
417 | return -1; |
418 | entry->data = &graph->node[i]; |
419 | } |
420 | |
421 | return 0; |
422 | } |
423 | |
424 | /* Return a pointer to the node that lives within the given space, |
425 | * an invalid node if there is no such node, or NULL in case of error. |
426 | */ |
427 | static struct isl_sched_node *graph_find_node(isl_ctx *ctx, |
428 | struct isl_sched_graph *graph, __isl_keep isl_space *space) |
429 | { |
430 | struct isl_hash_table_entry *entry; |
431 | uint32_t hash; |
432 | |
433 | if (!space) |
434 | return NULL((void*)0); |
435 | |
436 | hash = isl_space_get_tuple_hash(space); |
437 | entry = isl_hash_table_find(ctx, graph->node_table, hash, |
438 | &node_has_tuples, space, 0); |
439 | |
440 | return entry ? entry->data : graph->node + graph->n; |
441 | } |
442 | |
443 | /* Is "node" a node in "graph"? |
444 | */ |
445 | static int is_node(struct isl_sched_graph *graph, |
446 | struct isl_sched_node *node) |
447 | { |
448 | return node && node >= &graph->node[0] && node < &graph->node[graph->n]; |
449 | } |
450 | |
451 | static int edge_has_src_and_dst(const void *entry, const void *val) |
452 | { |
453 | const struct isl_sched_edge *edge = entry; |
454 | const struct isl_sched_edge *temp = val; |
455 | |
456 | return edge->src == temp->src && edge->dst == temp->dst; |
457 | } |
458 | |
459 | /* Add the given edge to graph->edge_table[type]. |
460 | */ |
461 | static isl_stat graph_edge_table_add(isl_ctx *ctx, |
462 | struct isl_sched_graph *graph, enum isl_edge_type type, |
463 | struct isl_sched_edge *edge) |
464 | { |
465 | struct isl_hash_table_entry *entry; |
466 | uint32_t hash; |
467 | |
468 | hash = isl_hash_init()(2166136261u); |
469 | hash = isl_hash_builtin(hash, edge->src)isl_hash_mem(hash, &edge->src, sizeof(edge->src)); |
470 | hash = isl_hash_builtin(hash, edge->dst)isl_hash_mem(hash, &edge->dst, sizeof(edge->dst)); |
471 | entry = isl_hash_table_find(ctx, graph->edge_table[type], hash, |
472 | &edge_has_src_and_dst, edge, 1); |
473 | if (!entry) |
474 | return isl_stat_error; |
475 | entry->data = edge; |
476 | |
477 | return isl_stat_ok; |
478 | } |
479 | |
480 | /* Add "edge" to all relevant edge tables. |
481 | * That is, for every type of the edge, add it to the corresponding table. |
482 | */ |
483 | static isl_stat graph_edge_tables_add(isl_ctx *ctx, |
484 | struct isl_sched_graph *graph, struct isl_sched_edge *edge) |
485 | { |
486 | enum isl_edge_type t; |
487 | |
488 | for (t = isl_edge_first; t <= isl_edge_last; ++t) { |
489 | if (!is_type(edge, t)) |
490 | continue; |
491 | if (graph_edge_table_add(ctx, graph, t, edge) < 0) |
492 | return isl_stat_error; |
493 | } |
494 | |
495 | return isl_stat_ok; |
496 | } |
497 | |
498 | /* Allocate the edge_tables based on the maximal number of edges of |
499 | * each type. |
500 | */ |
501 | static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph) |
502 | { |
503 | int i; |
504 | |
505 | for (i = 0; i <= isl_edge_last; ++i) { |
506 | graph->edge_table[i] = isl_hash_table_alloc(ctx, |
507 | graph->max_edge[i]); |
508 | if (!graph->edge_table[i]) |
509 | return -1; |
510 | } |
511 | |
512 | return 0; |
513 | } |
514 | |
515 | /* If graph->edge_table[type] contains an edge from the given source |
516 | * to the given destination, then return the hash table entry of this edge. |
517 | * Otherwise, return NULL. |
518 | */ |
519 | static struct isl_hash_table_entry *graph_find_edge_entry( |
520 | struct isl_sched_graph *graph, |
521 | enum isl_edge_type type, |
522 | struct isl_sched_node *src, struct isl_sched_node *dst) |
523 | { |
524 | isl_ctx *ctx = isl_space_get_ctx(src->space); |
525 | uint32_t hash; |
526 | struct isl_sched_edge temp = { .src = src, .dst = dst }; |
527 | |
528 | hash = isl_hash_init()(2166136261u); |
529 | hash = isl_hash_builtin(hash, temp.src)isl_hash_mem(hash, &temp.src, sizeof(temp.src)); |
530 | hash = isl_hash_builtin(hash, temp.dst)isl_hash_mem(hash, &temp.dst, sizeof(temp.dst)); |
531 | return isl_hash_table_find(ctx, graph->edge_table[type], hash, |
532 | &edge_has_src_and_dst, &temp, 0); |
533 | } |
534 | |
535 | |
536 | /* If graph->edge_table[type] contains an edge from the given source |
537 | * to the given destination, then return this edge. |
538 | * Otherwise, return NULL. |
539 | */ |
540 | static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph, |
541 | enum isl_edge_type type, |
542 | struct isl_sched_node *src, struct isl_sched_node *dst) |
543 | { |
544 | struct isl_hash_table_entry *entry; |
545 | |
546 | entry = graph_find_edge_entry(graph, type, src, dst); |
547 | if (!entry) |
548 | return NULL((void*)0); |
549 | |
550 | return entry->data; |
551 | } |
552 | |
553 | /* Check whether the dependence graph has an edge of the given type |
554 | * between the given two nodes. |
555 | */ |
556 | static isl_bool graph_has_edge(struct isl_sched_graph *graph, |
557 | enum isl_edge_type type, |
558 | struct isl_sched_node *src, struct isl_sched_node *dst) |
559 | { |
560 | struct isl_sched_edge *edge; |
561 | isl_bool empty; |
562 | |
563 | edge = graph_find_edge(graph, type, src, dst); |
564 | if (!edge) |
565 | return isl_bool_false; |
566 | |
567 | empty = isl_map_plain_is_empty(edge->map); |
568 | if (empty < 0) |
569 | return isl_bool_error; |
570 | |
571 | return !empty; |
572 | } |
573 | |
574 | /* Look for any edge with the same src, dst and map fields as "model". |
575 | * |
576 | * Return the matching edge if one can be found. |
577 | * Return "model" if no matching edge is found. |
578 | * Return NULL on error. |
579 | */ |
580 | static struct isl_sched_edge *graph_find_matching_edge( |
581 | struct isl_sched_graph *graph, struct isl_sched_edge *model) |
582 | { |
583 | enum isl_edge_type i; |
584 | struct isl_sched_edge *edge; |
585 | |
586 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
587 | int is_equal; |
588 | |
589 | edge = graph_find_edge(graph, i, model->src, model->dst); |
590 | if (!edge) |
591 | continue; |
592 | is_equal = isl_map_plain_is_equal(model->map, edge->map); |
593 | if (is_equal < 0) |
594 | return NULL((void*)0); |
595 | if (is_equal) |
596 | return edge; |
597 | } |
598 | |
599 | return model; |
600 | } |
601 | |
602 | /* Remove the given edge from all the edge_tables that refer to it. |
603 | */ |
604 | static void graph_remove_edge(struct isl_sched_graph *graph, |
605 | struct isl_sched_edge *edge) |
606 | { |
607 | isl_ctx *ctx = isl_map_get_ctx(edge->map); |
608 | enum isl_edge_type i; |
609 | |
610 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
611 | struct isl_hash_table_entry *entry; |
612 | |
613 | entry = graph_find_edge_entry(graph, i, edge->src, edge->dst); |
614 | if (!entry) |
615 | continue; |
616 | if (entry->data != edge) |
617 | continue; |
618 | isl_hash_table_remove(ctx, graph->edge_table[i], entry); |
619 | } |
620 | } |
621 | |
622 | /* Check whether the dependence graph has any edge |
623 | * between the given two nodes. |
624 | */ |
625 | static isl_bool graph_has_any_edge(struct isl_sched_graph *graph, |
626 | struct isl_sched_node *src, struct isl_sched_node *dst) |
627 | { |
628 | enum isl_edge_type i; |
629 | isl_bool r; |
630 | |
631 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
632 | r = graph_has_edge(graph, i, src, dst); |
633 | if (r < 0 || r) |
634 | return r; |
635 | } |
636 | |
637 | return r; |
638 | } |
639 | |
640 | /* Check whether the dependence graph has a validity edge |
641 | * between the given two nodes. |
642 | * |
643 | * Conditional validity edges are essentially validity edges that |
644 | * can be ignored if the corresponding condition edges are iteration private. |
645 | * Here, we are only checking for the presence of validity |
646 | * edges, so we need to consider the conditional validity edges too. |
647 | * In particular, this function is used during the detection |
648 | * of strongly connected components and we cannot ignore |
649 | * conditional validity edges during this detection. |
650 | */ |
651 | static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph, |
652 | struct isl_sched_node *src, struct isl_sched_node *dst) |
653 | { |
654 | isl_bool r; |
655 | |
656 | r = graph_has_edge(graph, isl_edge_validity, src, dst); |
657 | if (r < 0 || r) |
658 | return r; |
659 | |
660 | return graph_has_edge(graph, isl_edge_conditional_validity, src, dst); |
661 | } |
662 | |
663 | /* Perform all the required memory allocations for a schedule graph "graph" |
664 | * with "n_node" nodes and "n_edge" edge and initialize the corresponding |
665 | * fields. |
666 | */ |
667 | static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph, |
668 | int n_node, int n_edge) |
669 | { |
670 | int i; |
671 | |
672 | graph->n = n_node; |
673 | graph->n_edge = n_edge; |
674 | graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n)((struct isl_sched_node *)isl_calloc_or_die(ctx, graph->n, sizeof(struct isl_sched_node))); |
675 | graph->sorted = isl_calloc_array(ctx, int, graph->n)((int *)isl_calloc_or_die(ctx, graph->n, sizeof(int))); |
676 | graph->region = isl_alloc_array(ctx,((struct isl_trivial_region *)isl_malloc_or_die(ctx, (graph-> n)*sizeof(struct isl_trivial_region))) |
677 | struct isl_trivial_region, graph->n)((struct isl_trivial_region *)isl_malloc_or_die(ctx, (graph-> n)*sizeof(struct isl_trivial_region))); |
678 | graph->edge = isl_calloc_array(ctx,((struct isl_sched_edge *)isl_calloc_or_die(ctx, graph->n_edge , sizeof(struct isl_sched_edge))) |
679 | struct isl_sched_edge, graph->n_edge)((struct isl_sched_edge *)isl_calloc_or_die(ctx, graph->n_edge , sizeof(struct isl_sched_edge))); |
680 | |
681 | graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge); |
682 | graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge); |
683 | graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge); |
684 | |
685 | if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) || |
686 | !graph->sorted) |
687 | return isl_stat_error; |
688 | |
689 | for(i = 0; i < graph->n; ++i) |
690 | graph->sorted[i] = i; |
691 | |
692 | return isl_stat_ok; |
693 | } |
694 | |
695 | /* Free the memory associated to node "node" in "graph". |
696 | * The "coincident" field is shared by nodes in a graph and its subgraph. |
697 | * It therefore only needs to be freed for the original dependence graph, |
698 | * i.e., one that is not the result of splitting. |
699 | */ |
700 | static void clear_node(struct isl_sched_graph *graph, |
701 | struct isl_sched_node *node) |
702 | { |
703 | isl_space_free(node->space); |
704 | isl_set_free(node->hull); |
705 | isl_multi_aff_free(node->compress); |
706 | isl_multi_aff_free(node->decompress); |
707 | isl_mat_free(node->sched); |
708 | isl_map_free(node->sched_map); |
709 | isl_mat_free(node->indep); |
710 | isl_mat_free(node->vmap); |
711 | if (graph->root == graph) |
712 | free(node->coincident); |
713 | isl_multi_val_free(node->sizes); |
714 | isl_basic_set_free(node->bounds); |
715 | isl_vec_free(node->max); |
716 | } |
717 | |
718 | static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph) |
719 | { |
720 | int i; |
721 | |
722 | isl_map_to_basic_set_free(graph->intra_hmap); |
723 | isl_map_to_basic_set_free(graph->intra_hmap_param); |
724 | isl_map_to_basic_set_free(graph->inter_hmap); |
725 | |
726 | if (graph->node) |
727 | for (i = 0; i < graph->n; ++i) |
728 | clear_node(graph, &graph->node[i]); |
729 | free(graph->node); |
730 | free(graph->sorted); |
731 | if (graph->edge) |
732 | for (i = 0; i < graph->n_edge; ++i) { |
733 | isl_map_free(graph->edge[i].map); |
734 | isl_union_map_free(graph->edge[i].tagged_condition); |
735 | isl_union_map_free(graph->edge[i].tagged_validity); |
736 | } |
737 | free(graph->edge); |
738 | free(graph->region); |
739 | for (i = 0; i <= isl_edge_last; ++i) |
740 | isl_hash_table_free(ctx, graph->edge_table[i]); |
741 | isl_hash_table_free(ctx, graph->node_table); |
742 | isl_basic_set_free(graph->lp); |
743 | } |
744 | |
745 | /* For each "set" on which this function is called, increment |
746 | * graph->n by one and update graph->maxvar. |
747 | */ |
748 | static isl_stat init_n_maxvar(__isl_take isl_setisl_map *set, void *user) |
749 | { |
750 | struct isl_sched_graph *graph = user; |
751 | int nvar = isl_set_dim(set, isl_dim_set); |
752 | |
753 | graph->n++; |
754 | if (nvar > graph->maxvar) |
755 | graph->maxvar = nvar; |
756 | |
757 | isl_set_free(set); |
758 | |
759 | return isl_stat_ok; |
760 | } |
761 | |
762 | /* Compute the number of rows that should be allocated for the schedule. |
763 | * In particular, we need one row for each variable or one row |
764 | * for each basic map in the dependences. |
765 | * Note that it is practically impossible to exhaust both |
766 | * the number of dependences and the number of variables. |
767 | */ |
768 | static isl_stat compute_max_row(struct isl_sched_graph *graph, |
769 | __isl_keep isl_schedule_constraints *sc) |
770 | { |
771 | int n_edge; |
772 | isl_stat r; |
773 | isl_union_set *domain; |
774 | |
775 | graph->n = 0; |
776 | graph->maxvar = 0; |
777 | domain = isl_schedule_constraints_get_domain(sc); |
778 | r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph); |
779 | isl_union_set_free(domain); |
780 | if (r < 0) |
781 | return isl_stat_error; |
782 | n_edge = isl_schedule_constraints_n_basic_map(sc); |
783 | if (n_edge < 0) |
784 | return isl_stat_error; |
785 | graph->max_row = n_edge + graph->maxvar; |
786 | |
787 | return isl_stat_ok; |
788 | } |
789 | |
790 | /* Does "bset" have any defining equalities for its set variables? |
791 | */ |
792 | static isl_bool has_any_defining_equality(__isl_keep isl_basic_setisl_basic_map *bset) |
793 | { |
794 | int i, n; |
795 | |
796 | if (!bset) |
797 | return isl_bool_error; |
798 | |
799 | n = isl_basic_set_dim(bset, isl_dim_set); |
800 | for (i = 0; i < n; ++i) { |
801 | isl_bool has; |
802 | |
803 | has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i, |
804 | NULL((void*)0)); |
805 | if (has < 0 || has) |
806 | return has; |
807 | } |
808 | |
809 | return isl_bool_false; |
810 | } |
811 | |
812 | /* Set the entries of node->max to the value of the schedule_max_coefficient |
813 | * option, if set. |
814 | */ |
815 | static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node) |
816 | { |
817 | int max; |
818 | |
819 | max = isl_options_get_schedule_max_coefficient(ctx); |
820 | if (max == -1) |
821 | return isl_stat_ok; |
822 | |
823 | node->max = isl_vec_alloc(ctx, node->nvar); |
824 | node->max = isl_vec_set_si(node->max, max); |
825 | if (!node->max) |
826 | return isl_stat_error; |
827 | |
828 | return isl_stat_ok; |
829 | } |
830 | |
831 | /* Set the entries of node->max to the minimum of the schedule_max_coefficient |
832 | * option (if set) and half of the minimum of the sizes in the other |
833 | * dimensions. Round up when computing the half such that |
834 | * if the minimum of the sizes is one, half of the size is taken to be one |
835 | * rather than zero. |
836 | * If the global minimum is unbounded (i.e., if both |
837 | * the schedule_max_coefficient is not set and the sizes in the other |
838 | * dimensions are unbounded), then store a negative value. |
839 | * If the schedule coefficient is close to the size of the instance set |
840 | * in another dimension, then the schedule may represent a loop |
841 | * coalescing transformation (especially if the coefficient |
842 | * in that other dimension is one). Forcing the coefficient to be |
843 | * smaller than or equal to half the minimal size should avoid this |
844 | * situation. |
845 | */ |
846 | static isl_stat compute_max_coefficient(isl_ctx *ctx, |
847 | struct isl_sched_node *node) |
848 | { |
849 | int max; |
850 | int i, j; |
851 | isl_vec *v; |
852 | |
853 | max = isl_options_get_schedule_max_coefficient(ctx); |
854 | v = isl_vec_alloc(ctx, node->nvar); |
855 | if (!v) |
856 | return isl_stat_error; |
857 | |
858 | for (i = 0; i < node->nvar; ++i) { |
859 | isl_int_set_si(v->el[i], max)isl_sioimath_set_si((v->el[i]), max); |
860 | isl_int_mul_si(v->el[i], v->el[i], 2)isl_sioimath_mul_si((v->el[i]), *(v->el[i]), 2); |
861 | } |
862 | |
863 | for (i = 0; i < node->nvar; ++i) { |
864 | isl_val *size; |
865 | |
866 | size = isl_multi_val_get_val(node->sizes, i); |
867 | if (!size) |
868 | goto error; |
869 | if (!isl_val_is_int(size)) { |
870 | isl_val_free(size); |
871 | continue; |
872 | } |
873 | for (j = 0; j < node->nvar; ++j) { |
874 | if (j == i) |
875 | continue; |
876 | if (isl_int_is_neg(v->el[j])(isl_sioimath_sgn(*(v->el[j])) < 0) || |
877 | isl_int_gt(v->el[j], size->n)(isl_sioimath_cmp(*(v->el[j]), *(size->n)) > 0)) |
878 | isl_int_set(v->el[j], size->n)isl_sioimath_set((v->el[j]), *(size->n)); |
879 | } |
880 | isl_val_free(size); |
881 | } |
882 | |
883 | for (i = 0; i < node->nvar; ++i) |
884 | isl_int_cdiv_q_ui(v->el[i], v->el[i], 2)isl_sioimath_cdiv_q_ui((v->el[i]), *(v->el[i]), 2); |
885 | |
886 | node->max = v; |
887 | return isl_stat_ok; |
888 | error: |
889 | isl_vec_free(v); |
890 | return isl_stat_error; |
891 | } |
892 | |
893 | /* Compute and return the size of "set" in dimension "dim". |
894 | * The size is taken to be the difference in values for that variable |
895 | * for fixed values of the other variables. |
896 | * This assumes that "set" is convex. |
897 | * In particular, the variable is first isolated from the other variables |
898 | * in the range of a map |
899 | * |
900 | * [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim] |
901 | * |
902 | * and then duplicated |
903 | * |
904 | * [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']] |
905 | * |
906 | * The shared variables are then projected out and the maximal value |
907 | * of i_dim' - i_dim is computed. |
908 | */ |
909 | static __isl_give isl_val *compute_size(__isl_take isl_setisl_map *set, int dim) |
910 | { |
911 | isl_map *map; |
912 | isl_local_space *ls; |
913 | isl_aff *obj; |
914 | isl_val *v; |
915 | |
916 | map = isl_set_project_onto_map(set, isl_dim_set, dim, 1); |
917 | map = isl_map_project_out(map, isl_dim_in, dim, 1); |
918 | map = isl_map_range_product(map, isl_map_copy(map)); |
919 | map = isl_set_unwrap(isl_map_range(map)); |
920 | set = isl_map_deltas(map); |
921 | ls = isl_local_space_from_space(isl_set_get_space(set)); |
922 | obj = isl_aff_var_on_domain(ls, isl_dim_set, 0); |
923 | v = isl_set_max_val(set, obj); |
924 | isl_aff_free(obj); |
925 | isl_set_free(set); |
926 | |
927 | return v; |
928 | } |
929 | |
930 | /* Compute the size of the instance set "set" of "node", after compression, |
931 | * as well as bounds on the corresponding coefficients, if needed. |
932 | * |
933 | * The sizes are needed when the schedule_treat_coalescing option is set. |
934 | * The bounds are needed when the schedule_treat_coalescing option or |
935 | * the schedule_max_coefficient option is set. |
936 | * |
937 | * If the schedule_treat_coalescing option is not set, then at most |
938 | * the bounds need to be set and this is done in set_max_coefficient. |
939 | * Otherwise, compress the domain if needed, compute the size |
940 | * in each direction and store the results in node->size. |
941 | * If the domain is not convex, then the sizes are computed |
942 | * on a convex superset in order to avoid picking up sizes |
943 | * that are valid for the individual disjuncts, but not for |
944 | * the domain as a whole. |
945 | * Finally, set the bounds on the coefficients based on the sizes |
946 | * and the schedule_max_coefficient option in compute_max_coefficient. |
947 | */ |
948 | static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node, |
949 | __isl_take isl_setisl_map *set) |
950 | { |
951 | int j, n; |
952 | isl_multi_val *mv; |
953 | |
954 | if (!isl_options_get_schedule_treat_coalescing(ctx)) { |
955 | isl_set_free(set); |
956 | return set_max_coefficient(ctx, node); |
957 | } |
958 | |
959 | if (node->compressed) |
960 | set = isl_set_preimage_multi_aff(set, |
961 | isl_multi_aff_copy(node->decompress)); |
962 | set = isl_set_from_basic_set(isl_set_simple_hull(set)); |
963 | mv = isl_multi_val_zero(isl_set_get_space(set)); |
964 | n = isl_set_dim(set, isl_dim_set); |
965 | for (j = 0; j < n; ++j) { |
966 | isl_val *v; |
967 | |
968 | v = compute_size(isl_set_copy(set), j); |
969 | mv = isl_multi_val_set_val(mv, j, v); |
970 | } |
971 | node->sizes = mv; |
972 | isl_set_free(set); |
973 | if (!node->sizes) |
974 | return isl_stat_error; |
975 | return compute_max_coefficient(ctx, node); |
976 | } |
977 | |
978 | /* Add a new node to the graph representing the given instance set. |
979 | * "nvar" is the (possibly compressed) number of variables and |
980 | * may be smaller than then number of set variables in "set" |
981 | * if "compressed" is set. |
982 | * If "compressed" is set, then "hull" represents the constraints |
983 | * that were used to derive the compression, while "compress" and |
984 | * "decompress" map the original space to the compressed space and |
985 | * vice versa. |
986 | * If "compressed" is not set, then "hull", "compress" and "decompress" |
987 | * should be NULL. |
988 | * |
989 | * Compute the size of the instance set and bounds on the coefficients, |
990 | * if needed. |
991 | */ |
992 | static isl_stat add_node(struct isl_sched_graph *graph, |
993 | __isl_take isl_setisl_map *set, int nvar, int compressed, |
994 | __isl_take isl_setisl_map *hull, __isl_take isl_multi_aff *compress, |
995 | __isl_take isl_multi_aff *decompress) |
996 | { |
997 | int nparam; |
998 | isl_ctx *ctx; |
999 | isl_mat *sched; |
1000 | isl_space *space; |
1001 | int *coincident; |
1002 | struct isl_sched_node *node; |
1003 | |
1004 | if (!set) |
1005 | return isl_stat_error; |
1006 | |
1007 | ctx = isl_set_get_ctx(set); |
1008 | nparam = isl_set_dim(set, isl_dim_param); |
1009 | if (!ctx->opt->schedule_parametric) |
1010 | nparam = 0; |
1011 | sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar); |
1012 | node = &graph->node[graph->n]; |
1013 | graph->n++; |
1014 | space = isl_set_get_space(set); |
1015 | node->space = space; |
1016 | node->nvar = nvar; |
1017 | node->nparam = nparam; |
1018 | node->sched = sched; |
1019 | node->sched_map = NULL((void*)0); |
1020 | coincident = isl_calloc_array(ctx, int, graph->max_row)((int *)isl_calloc_or_die(ctx, graph->max_row, sizeof(int) )); |
1021 | node->coincident = coincident; |
1022 | node->compressed = compressed; |
1023 | node->hull = hull; |
1024 | node->compress = compress; |
1025 | node->decompress = decompress; |
1026 | if (compute_sizes_and_max(ctx, node, set) < 0) |
1027 | return isl_stat_error; |
1028 | |
1029 | if (!space || !sched || (graph->max_row && !coincident)) |
1030 | return isl_stat_error; |
1031 | if (compressed && (!hull || !compress || !decompress)) |
1032 | return isl_stat_error; |
1033 | |
1034 | return isl_stat_ok; |
1035 | } |
1036 | |
1037 | /* Construct an identifier for node "node", which will represent "set". |
1038 | * The name of the identifier is either "compressed" or |
1039 | * "compressed_<name>", with <name> the name of the space of "set". |
1040 | * The user pointer of the identifier points to "node". |
1041 | */ |
1042 | static __isl_give isl_id *construct_compressed_id(__isl_keep isl_setisl_map *set, |
1043 | struct isl_sched_node *node) |
1044 | { |
1045 | isl_bool has_name; |
1046 | isl_ctx *ctx; |
1047 | isl_id *id; |
1048 | isl_printer *p; |
1049 | const char *name; |
1050 | char *id_name; |
1051 | |
1052 | has_name = isl_set_has_tuple_name(set); |
1053 | if (has_name < 0) |
1054 | return NULL((void*)0); |
1055 | |
1056 | ctx = isl_set_get_ctx(set); |
1057 | if (!has_name) |
1058 | return isl_id_alloc(ctx, "compressed", node); |
1059 | |
1060 | p = isl_printer_to_str(ctx); |
1061 | name = isl_set_get_tuple_name(set); |
1062 | p = isl_printer_print_str(p, "compressed_"); |
1063 | p = isl_printer_print_str(p, name); |
1064 | id_name = isl_printer_get_str(p); |
1065 | isl_printer_free(p); |
1066 | |
1067 | id = isl_id_alloc(ctx, id_name, node); |
1068 | free(id_name); |
1069 | |
1070 | return id; |
1071 | } |
1072 | |
1073 | /* Add a new node to the graph representing the given set. |
1074 | * |
1075 | * If any of the set variables is defined by an equality, then |
1076 | * we perform variable compression such that we can perform |
1077 | * the scheduling on the compressed domain. |
1078 | * In this case, an identifier is used that references the new node |
1079 | * such that each compressed space is unique and |
1080 | * such that the node can be recovered from the compressed space. |
1081 | */ |
1082 | static isl_stat extract_node(__isl_take isl_setisl_map *set, void *user) |
1083 | { |
1084 | int nvar; |
1085 | isl_bool has_equality; |
1086 | isl_id *id; |
1087 | isl_basic_setisl_basic_map *hull; |
1088 | isl_setisl_map *hull_set; |
1089 | isl_morph *morph; |
1090 | isl_multi_aff *compress, *decompress; |
1091 | struct isl_sched_graph *graph = user; |
1092 | |
1093 | hull = isl_set_affine_hull(isl_set_copy(set)); |
1094 | hull = isl_basic_set_remove_divs(hull); |
1095 | nvar = isl_set_dim(set, isl_dim_set); |
1096 | has_equality = has_any_defining_equality(hull); |
1097 | |
1098 | if (has_equality < 0) |
1099 | goto error; |
1100 | if (!has_equality) { |
1101 | isl_basic_set_free(hull); |
1102 | return add_node(graph, set, nvar, 0, NULL((void*)0), NULL((void*)0), NULL((void*)0)); |
1103 | } |
1104 | |
1105 | id = construct_compressed_id(set, &graph->node[graph->n]); |
1106 | morph = isl_basic_set_variable_compression_with_id(hull, |
1107 | isl_dim_set, id); |
1108 | isl_id_free(id); |
1109 | nvar = isl_morph_ran_dim(morph, isl_dim_set); |
1110 | compress = isl_morph_get_var_multi_aff(morph); |
1111 | morph = isl_morph_inverse(morph); |
1112 | decompress = isl_morph_get_var_multi_aff(morph); |
1113 | isl_morph_free(morph); |
1114 | |
1115 | hull_set = isl_set_from_basic_set(hull); |
1116 | return add_node(graph, set, nvar, 1, hull_set, compress, decompress); |
1117 | error: |
1118 | isl_basic_set_free(hull); |
1119 | isl_set_free(set); |
1120 | return isl_stat_error; |
1121 | } |
1122 | |
1123 | struct isl_extract_edge_data { |
1124 | enum isl_edge_type type; |
1125 | struct isl_sched_graph *graph; |
1126 | }; |
1127 | |
1128 | /* Merge edge2 into edge1, freeing the contents of edge2. |
1129 | * Return 0 on success and -1 on failure. |
1130 | * |
1131 | * edge1 and edge2 are assumed to have the same value for the map field. |
1132 | */ |
1133 | static int merge_edge(struct isl_sched_edge *edge1, |
1134 | struct isl_sched_edge *edge2) |
1135 | { |
1136 | edge1->types |= edge2->types; |
1137 | isl_map_free(edge2->map); |
1138 | |
1139 | if (is_condition(edge2)) { |
1140 | if (!edge1->tagged_condition) |
1141 | edge1->tagged_condition = edge2->tagged_condition; |
1142 | else |
1143 | edge1->tagged_condition = |
1144 | isl_union_map_union(edge1->tagged_condition, |
1145 | edge2->tagged_condition); |
1146 | } |
1147 | |
1148 | if (is_conditional_validity(edge2)) { |
1149 | if (!edge1->tagged_validity) |
1150 | edge1->tagged_validity = edge2->tagged_validity; |
1151 | else |
1152 | edge1->tagged_validity = |
1153 | isl_union_map_union(edge1->tagged_validity, |
1154 | edge2->tagged_validity); |
1155 | } |
1156 | |
1157 | if (is_condition(edge2) && !edge1->tagged_condition) |
1158 | return -1; |
1159 | if (is_conditional_validity(edge2) && !edge1->tagged_validity) |
1160 | return -1; |
1161 | |
1162 | return 0; |
1163 | } |
1164 | |
1165 | /* Insert dummy tags in domain and range of "map". |
1166 | * |
1167 | * In particular, if "map" is of the form |
1168 | * |
1169 | * A -> B |
1170 | * |
1171 | * then return |
1172 | * |
1173 | * [A -> dummy_tag] -> [B -> dummy_tag] |
1174 | * |
1175 | * where the dummy_tags are identical and equal to any dummy tags |
1176 | * introduced by any other call to this function. |
1177 | */ |
1178 | static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map) |
1179 | { |
1180 | static char dummy; |
1181 | isl_ctx *ctx; |
1182 | isl_id *id; |
1183 | isl_space *space; |
1184 | isl_setisl_map *domain, *range; |
1185 | |
1186 | ctx = isl_map_get_ctx(map); |
1187 | |
1188 | id = isl_id_alloc(ctx, NULL((void*)0), &dummy); |
1189 | space = isl_space_params(isl_map_get_space(map)); |
1190 | space = isl_space_set_from_params(space); |
1191 | space = isl_space_set_tuple_id(space, isl_dim_set, id); |
1192 | space = isl_space_map_from_set(space); |
1193 | |
1194 | domain = isl_map_wrap(map); |
1195 | range = isl_map_wrap(isl_map_universe(space)); |
1196 | map = isl_map_from_domain_and_range(domain, range); |
1197 | map = isl_map_zip(map); |
1198 | |
1199 | return map; |
1200 | } |
1201 | |
1202 | /* Given that at least one of "src" or "dst" is compressed, return |
1203 | * a map between the spaces of these nodes restricted to the affine |
1204 | * hull that was used in the compression. |
1205 | */ |
1206 | static __isl_give isl_map *extract_hull(struct isl_sched_node *src, |
1207 | struct isl_sched_node *dst) |
1208 | { |
1209 | isl_setisl_map *dom, *ran; |
1210 | |
1211 | if (src->compressed) |
1212 | dom = isl_set_copy(src->hull); |
1213 | else |
1214 | dom = isl_set_universe(isl_space_copy(src->space)); |
1215 | if (dst->compressed) |
1216 | ran = isl_set_copy(dst->hull); |
1217 | else |
1218 | ran = isl_set_universe(isl_space_copy(dst->space)); |
1219 | |
1220 | return isl_map_from_domain_and_range(dom, ran); |
1221 | } |
1222 | |
1223 | /* Intersect the domains of the nested relations in domain and range |
1224 | * of "tagged" with "map". |
1225 | */ |
1226 | static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged, |
1227 | __isl_keep isl_map *map) |
1228 | { |
1229 | isl_setisl_map *set; |
1230 | |
1231 | tagged = isl_map_zip(tagged); |
1232 | set = isl_map_wrap(isl_map_copy(map)); |
1233 | tagged = isl_map_intersect_domain(tagged, set); |
1234 | tagged = isl_map_zip(tagged); |
1235 | return tagged; |
1236 | } |
1237 | |
1238 | /* Return a pointer to the node that lives in the domain space of "map", |
1239 | * an invalid node if there is no such node, or NULL in case of error. |
1240 | */ |
1241 | static struct isl_sched_node *find_domain_node(isl_ctx *ctx, |
1242 | struct isl_sched_graph *graph, __isl_keep isl_map *map) |
1243 | { |
1244 | struct isl_sched_node *node; |
1245 | isl_space *space; |
1246 | |
1247 | space = isl_space_domain(isl_map_get_space(map)); |
1248 | node = graph_find_node(ctx, graph, space); |
1249 | isl_space_free(space); |
1250 | |
1251 | return node; |
1252 | } |
1253 | |
1254 | /* Return a pointer to the node that lives in the range space of "map", |
1255 | * an invalid node if there is no such node, or NULL in case of error. |
1256 | */ |
1257 | static struct isl_sched_node *find_range_node(isl_ctx *ctx, |
1258 | struct isl_sched_graph *graph, __isl_keep isl_map *map) |
1259 | { |
1260 | struct isl_sched_node *node; |
1261 | isl_space *space; |
1262 | |
1263 | space = isl_space_range(isl_map_get_space(map)); |
1264 | node = graph_find_node(ctx, graph, space); |
1265 | isl_space_free(space); |
1266 | |
1267 | return node; |
1268 | } |
1269 | |
1270 | /* Refrain from adding a new edge based on "map". |
1271 | * Instead, just free the map. |
1272 | * "tagged" is either a copy of "map" with additional tags or NULL. |
1273 | */ |
1274 | static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged) |
1275 | { |
1276 | isl_map_free(map); |
1277 | isl_map_free(tagged); |
1278 | |
1279 | return isl_stat_ok; |
1280 | } |
1281 | |
1282 | /* Add a new edge to the graph based on the given map |
1283 | * and add it to data->graph->edge_table[data->type]. |
1284 | * If a dependence relation of a given type happens to be identical |
1285 | * to one of the dependence relations of a type that was added before, |
1286 | * then we don't create a new edge, but instead mark the original edge |
1287 | * as also representing a dependence of the current type. |
1288 | * |
1289 | * Edges of type isl_edge_condition or isl_edge_conditional_validity |
1290 | * may be specified as "tagged" dependence relations. That is, "map" |
1291 | * may contain elements (i -> a) -> (j -> b), where i -> j denotes |
1292 | * the dependence on iterations and a and b are tags. |
1293 | * edge->map is set to the relation containing the elements i -> j, |
1294 | * while edge->tagged_condition and edge->tagged_validity contain |
1295 | * the union of all the "map" relations |
1296 | * for which extract_edge is called that result in the same edge->map. |
1297 | * |
1298 | * If the source or the destination node is compressed, then |
1299 | * intersect both "map" and "tagged" with the constraints that |
1300 | * were used to construct the compression. |
1301 | * This ensures that there are no schedule constraints defined |
1302 | * outside of these domains, while the scheduler no longer has |
1303 | * any control over those outside parts. |
1304 | */ |
1305 | static isl_stat extract_edge(__isl_take isl_map *map, void *user) |
1306 | { |
1307 | isl_bool empty; |
1308 | isl_ctx *ctx = isl_map_get_ctx(map); |
1309 | struct isl_extract_edge_data *data = user; |
1310 | struct isl_sched_graph *graph = data->graph; |
1311 | struct isl_sched_node *src, *dst; |
1312 | struct isl_sched_edge *edge; |
1313 | isl_map *tagged = NULL((void*)0); |
1314 | |
1315 | if (data->type == isl_edge_condition || |
1316 | data->type == isl_edge_conditional_validity) { |
1317 | if (isl_map_can_zip(map)) { |
1318 | tagged = isl_map_copy(map); |
1319 | map = isl_set_unwrap(isl_map_domain(isl_map_zip(map))); |
1320 | } else { |
1321 | tagged = insert_dummy_tags(isl_map_copy(map)); |
1322 | } |
1323 | } |
1324 | |
1325 | src = find_domain_node(ctx, graph, map); |
1326 | dst = find_range_node(ctx, graph, map); |
1327 | |
1328 | if (!src || !dst) |
1329 | goto error; |
1330 | if (!is_node(graph, src) || !is_node(graph, dst)) |
1331 | return skip_edge(map, tagged); |
1332 | |
1333 | if (src->compressed || dst->compressed) { |
1334 | isl_map *hull; |
1335 | hull = extract_hull(src, dst); |
1336 | if (tagged) |
1337 | tagged = map_intersect_domains(tagged, hull); |
1338 | map = isl_map_intersect(map, hull); |
1339 | } |
1340 | |
1341 | empty = isl_map_plain_is_empty(map); |
1342 | if (empty < 0) |
1343 | goto error; |
1344 | if (empty) |
1345 | return skip_edge(map, tagged); |
1346 | |
1347 | graph->edge[graph->n_edge].src = src; |
1348 | graph->edge[graph->n_edge].dst = dst; |
1349 | graph->edge[graph->n_edge].map = map; |
1350 | graph->edge[graph->n_edge].types = 0; |
1351 | graph->edge[graph->n_edge].tagged_condition = NULL((void*)0); |
1352 | graph->edge[graph->n_edge].tagged_validity = NULL((void*)0); |
1353 | set_type(&graph->edge[graph->n_edge], data->type); |
1354 | if (data->type == isl_edge_condition) |
1355 | graph->edge[graph->n_edge].tagged_condition = |
1356 | isl_union_map_from_map(tagged); |
1357 | if (data->type == isl_edge_conditional_validity) |
1358 | graph->edge[graph->n_edge].tagged_validity = |
1359 | isl_union_map_from_map(tagged); |
1360 | |
1361 | edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]); |
1362 | if (!edge) { |
1363 | graph->n_edge++; |
1364 | return isl_stat_error; |
1365 | } |
1366 | if (edge == &graph->edge[graph->n_edge]) |
1367 | return graph_edge_table_add(ctx, graph, data->type, |
1368 | &graph->edge[graph->n_edge++]); |
1369 | |
1370 | if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0) |
1371 | return isl_stat_error; |
1372 | |
1373 | return graph_edge_table_add(ctx, graph, data->type, edge); |
1374 | error: |
1375 | isl_map_free(map); |
1376 | isl_map_free(tagged); |
1377 | return isl_stat_error; |
1378 | } |
1379 | |
1380 | /* Initialize the schedule graph "graph" from the schedule constraints "sc". |
1381 | * |
1382 | * The context is included in the domain before the nodes of |
1383 | * the graphs are extracted in order to be able to exploit |
1384 | * any possible additional equalities. |
1385 | * Note that this intersection is only performed locally here. |
1386 | */ |
1387 | static isl_stat graph_init(struct isl_sched_graph *graph, |
1388 | __isl_keep isl_schedule_constraints *sc) |
1389 | { |
1390 | isl_ctx *ctx; |
1391 | isl_union_set *domain; |
1392 | isl_union_map *c; |
1393 | struct isl_extract_edge_data data; |
1394 | enum isl_edge_type i; |
1395 | isl_stat r; |
1396 | |
1397 | if (!sc) |
1398 | return isl_stat_error; |
1399 | |
1400 | ctx = isl_schedule_constraints_get_ctx(sc); |
1401 | |
1402 | domain = isl_schedule_constraints_get_domain(sc); |
1403 | graph->n = isl_union_set_n_set(domain); |
1404 | isl_union_set_free(domain); |
1405 | |
1406 | if (graph_alloc(ctx, graph, graph->n, |
1407 | isl_schedule_constraints_n_map(sc)) < 0) |
1408 | return isl_stat_error; |
1409 | |
1410 | if (compute_max_row(graph, sc) < 0) |
1411 | return isl_stat_error; |
1412 | graph->root = graph; |
1413 | graph->n = 0; |
1414 | domain = isl_schedule_constraints_get_domain(sc); |
1415 | domain = isl_union_set_intersect_params(domain, |
1416 | isl_schedule_constraints_get_context(sc)); |
1417 | r = isl_union_set_foreach_set(domain, &extract_node, graph); |
1418 | isl_union_set_free(domain); |
1419 | if (r < 0) |
1420 | return isl_stat_error; |
1421 | if (graph_init_table(ctx, graph) < 0) |
1422 | return isl_stat_error; |
1423 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
1424 | c = isl_schedule_constraints_get(sc, i); |
1425 | graph->max_edge[i] = isl_union_map_n_map(c); |
1426 | isl_union_map_free(c); |
1427 | if (!c) |
1428 | return isl_stat_error; |
1429 | } |
1430 | if (graph_init_edge_tables(ctx, graph) < 0) |
1431 | return isl_stat_error; |
1432 | graph->n_edge = 0; |
1433 | data.graph = graph; |
1434 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
1435 | isl_stat r; |
1436 | |
1437 | data.type = i; |
1438 | c = isl_schedule_constraints_get(sc, i); |
1439 | r = isl_union_map_foreach_map(c, &extract_edge, &data); |
1440 | isl_union_map_free(c); |
1441 | if (r < 0) |
1442 | return isl_stat_error; |
1443 | } |
1444 | |
1445 | return isl_stat_ok; |
1446 | } |
1447 | |
1448 | /* Check whether there is any dependence from node[j] to node[i] |
1449 | * or from node[i] to node[j]. |
1450 | */ |
1451 | static isl_bool node_follows_weak(int i, int j, void *user) |
1452 | { |
1453 | isl_bool f; |
1454 | struct isl_sched_graph *graph = user; |
1455 | |
1456 | f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]); |
1457 | if (f < 0 || f) |
1458 | return f; |
1459 | return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]); |
1460 | } |
1461 | |
1462 | /* Check whether there is a (conditional) validity dependence from node[j] |
1463 | * to node[i], forcing node[i] to follow node[j]. |
1464 | */ |
1465 | static isl_bool node_follows_strong(int i, int j, void *user) |
1466 | { |
1467 | struct isl_sched_graph *graph = user; |
1468 | |
1469 | return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]); |
1470 | } |
1471 | |
1472 | /* Use Tarjan's algorithm for computing the strongly connected components |
1473 | * in the dependence graph only considering those edges defined by "follows". |
1474 | */ |
1475 | static isl_stat detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph, |
1476 | isl_bool (*follows)(int i, int j, void *user)) |
1477 | { |
1478 | int i, n; |
1479 | struct isl_tarjan_graph *g = NULL((void*)0); |
1480 | |
1481 | g = isl_tarjan_graph_init(ctx, graph->n, follows, graph); |
1482 | if (!g) |
1483 | return isl_stat_error; |
1484 | |
1485 | graph->scc = 0; |
1486 | i = 0; |
1487 | n = graph->n; |
1488 | while (n) { |
1489 | while (g->order[i] != -1) { |
1490 | graph->node[g->order[i]].scc = graph->scc; |
1491 | --n; |
1492 | ++i; |
1493 | } |
1494 | ++i; |
1495 | graph->scc++; |
1496 | } |
1497 | |
1498 | isl_tarjan_graph_free(g); |
1499 | |
1500 | return isl_stat_ok; |
1501 | } |
1502 | |
1503 | /* Apply Tarjan's algorithm to detect the strongly connected components |
1504 | * in the dependence graph. |
1505 | * Only consider the (conditional) validity dependences and clear "weak". |
1506 | */ |
1507 | static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph) |
1508 | { |
1509 | graph->weak = 0; |
1510 | return detect_ccs(ctx, graph, &node_follows_strong); |
1511 | } |
1512 | |
1513 | /* Apply Tarjan's algorithm to detect the (weakly) connected components |
1514 | * in the dependence graph. |
1515 | * Consider all dependences and set "weak". |
1516 | */ |
1517 | static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph) |
1518 | { |
1519 | graph->weak = 1; |
1520 | return detect_ccs(ctx, graph, &node_follows_weak); |
1521 | } |
1522 | |
1523 | static int cmp_scc(const void *a, const void *b, void *data) |
1524 | { |
1525 | struct isl_sched_graph *graph = data; |
1526 | const int *i1 = a; |
1527 | const int *i2 = b; |
1528 | |
1529 | return graph->node[*i1].scc - graph->node[*i2].scc; |
1530 | } |
1531 | |
1532 | /* Sort the elements of graph->sorted according to the corresponding SCCs. |
1533 | */ |
1534 | static int sort_sccs(struct isl_sched_graph *graph) |
1535 | { |
1536 | return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph); |
1537 | } |
1538 | |
1539 | /* Return a non-parametric set in the compressed space of "node" that is |
1540 | * bounded by the size in each direction |
1541 | * |
1542 | * { [x] : -S_i <= x_i <= S_i } |
1543 | * |
1544 | * If S_i is infinity in direction i, then there are no constraints |
1545 | * in that direction. |
1546 | * |
1547 | * Cache the result in node->bounds. |
1548 | */ |
1549 | static __isl_give isl_basic_setisl_basic_map *get_size_bounds(struct isl_sched_node *node) |
1550 | { |
1551 | isl_space *space; |
1552 | isl_basic_setisl_basic_map *bounds; |
1553 | int i; |
1554 | unsigned nparam; |
1555 | |
1556 | if (node->bounds) |
1557 | return isl_basic_set_copy(node->bounds); |
1558 | |
1559 | if (node->compressed) |
1560 | space = isl_multi_aff_get_domain_space(node->decompress); |
1561 | else |
1562 | space = isl_space_copy(node->space); |
1563 | nparam = isl_space_dim(space, isl_dim_param); |
1564 | space = isl_space_drop_dims(space, isl_dim_param, 0, nparam); |
1565 | bounds = isl_basic_set_universe(space); |
1566 | |
1567 | for (i = 0; i < node->nvar; ++i) { |
1568 | isl_val *size; |
1569 | |
1570 | size = isl_multi_val_get_val(node->sizes, i); |
1571 | if (!size) |
1572 | return isl_basic_set_free(bounds); |
1573 | if (!isl_val_is_int(size)) { |
1574 | isl_val_free(size); |
1575 | continue; |
1576 | } |
1577 | bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i, |
1578 | isl_val_copy(size)); |
1579 | bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i, |
1580 | isl_val_neg(size)); |
1581 | } |
1582 | |
1583 | node->bounds = isl_basic_set_copy(bounds); |
1584 | return bounds; |
1585 | } |
1586 | |
1587 | /* Drop some constraints from "delta" that could be exploited |
1588 | * to construct loop coalescing schedules. |
1589 | * In particular, drop those constraint that bound the difference |
1590 | * to the size of the domain. |
1591 | * First project out the parameters to improve the effectiveness. |
1592 | */ |
1593 | static __isl_give isl_setisl_map *drop_coalescing_constraints( |
1594 | __isl_take isl_setisl_map *delta, struct isl_sched_node *node) |
1595 | { |
1596 | unsigned nparam; |
1597 | isl_basic_setisl_basic_map *bounds; |
1598 | |
1599 | bounds = get_size_bounds(node); |
1600 | |
1601 | nparam = isl_set_dim(delta, isl_dim_param); |
1602 | delta = isl_set_project_out(delta, isl_dim_param, 0, nparam); |
1603 | delta = isl_set_remove_divs(delta); |
1604 | delta = isl_set_plain_gist_basic_set(delta, bounds); |
1605 | return delta; |
1606 | } |
1607 | |
1608 | /* Given a dependence relation R from "node" to itself, |
1609 | * construct the set of coefficients of valid constraints for elements |
1610 | * in that dependence relation. |
1611 | * In particular, the result contains tuples of coefficients |
1612 | * c_0, c_n, c_x such that |
1613 | * |
1614 | * c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R |
1615 | * |
1616 | * or, equivalently, |
1617 | * |
1618 | * c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R } |
1619 | * |
1620 | * We choose here to compute the dual of delta R. |
1621 | * Alternatively, we could have computed the dual of R, resulting |
1622 | * in a set of tuples c_0, c_n, c_x, c_y, and then |
1623 | * plugged in (c_0, c_n, c_x, -c_x). |
1624 | * |
1625 | * If "need_param" is set, then the resulting coefficients effectively |
1626 | * include coefficients for the parameters c_n. Otherwise, they may |
1627 | * have been projected out already. |
1628 | * Since the constraints may be different for these two cases, |
1629 | * they are stored in separate caches. |
1630 | * In particular, if no parameter coefficients are required and |
1631 | * the schedule_treat_coalescing option is set, then the parameters |
1632 | * are projected out and some constraints that could be exploited |
1633 | * to construct coalescing schedules are removed before the dual |
1634 | * is computed. |
1635 | * |
1636 | * If "node" has been compressed, then the dependence relation |
1637 | * is also compressed before the set of coefficients is computed. |
1638 | */ |
1639 | static __isl_give isl_basic_setisl_basic_map *intra_coefficients( |
1640 | struct isl_sched_graph *graph, struct isl_sched_node *node, |
1641 | __isl_take isl_map *map, int need_param) |
1642 | { |
1643 | isl_ctx *ctx; |
1644 | isl_setisl_map *delta; |
1645 | isl_map *key; |
1646 | isl_basic_setisl_basic_map *coef; |
1647 | isl_maybe_isl_basic_setisl_maybe_isl_basic_map m; |
1648 | isl_map_to_basic_set **hmap = &graph->intra_hmap; |
1649 | int treat; |
1650 | |
1651 | if (!map) |
1652 | return NULL((void*)0); |
1653 | |
1654 | ctx = isl_map_get_ctx(map); |
1655 | treat = !need_param && isl_options_get_schedule_treat_coalescing(ctx); |
1656 | if (!treat) |
1657 | hmap = &graph->intra_hmap_param; |
1658 | m = isl_map_to_basic_set_try_get(*hmap, map); |
1659 | if (m.valid < 0 || m.valid) { |
1660 | isl_map_free(map); |
1661 | return m.value; |
1662 | } |
1663 | |
1664 | key = isl_map_copy(map); |
1665 | if (node->compressed) { |
1666 | map = isl_map_preimage_domain_multi_aff(map, |
1667 | isl_multi_aff_copy(node->decompress)); |
1668 | map = isl_map_preimage_range_multi_aff(map, |
1669 | isl_multi_aff_copy(node->decompress)); |
1670 | } |
1671 | delta = isl_map_deltas(map); |
1672 | if (treat) |
1673 | delta = drop_coalescing_constraints(delta, node); |
1674 | delta = isl_set_remove_divs(delta); |
1675 | coef = isl_set_coefficients(delta); |
1676 | *hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef)); |
1677 | |
1678 | return coef; |
1679 | } |
1680 | |
1681 | /* Given a dependence relation R, construct the set of coefficients |
1682 | * of valid constraints for elements in that dependence relation. |
1683 | * In particular, the result contains tuples of coefficients |
1684 | * c_0, c_n, c_x, c_y such that |
1685 | * |
1686 | * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R |
1687 | * |
1688 | * If the source or destination nodes of "edge" have been compressed, |
1689 | * then the dependence relation is also compressed before |
1690 | * the set of coefficients is computed. |
1691 | */ |
1692 | static __isl_give isl_basic_setisl_basic_map *inter_coefficients( |
1693 | struct isl_sched_graph *graph, struct isl_sched_edge *edge, |
1694 | __isl_take isl_map *map) |
1695 | { |
1696 | isl_setisl_map *set; |
1697 | isl_map *key; |
1698 | isl_basic_setisl_basic_map *coef; |
1699 | isl_maybe_isl_basic_setisl_maybe_isl_basic_map m; |
1700 | |
1701 | m = isl_map_to_basic_set_try_get(graph->inter_hmap, map); |
1702 | if (m.valid < 0 || m.valid) { |
1703 | isl_map_free(map); |
1704 | return m.value; |
1705 | } |
1706 | |
1707 | key = isl_map_copy(map); |
1708 | if (edge->src->compressed) |
1709 | map = isl_map_preimage_domain_multi_aff(map, |
1710 | isl_multi_aff_copy(edge->src->decompress)); |
1711 | if (edge->dst->compressed) |
1712 | map = isl_map_preimage_range_multi_aff(map, |
1713 | isl_multi_aff_copy(edge->dst->decompress)); |
1714 | set = isl_map_wrap(isl_map_remove_divs(map)); |
1715 | coef = isl_set_coefficients(set); |
1716 | graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key, |
1717 | isl_basic_set_copy(coef)); |
1718 | |
1719 | return coef; |
1720 | } |
1721 | |
1722 | /* Return the position of the coefficients of the variables in |
1723 | * the coefficients constraints "coef". |
1724 | * |
1725 | * The space of "coef" is of the form |
1726 | * |
1727 | * { coefficients[[cst, params] -> S] } |
1728 | * |
1729 | * Return the position of S. |
1730 | */ |
1731 | static int coef_var_offset(__isl_keep isl_basic_setisl_basic_map *coef) |
1732 | { |
1733 | int offset; |
1734 | isl_space *space; |
1735 | |
1736 | space = isl_space_unwrap(isl_basic_set_get_space(coef)); |
1737 | offset = isl_space_dim(space, isl_dim_in); |
1738 | isl_space_free(space); |
1739 | |
1740 | return offset; |
1741 | } |
1742 | |
1743 | /* Return the offset of the coefficient of the constant term of "node" |
1744 | * within the (I)LP. |
1745 | * |
1746 | * Within each node, the coefficients have the following order: |
1747 | * - positive and negative parts of c_i_x |
1748 | * - c_i_n (if parametric) |
1749 | * - c_i_0 |
1750 | */ |
1751 | static int node_cst_coef_offset(struct isl_sched_node *node) |
1752 | { |
1753 | return node->start + 2 * node->nvar + node->nparam; |
1754 | } |
1755 | |
1756 | /* Return the offset of the coefficients of the parameters of "node" |
1757 | * within the (I)LP. |
1758 | * |
1759 | * Within each node, the coefficients have the following order: |
1760 | * - positive and negative parts of c_i_x |
1761 | * - c_i_n (if parametric) |
1762 | * - c_i_0 |
1763 | */ |
1764 | static int node_par_coef_offset(struct isl_sched_node *node) |
1765 | { |
1766 | return node->start + 2 * node->nvar; |
1767 | } |
1768 | |
1769 | /* Return the offset of the coefficients of the variables of "node" |
1770 | * within the (I)LP. |
1771 | * |
1772 | * Within each node, the coefficients have the following order: |
1773 | * - positive and negative parts of c_i_x |
1774 | * - c_i_n (if parametric) |
1775 | * - c_i_0 |
1776 | */ |
1777 | static int node_var_coef_offset(struct isl_sched_node *node) |
1778 | { |
1779 | return node->start; |
1780 | } |
1781 | |
1782 | /* Return the position of the pair of variables encoding |
1783 | * coefficient "i" of "node". |
1784 | * |
1785 | * The order of these variable pairs is the opposite of |
1786 | * that of the coefficients, with 2 variables per coefficient. |
1787 | */ |
1788 | static int node_var_coef_pos(struct isl_sched_node *node, int i) |
1789 | { |
1790 | return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i); |
1791 | } |
1792 | |
1793 | /* Construct an isl_dim_map for mapping constraints on coefficients |
1794 | * for "node" to the corresponding positions in graph->lp. |
1795 | * "offset" is the offset of the coefficients for the variables |
1796 | * in the input constraints. |
1797 | * "s" is the sign of the mapping. |
1798 | * |
1799 | * The input constraints are given in terms of the coefficients |
1800 | * (c_0, c_x) or (c_0, c_n, c_x). |
1801 | * The mapping produced by this function essentially plugs in |
1802 | * (0, c_i_x^+ - c_i_x^-) if s = 1 and |
1803 | * (0, -c_i_x^+ + c_i_x^-) if s = -1 or |
1804 | * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and |
1805 | * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1. |
1806 | * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart. |
1807 | * Furthermore, the order of these pairs is the opposite of that |
1808 | * of the corresponding coefficients. |
1809 | * |
1810 | * The caller can extend the mapping to also map the other coefficients |
1811 | * (and therefore not plug in 0). |
1812 | */ |
1813 | static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx, |
1814 | struct isl_sched_graph *graph, struct isl_sched_node *node, |
1815 | int offset, int s) |
1816 | { |
1817 | int pos; |
1818 | unsigned total; |
1819 | isl_dim_map *dim_map; |
1820 | |
1821 | if (!node || !graph->lp) |
1822 | return NULL((void*)0); |
1823 | |
1824 | total = isl_basic_set_total_dim(graph->lp); |
1825 | pos = node_var_coef_pos(node, 0); |
1826 | dim_map = isl_dim_map_alloc(ctx, total); |
1827 | isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s); |
1828 | isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s); |
1829 | |
1830 | return dim_map; |
1831 | } |
1832 | |
1833 | /* Construct an isl_dim_map for mapping constraints on coefficients |
1834 | * for "src" (node i) and "dst" (node j) to the corresponding positions |
1835 | * in graph->lp. |
1836 | * "offset" is the offset of the coefficients for the variables of "src" |
1837 | * in the input constraints. |
1838 | * "s" is the sign of the mapping. |
1839 | * |
1840 | * The input constraints are given in terms of the coefficients |
1841 | * (c_0, c_n, c_x, c_y). |
1842 | * The mapping produced by this function essentially plugs in |
1843 | * (c_j_0 - c_i_0, c_j_n - c_i_n, |
1844 | * -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and |
1845 | * (-c_j_0 + c_i_0, -c_j_n + c_i_n, |
1846 | * c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1. |
1847 | * In graph->lp, the c_*^- appear before their c_*^+ counterpart. |
1848 | * Furthermore, the order of these pairs is the opposite of that |
1849 | * of the corresponding coefficients. |
1850 | * |
1851 | * The caller can further extend the mapping. |
1852 | */ |
1853 | static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx, |
1854 | struct isl_sched_graph *graph, struct isl_sched_node *src, |
1855 | struct isl_sched_node *dst, int offset, int s) |
1856 | { |
1857 | int pos; |
1858 | unsigned total; |
1859 | isl_dim_map *dim_map; |
1860 | |
1861 | if (!src || !dst || !graph->lp) |
1862 | return NULL((void*)0); |
1863 | |
1864 | total = isl_basic_set_total_dim(graph->lp); |
1865 | dim_map = isl_dim_map_alloc(ctx, total); |
1866 | |
1867 | pos = node_cst_coef_offset(dst); |
1868 | isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s); |
1869 | pos = node_par_coef_offset(dst); |
1870 | isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s); |
1871 | pos = node_var_coef_pos(dst, 0); |
1872 | isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1, |
1873 | dst->nvar, -s); |
1874 | isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1, |
1875 | dst->nvar, s); |
1876 | |
1877 | pos = node_cst_coef_offset(src); |
1878 | isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s); |
1879 | pos = node_par_coef_offset(src); |
1880 | isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s); |
1881 | pos = node_var_coef_pos(src, 0); |
1882 | isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s); |
1883 | isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s); |
1884 | |
1885 | return dim_map; |
1886 | } |
1887 | |
1888 | /* Add the constraints from "src" to "dst" using "dim_map", |
1889 | * after making sure there is enough room in "dst" for the extra constraints. |
1890 | */ |
1891 | static __isl_give isl_basic_setisl_basic_map *add_constraints_dim_map( |
1892 | __isl_take isl_basic_setisl_basic_map *dst, __isl_take isl_basic_setisl_basic_map *src, |
1893 | __isl_take isl_dim_map *dim_map) |
1894 | { |
1895 | int n_eq, n_ineq; |
1896 | |
1897 | n_eq = isl_basic_set_n_equality(src); |
1898 | n_ineq = isl_basic_set_n_inequality(src); |
1899 | dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq); |
1900 | dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map); |
1901 | return dst; |
1902 | } |
1903 | |
1904 | /* Add constraints to graph->lp that force validity for the given |
1905 | * dependence from a node i to itself. |
1906 | * That is, add constraints that enforce |
1907 | * |
1908 | * (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x) |
1909 | * = c_i_x (y - x) >= 0 |
1910 | * |
1911 | * for each (x,y) in R. |
1912 | * We obtain general constraints on coefficients (c_0, c_x) |
1913 | * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-), |
1914 | * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative. |
1915 | * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart. |
1916 | * Note that the result of intra_coefficients may also contain |
1917 | * parameter coefficients c_n, in which case 0 is plugged in for them as well. |
1918 | */ |
1919 | static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph, |
1920 | struct isl_sched_edge *edge) |
1921 | { |
1922 | int offset; |
1923 | isl_map *map = isl_map_copy(edge->map); |
1924 | isl_ctx *ctx = isl_map_get_ctx(map); |
1925 | isl_dim_map *dim_map; |
1926 | isl_basic_setisl_basic_map *coef; |
1927 | struct isl_sched_node *node = edge->src; |
1928 | |
1929 | coef = intra_coefficients(graph, node, map, 0); |
1930 | |
1931 | offset = coef_var_offset(coef); |
1932 | |
1933 | if (!coef) |
1934 | return isl_stat_error; |
1935 | |
1936 | dim_map = intra_dim_map(ctx, graph, node, offset, 1); |
1937 | graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); |
1938 | |
1939 | return isl_stat_ok; |
1940 | } |
1941 | |
1942 | /* Add constraints to graph->lp that force validity for the given |
1943 | * dependence from node i to node j. |
1944 | * That is, add constraints that enforce |
1945 | * |
1946 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0 |
1947 | * |
1948 | * for each (x,y) in R. |
1949 | * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y) |
1950 | * of valid constraints for R and then plug in |
1951 | * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-), |
1952 | * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative. |
1953 | * In graph->lp, the c_*^- appear before their c_*^+ counterpart. |
1954 | */ |
1955 | static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph, |
1956 | struct isl_sched_edge *edge) |
1957 | { |
1958 | int offset; |
1959 | isl_map *map; |
1960 | isl_ctx *ctx; |
1961 | isl_dim_map *dim_map; |
1962 | isl_basic_setisl_basic_map *coef; |
1963 | struct isl_sched_node *src = edge->src; |
1964 | struct isl_sched_node *dst = edge->dst; |
1965 | |
1966 | if (!graph->lp) |
1967 | return isl_stat_error; |
1968 | |
1969 | map = isl_map_copy(edge->map); |
1970 | ctx = isl_map_get_ctx(map); |
1971 | coef = inter_coefficients(graph, edge, map); |
1972 | |
1973 | offset = coef_var_offset(coef); |
1974 | |
1975 | if (!coef) |
1976 | return isl_stat_error; |
1977 | |
1978 | dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1); |
1979 | |
1980 | edge->start = graph->lp->n_ineq; |
1981 | graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); |
1982 | if (!graph->lp) |
1983 | return isl_stat_error; |
1984 | edge->end = graph->lp->n_ineq; |
1985 | |
1986 | return isl_stat_ok; |
1987 | } |
1988 | |
1989 | /* Add constraints to graph->lp that bound the dependence distance for the given |
1990 | * dependence from a node i to itself. |
1991 | * If s = 1, we add the constraint |
1992 | * |
1993 | * c_i_x (y - x) <= m_0 + m_n n |
1994 | * |
1995 | * or |
1996 | * |
1997 | * -c_i_x (y - x) + m_0 + m_n n >= 0 |
1998 | * |
1999 | * for each (x,y) in R. |
2000 | * If s = -1, we add the constraint |
2001 | * |
2002 | * -c_i_x (y - x) <= m_0 + m_n n |
2003 | * |
2004 | * or |
2005 | * |
2006 | * c_i_x (y - x) + m_0 + m_n n >= 0 |
2007 | * |
2008 | * for each (x,y) in R. |
2009 | * We obtain general constraints on coefficients (c_0, c_n, c_x) |
2010 | * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x), |
2011 | * with each coefficient (except m_0) represented as a pair of non-negative |
2012 | * coefficients. |
2013 | * |
2014 | * |
2015 | * If "local" is set, then we add constraints |
2016 | * |
2017 | * c_i_x (y - x) <= 0 |
2018 | * |
2019 | * or |
2020 | * |
2021 | * -c_i_x (y - x) <= 0 |
2022 | * |
2023 | * instead, forcing the dependence distance to be (less than or) equal to 0. |
2024 | * That is, we plug in (0, 0, -s * c_i_x), |
2025 | * intra_coefficients is not required to have c_n in its result when |
2026 | * "local" is set. If they are missing, then (0, -s * c_i_x) is plugged in. |
2027 | * Note that dependences marked local are treated as validity constraints |
2028 | * by add_all_validity_constraints and therefore also have |
2029 | * their distances bounded by 0 from below. |
2030 | */ |
2031 | static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph, |
2032 | struct isl_sched_edge *edge, int s, int local) |
2033 | { |
2034 | int offset; |
2035 | unsigned nparam; |
2036 | isl_map *map = isl_map_copy(edge->map); |
2037 | isl_ctx *ctx = isl_map_get_ctx(map); |
2038 | isl_dim_map *dim_map; |
2039 | isl_basic_setisl_basic_map *coef; |
2040 | struct isl_sched_node *node = edge->src; |
2041 | |
2042 | coef = intra_coefficients(graph, node, map, !local); |
2043 | |
2044 | offset = coef_var_offset(coef); |
2045 | |
2046 | if (!coef) |
2047 | return isl_stat_error; |
2048 | |
2049 | nparam = isl_space_dim(node->space, isl_dim_param); |
2050 | dim_map = intra_dim_map(ctx, graph, node, offset, -s); |
2051 | |
2052 | if (!local) { |
2053 | isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1); |
2054 | isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1); |
2055 | isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1); |
2056 | } |
2057 | graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); |
2058 | |
2059 | return isl_stat_ok; |
2060 | } |
2061 | |
2062 | /* Add constraints to graph->lp that bound the dependence distance for the given |
2063 | * dependence from node i to node j. |
2064 | * If s = 1, we add the constraint |
2065 | * |
2066 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) |
2067 | * <= m_0 + m_n n |
2068 | * |
2069 | * or |
2070 | * |
2071 | * -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) + |
2072 | * m_0 + m_n n >= 0 |
2073 | * |
2074 | * for each (x,y) in R. |
2075 | * If s = -1, we add the constraint |
2076 | * |
2077 | * -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)) |
2078 | * <= m_0 + m_n n |
2079 | * |
2080 | * or |
2081 | * |
2082 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) + |
2083 | * m_0 + m_n n >= 0 |
2084 | * |
2085 | * for each (x,y) in R. |
2086 | * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y) |
2087 | * of valid constraints for R and then plug in |
2088 | * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n, |
2089 | * s*c_i_x, -s*c_j_x) |
2090 | * with each coefficient (except m_0, c_*_0 and c_*_n) |
2091 | * represented as a pair of non-negative coefficients. |
2092 | * |
2093 | * |
2094 | * If "local" is set (and s = 1), then we add constraints |
2095 | * |
2096 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0 |
2097 | * |
2098 | * or |
2099 | * |
2100 | * -((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0 |
2101 | * |
2102 | * instead, forcing the dependence distance to be (less than or) equal to 0. |
2103 | * That is, we plug in |
2104 | * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x). |
2105 | * Note that dependences marked local are treated as validity constraints |
2106 | * by add_all_validity_constraints and therefore also have |
2107 | * their distances bounded by 0 from below. |
2108 | */ |
2109 | static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph, |
2110 | struct isl_sched_edge *edge, int s, int local) |
2111 | { |
2112 | int offset; |
2113 | unsigned nparam; |
2114 | isl_map *map = isl_map_copy(edge->map); |
2115 | isl_ctx *ctx = isl_map_get_ctx(map); |
2116 | isl_dim_map *dim_map; |
2117 | isl_basic_setisl_basic_map *coef; |
2118 | struct isl_sched_node *src = edge->src; |
2119 | struct isl_sched_node *dst = edge->dst; |
2120 | |
2121 | coef = inter_coefficients(graph, edge, map); |
2122 | |
2123 | offset = coef_var_offset(coef); |
2124 | |
2125 | if (!coef) |
2126 | return isl_stat_error; |
2127 | |
2128 | nparam = isl_space_dim(src->space, isl_dim_param); |
2129 | dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s); |
2130 | |
2131 | if (!local) { |
2132 | isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1); |
2133 | isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1); |
2134 | isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1); |
2135 | } |
2136 | |
2137 | graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); |
2138 | |
2139 | return isl_stat_ok; |
2140 | } |
2141 | |
/* Should the distance over "edge" be forced to zero?
 * This is the case for edges marked local and,
 * if "use_coincidence" is set, also for coincidence edges.
 */
static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
{
	if (is_local(edge))
		return 1;
	if (!use_coincidence)
		return 0;
	return is_coincidence(edge) != 0;
}
2151 | |
2152 | /* Add all validity constraints to graph->lp. |
2153 | * |
2154 | * An edge that is forced to be local needs to have its dependence |
2155 | * distances equal to zero. We take care of bounding them by 0 from below |
2156 | * here. add_all_proximity_constraints takes care of bounding them by 0 |
2157 | * from above. |
2158 | * |
2159 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
2160 | * Otherwise, we ignore them. |
2161 | */ |
2162 | static int add_all_validity_constraints(struct isl_sched_graph *graph, |
2163 | int use_coincidence) |
2164 | { |
2165 | int i; |
2166 | |
2167 | for (i = 0; i < graph->n_edge; ++i) { |
2168 | struct isl_sched_edge *edge = &graph->edge[i]; |
2169 | int zero; |
2170 | |
2171 | zero = force_zero(edge, use_coincidence); |
2172 | if (!is_validity(edge) && !zero) |
2173 | continue; |
2174 | if (edge->src != edge->dst) |
2175 | continue; |
2176 | if (add_intra_validity_constraints(graph, edge) < 0) |
2177 | return -1; |
2178 | } |
2179 | |
2180 | for (i = 0; i < graph->n_edge; ++i) { |
2181 | struct isl_sched_edge *edge = &graph->edge[i]; |
2182 | int zero; |
2183 | |
2184 | zero = force_zero(edge, use_coincidence); |
2185 | if (!is_validity(edge) && !zero) |
2186 | continue; |
2187 | if (edge->src == edge->dst) |
2188 | continue; |
2189 | if (add_inter_validity_constraints(graph, edge) < 0) |
2190 | return -1; |
2191 | } |
2192 | |
2193 | return 0; |
2194 | } |
2195 | |
2196 | /* Add constraints to graph->lp that bound the dependence distance |
2197 | * for all dependence relations. |
2198 | * If a given proximity dependence is identical to a validity |
2199 | * dependence, then the dependence distance is already bounded |
2200 | * from below (by zero), so we only need to bound the distance |
2201 | * from above. (This includes the case of "local" dependences |
2202 | * which are treated as validity dependence by add_all_validity_constraints.) |
2203 | * Otherwise, we need to bound the distance both from above and from below. |
2204 | * |
2205 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
2206 | * Otherwise, we ignore them. |
2207 | */ |
2208 | static int add_all_proximity_constraints(struct isl_sched_graph *graph, |
2209 | int use_coincidence) |
2210 | { |
2211 | int i; |
2212 | |
2213 | for (i = 0; i < graph->n_edge; ++i) { |
2214 | struct isl_sched_edge *edge = &graph->edge[i]; |
2215 | int zero; |
2216 | |
2217 | zero = force_zero(edge, use_coincidence); |
2218 | if (!is_proximity(edge) && !zero) |
2219 | continue; |
2220 | if (edge->src == edge->dst && |
2221 | add_intra_proximity_constraints(graph, edge, 1, zero) < 0) |
2222 | return -1; |
2223 | if (edge->src != edge->dst && |
2224 | add_inter_proximity_constraints(graph, edge, 1, zero) < 0) |
2225 | return -1; |
2226 | if (is_validity(edge) || zero) |
2227 | continue; |
2228 | if (edge->src == edge->dst && |
2229 | add_intra_proximity_constraints(graph, edge, -1, 0) < 0) |
2230 | return -1; |
2231 | if (edge->src != edge->dst && |
2232 | add_inter_proximity_constraints(graph, edge, -1, 0) < 0) |
2233 | return -1; |
2234 | } |
2235 | |
2236 | return 0; |
2237 | } |
2238 | |
2239 | /* Normalize the rows of "indep" such that all rows are lexicographically |
2240 | * positive and such that each row contains as many final zeros as possible, |
2241 | * given the choice for the previous rows. |
2242 | * Do this by performing elementary row operations. |
2243 | */ |
2244 | static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep) |
2245 | { |
2246 | indep = isl_mat_reverse_gauss(indep); |
2247 | indep = isl_mat_lexnonneg_rows(indep); |
2248 | return indep; |
2249 | } |
2250 | |
2251 | /* Compute a basis for the rows in the linear part of the schedule |
2252 | * and extend this basis to a full basis. The remaining rows |
2253 | * can then be used to force linear independence from the rows |
2254 | * in the schedule. |
2255 | * |
2256 | * In particular, given the schedule rows S, we compute |
2257 | * |
2258 | * S = H Q |
2259 | * S U = H |
2260 | * |
2261 | * with H the Hermite normal form of S. That is, all but the |
2262 | * first rank columns of H are zero and so each row in S is |
2263 | * a linear combination of the first rank rows of Q. |
2264 | * The matrix Q can be used as a variable transformation |
2265 | * that isolates the directions of S in the first rank rows. |
2266 | * Transposing S U = H yields |
2267 | * |
2268 | * U^T S^T = H^T |
2269 | * |
2270 | * with all but the first rank rows of H^T zero. |
2271 | * The last rows of U^T are therefore linear combinations |
2272 | * of schedule coefficients that are all zero on schedule |
2273 | * coefficients that are linearly dependent on the rows of S. |
2274 | * At least one of these combinations is non-zero on |
2275 | * linearly independent schedule coefficients. |
2276 | * The rows are normalized to involve as few of the last |
2277 | * coefficients as possible and to have a positive initial value. |
2278 | */ |
2279 | static int node_update_vmap(struct isl_sched_node *node) |
2280 | { |
2281 | isl_mat *H, *U, *Q; |
2282 | int n_row = isl_mat_rows(node->sched); |
2283 | |
2284 | H = isl_mat_sub_alloc(node->sched, 0, n_row, |
2285 | 1 + node->nparam, node->nvar); |
2286 | |
2287 | H = isl_mat_left_hermite(H, 0, &U, &Q); |
2288 | isl_mat_free(node->indep); |
2289 | isl_mat_free(node->vmap); |
2290 | node->vmap = Q; |
2291 | node->indep = isl_mat_transpose(U); |
2292 | node->rank = isl_mat_initial_non_zero_cols(H); |
2293 | node->indep = isl_mat_drop_rows(node->indep, 0, node->rank); |
2294 | node->indep = normalize_independent(node->indep); |
2295 | isl_mat_free(H); |
2296 | |
2297 | if (!node->indep || !node->vmap || node->rank < 0) |
2298 | return -1; |
2299 | return 0; |
2300 | } |
2301 | |
/* Is "edge" marked as a validity edge or
 * as a conditional validity edge?
 */
static int is_any_validity(struct isl_sched_edge *edge)
{
	if (is_validity(edge))
		return 1;
	return is_conditional_validity(edge) != 0;
}
2308 | |
/* How many times should we count the constraints in "edge"?
 *
 * We count as follows
 *	validity		-> 1 (>= 0)
 *	validity+proximity	-> 2 (>= 0 and upper bound)
 *	proximity		-> 2 (lower and upper bound)
 *	local(+any)		-> 2 (>= 0 and <= 0)
 *
 * If an edge is only marked conditional_validity then it counts
 * as zero since it is only checked afterwards.
 *
 * If "use_coincidence" is set, then coincidence edges are treated
 * as local edges.  Otherwise, they are ignored.
 */
static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
{
	int two_bounds;

	two_bounds = is_proximity(edge) || force_zero(edge, use_coincidence);
	if (two_bounds)
		return 2;

	return is_validity(edge) ? 1 : 0;
}
2331 | |
2332 | /* How many times should the constraints in "edge" be counted |
2333 | * as a parametric intra-node constraint? |
2334 | * |
2335 | * Only proximity edges that are not forced zero need |
2336 | * coefficient constraints that include coefficients for parameters. |
2337 | * If the edge is also a validity edge, then only |
2338 | * an upper bound is introduced. Otherwise, both lower and upper bounds |
2339 | * are introduced. |
2340 | */ |
2341 | static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge, |
2342 | int use_coincidence) |
2343 | { |
2344 | if (edge->src != edge->dst) |
2345 | return 0; |
2346 | if (!is_proximity(edge)) |
2347 | return 0; |
2348 | if (force_zero(edge, use_coincidence)) |
2349 | return 0; |
2350 | if (is_validity(edge)) |
2351 | return 1; |
2352 | else |
2353 | return 2; |
2354 | } |
2355 | |
2356 | /* Add "f" times the number of equality and inequality constraints of "bset" |
2357 | * to "n_eq" and "n_ineq" and free "bset". |
2358 | */ |
2359 | static isl_stat update_count(__isl_take isl_basic_setisl_basic_map *bset, |
2360 | int f, int *n_eq, int *n_ineq) |
2361 | { |
2362 | if (!bset) |
2363 | return isl_stat_error; |
2364 | |
2365 | *n_eq += isl_basic_set_n_equality(bset); |
2366 | *n_ineq += isl_basic_set_n_inequality(bset); |
2367 | isl_basic_set_free(bset); |
2368 | |
2369 | return isl_stat_ok; |
2370 | } |
2371 | |
2372 | /* Count the number of equality and inequality constraints |
2373 | * that will be added for the given map. |
2374 | * |
2375 | * The edges that require parameter coefficients are counted separately. |
2376 | * |
2377 | * "use_coincidence" is set if we should take into account coincidence edges. |
2378 | */ |
2379 | static isl_stat count_map_constraints(struct isl_sched_graph *graph, |
2380 | struct isl_sched_edge *edge, __isl_take isl_map *map, |
2381 | int *n_eq, int *n_ineq, int use_coincidence) |
2382 | { |
2383 | isl_map *copy; |
2384 | isl_basic_setisl_basic_map *coef; |
2385 | int f = edge_multiplicity(edge, use_coincidence); |
2386 | int fp = parametric_intra_edge_multiplicity(edge, use_coincidence); |
2387 | |
2388 | if (f == 0) { |
2389 | isl_map_free(map); |
2390 | return isl_stat_ok; |
2391 | } |
2392 | |
2393 | if (edge->src != edge->dst) { |
2394 | coef = inter_coefficients(graph, edge, map); |
2395 | return update_count(coef, f, n_eq, n_ineq); |
2396 | } |
2397 | |
2398 | if (fp > 0) { |
2399 | copy = isl_map_copy(map); |
2400 | coef = intra_coefficients(graph, edge->src, copy, 1); |
2401 | if (update_count(coef, fp, n_eq, n_ineq) < 0) |
2402 | goto error; |
2403 | } |
2404 | |
2405 | if (f > fp) { |
2406 | copy = isl_map_copy(map); |
2407 | coef = intra_coefficients(graph, edge->src, copy, 0); |
2408 | if (update_count(coef, f - fp, n_eq, n_ineq) < 0) |
2409 | goto error; |
2410 | } |
2411 | |
2412 | isl_map_free(map); |
2413 | return isl_stat_ok; |
2414 | error: |
2415 | isl_map_free(map); |
2416 | return isl_stat_error; |
2417 | } |
2418 | |
2419 | /* Count the number of equality and inequality constraints |
2420 | * that will be added to the main lp problem. |
2421 | * We count as follows |
2422 | * validity -> 1 (>= 0) |
2423 | * validity+proximity -> 2 (>= 0 and upper bound) |
2424 | * proximity -> 2 (lower and upper bound) |
2425 | * local(+any) -> 2 (>= 0 and <= 0) |
2426 | * |
2427 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
2428 | * Otherwise, we ignore them. |
2429 | */ |
2430 | static int count_constraints(struct isl_sched_graph *graph, |
2431 | int *n_eq, int *n_ineq, int use_coincidence) |
2432 | { |
2433 | int i; |
2434 | |
2435 | *n_eq = *n_ineq = 0; |
2436 | for (i = 0; i < graph->n_edge; ++i) { |
2437 | struct isl_sched_edge *edge = &graph->edge[i]; |
2438 | isl_map *map = isl_map_copy(edge->map); |
2439 | |
2440 | if (count_map_constraints(graph, edge, map, n_eq, n_ineq, |
2441 | use_coincidence) < 0) |
2442 | return -1; |
2443 | } |
2444 | |
2445 | return 0; |
2446 | } |
2447 | |
2448 | /* Count the number of constraints that will be added by |
2449 | * add_bound_constant_constraints to bound the values of the constant terms |
2450 | * and increment *n_eq and *n_ineq accordingly. |
2451 | * |
2452 | * In practice, add_bound_constant_constraints only adds inequalities. |
2453 | */ |
2454 | static isl_stat count_bound_constant_constraints(isl_ctx *ctx, |
2455 | struct isl_sched_graph *graph, int *n_eq, int *n_ineq) |
2456 | { |
2457 | if (isl_options_get_schedule_max_constant_term(ctx) == -1) |
2458 | return isl_stat_ok; |
2459 | |
2460 | *n_ineq += graph->n; |
2461 | |
2462 | return isl_stat_ok; |
2463 | } |
2464 | |
2465 | /* Add constraints to bound the values of the constant terms in the schedule, |
2466 | * if requested by the user. |
2467 | * |
2468 | * The maximal value of the constant terms is defined by the option |
2469 | * "schedule_max_constant_term". |
2470 | */ |
2471 | static isl_stat add_bound_constant_constraints(isl_ctx *ctx, |
2472 | struct isl_sched_graph *graph) |
2473 | { |
2474 | int i, k; |
2475 | int max; |
2476 | int total; |
2477 | |
2478 | max = isl_options_get_schedule_max_constant_term(ctx); |
2479 | if (max == -1) |
2480 | return isl_stat_ok; |
2481 | |
2482 | total = isl_basic_set_dim(graph->lp, isl_dim_set); |
2483 | |
2484 | for (i = 0; i < graph->n; ++i) { |
2485 | struct isl_sched_node *node = &graph->node[i]; |
2486 | int pos; |
2487 | |
2488 | k = isl_basic_set_alloc_inequality(graph->lp); |
2489 | if (k < 0) |
2490 | return isl_stat_error; |
2491 | isl_seq_clr(graph->lp->ineq[k], 1 + total); |
2492 | pos = node_cst_coef_offset(node); |
2493 | isl_int_set_si(graph->lp->ineq[k][1 + pos], -1)isl_sioimath_set_si((graph->lp->ineq[k][1 + pos]), -1); |
2494 | isl_int_set_si(graph->lp->ineq[k][0], max)isl_sioimath_set_si((graph->lp->ineq[k][0]), max); |
2495 | } |
2496 | |
2497 | return isl_stat_ok; |
2498 | } |
2499 | |
2500 | /* Count the number of constraints that will be added by |
2501 | * add_bound_coefficient_constraints and increment *n_eq and *n_ineq |
2502 | * accordingly. |
2503 | * |
2504 | * In practice, add_bound_coefficient_constraints only adds inequalities. |
2505 | */ |
2506 | static int count_bound_coefficient_constraints(isl_ctx *ctx, |
2507 | struct isl_sched_graph *graph, int *n_eq, int *n_ineq) |
2508 | { |
2509 | int i; |
2510 | |
2511 | if (isl_options_get_schedule_max_coefficient(ctx) == -1 && |
2512 | !isl_options_get_schedule_treat_coalescing(ctx)) |
2513 | return 0; |
2514 | |
2515 | for (i = 0; i < graph->n; ++i) |
2516 | *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar; |
2517 | |
2518 | return 0; |
2519 | } |
2520 | |
2521 | /* Add constraints to graph->lp that bound the values of |
2522 | * the parameter schedule coefficients of "node" to "max" and |
2523 | * the variable schedule coefficients to the corresponding entry |
2524 | * in node->max. |
2525 | * In either case, a negative value means that no bound needs to be imposed. |
2526 | * |
2527 | * For parameter coefficients, this amounts to adding a constraint |
2528 | * |
2529 | * c_n <= max |
2530 | * |
2531 | * i.e., |
2532 | * |
2533 | * -c_n + max >= 0 |
2534 | * |
2535 | * The variables coefficients are, however, not represented directly. |
2536 | * Instead, the variable coefficients c_x are written as differences |
2537 | * c_x = c_x^+ - c_x^-. |
2538 | * That is, |
2539 | * |
2540 | * -max_i <= c_x_i <= max_i |
2541 | * |
2542 | * is encoded as |
2543 | * |
2544 | * -max_i <= c_x_i^+ - c_x_i^- <= max_i |
2545 | * |
2546 | * or |
2547 | * |
2548 | * -(c_x_i^+ - c_x_i^-) + max_i >= 0 |
2549 | * c_x_i^+ - c_x_i^- + max_i >= 0 |
2550 | */ |
2551 | static isl_stat node_add_coefficient_constraints(isl_ctx *ctx, |
2552 | struct isl_sched_graph *graph, struct isl_sched_node *node, int max) |
2553 | { |
2554 | int i, j, k; |
2555 | int total; |
2556 | isl_vec *ineq; |
2557 | |
2558 | total = isl_basic_set_dim(graph->lp, isl_dim_set); |
2559 | |
2560 | for (j = 0; j < node->nparam; ++j) { |
2561 | int dim; |
2562 | |
2563 | if (max < 0) |
2564 | continue; |
2565 | |
2566 | k = isl_basic_set_alloc_inequality(graph->lp); |
2567 | if (k < 0) |
2568 | return isl_stat_error; |
2569 | dim = 1 + node_par_coef_offset(node) + j; |
2570 | isl_seq_clr(graph->lp->ineq[k], 1 + total); |
2571 | isl_int_set_si(graph->lp->ineq[k][dim], -1)isl_sioimath_set_si((graph->lp->ineq[k][dim]), -1); |
2572 | isl_int_set_si(graph->lp->ineq[k][0], max)isl_sioimath_set_si((graph->lp->ineq[k][0]), max); |
2573 | } |
2574 | |
2575 | ineq = isl_vec_alloc(ctx, 1 + total); |
2576 | ineq = isl_vec_clr(ineq); |
2577 | if (!ineq) |
2578 | return isl_stat_error; |
2579 | for (i = 0; i < node->nvar; ++i) { |
2580 | int pos = 1 + node_var_coef_pos(node, i); |
2581 | |
2582 | if (isl_int_is_neg(node->max->el[i])(isl_sioimath_sgn(*(node->max->el[i])) < 0)) |
2583 | continue; |
2584 | |
2585 | isl_int_set_si(ineq->el[pos], 1)isl_sioimath_set_si((ineq->el[pos]), 1); |
2586 | isl_int_set_si(ineq->el[pos + 1], -1)isl_sioimath_set_si((ineq->el[pos + 1]), -1); |
2587 | isl_int_set(ineq->el[0], node->max->el[i])isl_sioimath_set((ineq->el[0]), *(node->max->el[i])); |
2588 | |
2589 | k = isl_basic_set_alloc_inequality(graph->lp); |
2590 | if (k < 0) |
2591 | goto error; |
2592 | isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total); |
2593 | |
2594 | isl_seq_neg(ineq->el + pos, ineq->el + pos, 2); |
2595 | k = isl_basic_set_alloc_inequality(graph->lp); |
2596 | if (k < 0) |
2597 | goto error; |
2598 | isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total); |
2599 | |
2600 | isl_seq_clr(ineq->el + pos, 2); |
2601 | } |
2602 | isl_vec_free(ineq); |
2603 | |
2604 | return isl_stat_ok; |
2605 | error: |
2606 | isl_vec_free(ineq); |
2607 | return isl_stat_error; |
2608 | } |
2609 | |
2610 | /* Add constraints that bound the values of the variable and parameter |
2611 | * coefficients of the schedule. |
2612 | * |
2613 | * The maximal value of the coefficients is defined by the option |
2614 | * 'schedule_max_coefficient' and the entries in node->max. |
2615 | * These latter entries are only set if either the schedule_max_coefficient |
2616 | * option or the schedule_treat_coalescing option is set. |
2617 | */ |
2618 | static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx, |
2619 | struct isl_sched_graph *graph) |
2620 | { |
2621 | int i; |
2622 | int max; |
2623 | |
2624 | max = isl_options_get_schedule_max_coefficient(ctx); |
2625 | |
2626 | if (max == -1 && !isl_options_get_schedule_treat_coalescing(ctx)) |
2627 | return isl_stat_ok; |
2628 | |
2629 | for (i = 0; i < graph->n; ++i) { |
2630 | struct isl_sched_node *node = &graph->node[i]; |
2631 | |
2632 | if (node_add_coefficient_constraints(ctx, graph, node, max) < 0) |
2633 | return isl_stat_error; |
2634 | } |
2635 | |
2636 | return isl_stat_ok; |
2637 | } |
2638 | |
2639 | /* Add a constraint to graph->lp that equates the value at position |
2640 | * "sum_pos" to the sum of the "n" values starting at "first". |
2641 | */ |
2642 | static isl_stat add_sum_constraint(struct isl_sched_graph *graph, |
2643 | int sum_pos, int first, int n) |
2644 | { |
2645 | int i, k; |
2646 | int total; |
2647 | |
2648 | total = isl_basic_set_dim(graph->lp, isl_dim_set); |
2649 | |
2650 | k = isl_basic_set_alloc_equality(graph->lp); |
2651 | if (k < 0) |
2652 | return isl_stat_error; |
2653 | isl_seq_clr(graph->lp->eq[k], 1 + total); |
2654 | isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1)isl_sioimath_set_si((graph->lp->eq[k][1 + sum_pos]), -1 ); |
2655 | for (i = 0; i < n; ++i) |
2656 | isl_int_set_si(graph->lp->eq[k][1 + first + i], 1)isl_sioimath_set_si((graph->lp->eq[k][1 + first + i]), 1 ); |
2657 | |
2658 | return isl_stat_ok; |
2659 | } |
2660 | |
2661 | /* Add a constraint to graph->lp that equates the value at position |
2662 | * "sum_pos" to the sum of the parameter coefficients of all nodes. |
2663 | */ |
2664 | static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph, |
2665 | int sum_pos) |
2666 | { |
2667 | int i, j, k; |
2668 | int total; |
2669 | |
2670 | total = isl_basic_set_dim(graph->lp, isl_dim_set); |
2671 | |
2672 | k = isl_basic_set_alloc_equality(graph->lp); |
2673 | if (k < 0) |
2674 | return isl_stat_error; |
2675 | isl_seq_clr(graph->lp->eq[k], 1 + total); |
2676 | isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1)isl_sioimath_set_si((graph->lp->eq[k][1 + sum_pos]), -1 ); |
2677 | for (i = 0; i < graph->n; ++i) { |
2678 | int pos = 1 + node_par_coef_offset(&graph->node[i]); |
2679 | |
2680 | for (j = 0; j < graph->node[i].nparam; ++j) |
2681 | isl_int_set_si(graph->lp->eq[k][pos + j], 1)isl_sioimath_set_si((graph->lp->eq[k][pos + j]), 1); |
2682 | } |
2683 | |
2684 | return isl_stat_ok; |
2685 | } |
2686 | |
2687 | /* Add a constraint to graph->lp that equates the value at position |
2688 | * "sum_pos" to the sum of the variable coefficients of all nodes. |
2689 | */ |
2690 | static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph, |
2691 | int sum_pos) |
2692 | { |
2693 | int i, j, k; |
2694 | int total; |
2695 | |
2696 | total = isl_basic_set_dim(graph->lp, isl_dim_set); |
2697 | |
2698 | k = isl_basic_set_alloc_equality(graph->lp); |
2699 | if (k < 0) |
2700 | return isl_stat_error; |
2701 | isl_seq_clr(graph->lp->eq[k], 1 + total); |
2702 | isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1)isl_sioimath_set_si((graph->lp->eq[k][1 + sum_pos]), -1 ); |
2703 | for (i = 0; i < graph->n; ++i) { |
2704 | struct isl_sched_node *node = &graph->node[i]; |
2705 | int pos = 1 + node_var_coef_offset(node); |
2706 | |
2707 | for (j = 0; j < 2 * node->nvar; ++j) |
2708 | isl_int_set_si(graph->lp->eq[k][pos + j], 1)isl_sioimath_set_si((graph->lp->eq[k][pos + j]), 1); |
2709 | } |
2710 | |
2711 | return isl_stat_ok; |
2712 | } |
2713 | |
2714 | /* Construct an ILP problem for finding schedule coefficients |
2715 | * that result in non-negative, but small dependence distances |
2716 | * over all dependences. |
2717 | * In particular, the dependence distances over proximity edges |
2718 | * are bounded by m_0 + m_n n and we compute schedule coefficients |
2719 | * with small values (preferably zero) of m_n and m_0. |
2720 | * |
2721 | * All variables of the ILP are non-negative. The actual coefficients |
2722 | * may be negative, so each coefficient is represented as the difference |
2723 | * of two non-negative variables. The negative part always appears |
2724 | * immediately before the positive part. |
2725 | * Other than that, the variables have the following order |
2726 | * |
2727 | * - sum of positive and negative parts of m_n coefficients |
2728 | * - m_0 |
2729 | * - sum of all c_n coefficients |
2730 | * (unconstrained when computing non-parametric schedules) |
2731 | * - sum of positive and negative parts of all c_x coefficients |
2732 | * - positive and negative parts of m_n coefficients |
2733 | * - for each node |
2734 | * - positive and negative parts of c_i_x, in opposite order |
2735 | * - c_i_n (if parametric) |
2736 | * - c_i_0 |
2737 | * |
2738 | * The constraints are those from the edges plus two or three equalities |
2739 | * to express the sums. |
2740 | * |
2741 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
2742 | * Otherwise, we ignore them. |
2743 | */ |
2744 | static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph, |
2745 | int use_coincidence) |
2746 | { |
2747 | int i; |
2748 | unsigned nparam; |
2749 | unsigned total; |
2750 | isl_space *space; |
2751 | int parametric; |
2752 | int param_pos; |
2753 | int n_eq, n_ineq; |
2754 | |
2755 | parametric = ctx->opt->schedule_parametric; |
2756 | nparam = isl_space_dim(graph->node[0].space, isl_dim_param); |
2757 | param_pos = 4; |
2758 | total = param_pos + 2 * nparam; |
2759 | for (i = 0; i < graph->n; ++i) { |
2760 | struct isl_sched_node *node = &graph->node[graph->sorted[i]]; |
2761 | if (node_update_vmap(node) < 0) |
2762 | return isl_stat_error; |
2763 | node->start = total; |
2764 | total += 1 + node->nparam + 2 * node->nvar; |
2765 | } |
2766 | |
2767 | if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0) |
2768 | return isl_stat_error; |
2769 | if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0) |
2770 | return isl_stat_error; |
2771 | if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0) |
2772 | return isl_stat_error; |
2773 | |
2774 | space = isl_space_set_alloc(ctx, 0, total); |
2775 | isl_basic_set_free(graph->lp); |
2776 | n_eq += 2 + parametric; |
2777 | |
2778 | graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq); |
2779 | |
2780 | if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0) |
2781 | return isl_stat_error; |
2782 | if (parametric && add_param_sum_constraint(graph, 2) < 0) |
2783 | return isl_stat_error; |
2784 | if (add_var_sum_constraint(graph, 3) < 0) |
2785 | return isl_stat_error; |
2786 | if (add_bound_constant_constraints(ctx, graph) < 0) |
2787 | return isl_stat_error; |
2788 | if (add_bound_coefficient_constraints(ctx, graph) < 0) |
2789 | return isl_stat_error; |
2790 | if (add_all_validity_constraints(graph, use_coincidence) < 0) |
2791 | return isl_stat_error; |
2792 | if (add_all_proximity_constraints(graph, use_coincidence) < 0) |
2793 | return isl_stat_error; |
2794 | |
2795 | return isl_stat_ok; |
2796 | } |
2797 | |
2798 | /* Analyze the conflicting constraint found by |
2799 | * isl_tab_basic_set_non_trivial_lexmin. If it corresponds to the validity |
2800 | * constraint of one of the edges between distinct nodes, living, moreover |
2801 | * in distinct SCCs, then record the source and sink SCC as this may |
2802 | * be a good place to cut between SCCs. |
2803 | */ |
2804 | static int check_conflict(int con, void *user) |
2805 | { |
2806 | int i; |
2807 | struct isl_sched_graph *graph = user; |
2808 | |
2809 | if (graph->src_scc >= 0) |
2810 | return 0; |
2811 | |
2812 | con -= graph->lp->n_eq; |
2813 | |
2814 | if (con >= graph->lp->n_ineq) |
2815 | return 0; |
2816 | |
2817 | for (i = 0; i < graph->n_edge; ++i) { |
2818 | if (!is_validity(&graph->edge[i])) |
2819 | continue; |
2820 | if (graph->edge[i].src == graph->edge[i].dst) |
2821 | continue; |
2822 | if (graph->edge[i].src->scc == graph->edge[i].dst->scc) |
2823 | continue; |
2824 | if (graph->edge[i].start > con) |
2825 | continue; |
2826 | if (graph->edge[i].end <= con) |
2827 | continue; |
2828 | graph->src_scc = graph->edge[i].src->scc; |
2829 | graph->dst_scc = graph->edge[i].dst->scc; |
2830 | } |
2831 | |
2832 | return 0; |
2833 | } |
2834 | |
2835 | /* Check whether the next schedule row of the given node needs to be |
2836 | * non-trivial. Lower-dimensional domains may have some trivial rows, |
2837 | * but as soon as the number of remaining required non-trivial rows |
2838 | * is as large as the number or remaining rows to be computed, |
2839 | * all remaining rows need to be non-trivial. |
2840 | */ |
2841 | static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node) |
2842 | { |
2843 | return node->nvar - node->rank >= graph->maxvar - graph->n_row; |
2844 | } |
2845 | |
2846 | /* Construct a non-triviality region with triviality directions |
2847 | * corresponding to the rows of "indep". |
2848 | * The rows of "indep" are expressed in terms of the schedule coefficients c_i, |
2849 | * while the triviality directions are expressed in terms of |
2850 | * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing |
2851 | * before c^+_i. Furthermore, |
2852 | * the pairs of non-negative variables representing the coefficients |
2853 | * are stored in the opposite order. |
2854 | */ |
2855 | static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep) |
2856 | { |
2857 | isl_ctx *ctx; |
2858 | isl_mat *mat; |
2859 | int i, j, n, n_var; |
2860 | |
2861 | if (!indep) |
2862 | return NULL((void*)0); |
2863 | |
2864 | ctx = isl_mat_get_ctx(indep); |
2865 | n = isl_mat_rows(indep); |
2866 | n_var = isl_mat_cols(indep); |
2867 | mat = isl_mat_alloc(ctx, n, 2 * n_var); |
2868 | if (!mat) |
2869 | return NULL((void*)0); |
2870 | for (i = 0; i < n; ++i) { |
2871 | for (j = 0; j < n_var; ++j) { |
2872 | int nj = n_var - 1 - j; |
2873 | isl_int_neg(mat->row[i][2 * nj], indep->row[i][j])isl_sioimath_neg((mat->row[i][2 * nj]), *(indep->row[i] [j])); |
2874 | isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j])isl_sioimath_set((mat->row[i][2 * nj + 1]), *(indep->row [i][j])); |
2875 | } |
2876 | } |
2877 | |
2878 | return mat; |
2879 | } |
2880 | |
2881 | /* Solve the ILP problem constructed in setup_lp. |
2882 | * For each node such that all the remaining rows of its schedule |
2883 | * need to be non-trivial, we construct a non-triviality region. |
2884 | * This region imposes that the next row is independent of previous rows. |
2885 | * In particular, the non-triviality region enforces that at least |
2886 | * one of the linear combinations in the rows of node->indep is non-zero. |
2887 | */ |
2888 | static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph) |
2889 | { |
2890 | int i; |
2891 | isl_vec *sol; |
2892 | isl_basic_setisl_basic_map *lp; |
2893 | |
2894 | for (i = 0; i < graph->n; ++i) { |
2895 | struct isl_sched_node *node = &graph->node[i]; |
2896 | isl_mat *trivial; |
2897 | |
2898 | graph->region[i].pos = node_var_coef_offset(node); |
2899 | if (needs_row(graph, node)) |
2900 | trivial = construct_trivial(node->indep); |
2901 | else |
2902 | trivial = isl_mat_zero(ctx, 0, 0); |
2903 | graph->region[i].trivial = trivial; |
2904 | } |
2905 | lp = isl_basic_set_copy(graph->lp); |
2906 | sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n, |
2907 | graph->region, &check_conflict, graph); |
2908 | for (i = 0; i < graph->n; ++i) |
2909 | isl_mat_free(graph->region[i].trivial); |
2910 | return sol; |
2911 | } |
2912 | |
2913 | /* Extract the coefficients for the variables of "node" from "sol". |
2914 | * |
2915 | * Each schedule coefficient c_i_x is represented as the difference |
2916 | * between two non-negative variables c_i_x^+ - c_i_x^-. |
2917 | * The c_i_x^- appear before their c_i_x^+ counterpart. |
2918 | * Furthermore, the order of these pairs is the opposite of that |
2919 | * of the corresponding coefficients. |
2920 | * |
2921 | * Return c_i_x = c_i_x^+ - c_i_x^- |
2922 | */ |
2923 | static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node, |
2924 | __isl_keep isl_vec *sol) |
2925 | { |
2926 | int i; |
2927 | int pos; |
2928 | isl_vec *csol; |
2929 | |
2930 | if (!sol) |
2931 | return NULL((void*)0); |
2932 | csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar); |
2933 | if (!csol) |
2934 | return NULL((void*)0); |
2935 | |
2936 | pos = 1 + node_var_coef_offset(node); |
2937 | for (i = 0; i < node->nvar; ++i) |
2938 | isl_int_sub(csol->el[node->nvar - 1 - i],isl_sioimath_sub((csol->el[node->nvar - 1 - i]), *(sol-> el[pos + 2 * i + 1]), *(sol->el[pos + 2 * i])) |
2939 | sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i])isl_sioimath_sub((csol->el[node->nvar - 1 - i]), *(sol-> el[pos + 2 * i + 1]), *(sol->el[pos + 2 * i])); |
2940 | |
2941 | return csol; |
2942 | } |
2943 | |
2944 | /* Update the schedules of all nodes based on the given solution |
2945 | * of the LP problem. |
2946 | * The new row is added to the current band. |
2947 | * All possibly negative coefficients are encoded as a difference |
2948 | * of two non-negative variables, so we need to perform the subtraction |
2949 | * here. |
2950 | * |
2951 | * If coincident is set, then the caller guarantees that the new |
2952 | * row satisfies the coincidence constraints. |
2953 | */ |
2954 | static int update_schedule(struct isl_sched_graph *graph, |
2955 | __isl_take isl_vec *sol, int coincident) |
2956 | { |
2957 | int i, j; |
2958 | isl_vec *csol = NULL((void*)0); |
2959 | |
2960 | if (!sol) |
2961 | goto error; |
2962 | if (sol->size == 0) |
2963 | isl_die(sol->ctx, isl_error_internal,do { isl_handle_error(sol->ctx, isl_error_internal, "no solution found" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 2964); goto error; } while (0) |
2964 | "no solution found", goto error)do { isl_handle_error(sol->ctx, isl_error_internal, "no solution found" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 2964); goto error; } while (0); |
2965 | if (graph->n_total_row >= graph->max_row) |
2966 | isl_die(sol->ctx, isl_error_internal,do { isl_handle_error(sol->ctx, isl_error_internal, "too many schedule rows" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 2967); goto error; } while (0) |
2967 | "too many schedule rows", goto error)do { isl_handle_error(sol->ctx, isl_error_internal, "too many schedule rows" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 2967); goto error; } while (0); |
2968 | |
2969 | for (i = 0; i < graph->n; ++i) { |
2970 | struct isl_sched_node *node = &graph->node[i]; |
2971 | int pos; |
2972 | int row = isl_mat_rows(node->sched); |
2973 | |
2974 | isl_vec_free(csol); |
2975 | csol = extract_var_coef(node, sol); |
2976 | if (!csol) |
2977 | goto error; |
2978 | |
2979 | isl_map_free(node->sched_map); |
2980 | node->sched_map = NULL((void*)0); |
2981 | node->sched = isl_mat_add_rows(node->sched, 1); |
2982 | if (!node->sched) |
2983 | goto error; |
2984 | pos = node_cst_coef_offset(node); |
2985 | node->sched = isl_mat_set_element(node->sched, |
2986 | row, 0, sol->el[1 + pos]); |
2987 | pos = node_par_coef_offset(node); |
2988 | for (j = 0; j < node->nparam; ++j) |
2989 | node->sched = isl_mat_set_element(node->sched, |
2990 | row, 1 + j, sol->el[1 + pos + j]); |
2991 | for (j = 0; j < node->nvar; ++j) |
2992 | node->sched = isl_mat_set_element(node->sched, |
2993 | row, 1 + node->nparam + j, csol->el[j]); |
2994 | node->coincident[graph->n_total_row] = coincident; |
2995 | } |
2996 | isl_vec_free(sol); |
2997 | isl_vec_free(csol); |
2998 | |
2999 | graph->n_row++; |
3000 | graph->n_total_row++; |
3001 | |
3002 | return 0; |
3003 | error: |
3004 | isl_vec_free(sol); |
3005 | isl_vec_free(csol); |
3006 | return -1; |
3007 | } |
3008 | |
3009 | /* Convert row "row" of node->sched into an isl_aff living in "ls" |
3010 | * and return this isl_aff. |
3011 | */ |
3012 | static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls, |
3013 | struct isl_sched_node *node, int row) |
3014 | { |
3015 | int j; |
3016 | isl_int v; |
3017 | isl_aff *aff; |
3018 | |
3019 | isl_int_init(v)isl_sioimath_init((v)); |
3020 | |
3021 | aff = isl_aff_zero_on_domain(ls); |
3022 | if (isl_mat_get_element(node->sched, row, 0, &v) < 0) |
3023 | goto error; |
3024 | aff = isl_aff_set_constant(aff, v); |
3025 | for (j = 0; j < node->nparam; ++j) { |
3026 | if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0) |
3027 | goto error; |
3028 | aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v); |
3029 | } |
3030 | for (j = 0; j < node->nvar; ++j) { |
3031 | if (isl_mat_get_element(node->sched, row, |
3032 | 1 + node->nparam + j, &v) < 0) |
3033 | goto error; |
3034 | aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v); |
3035 | } |
3036 | |
3037 | isl_int_clear(v)isl_sioimath_clear((v)); |
3038 | |
3039 | return aff; |
3040 | error: |
3041 | isl_int_clear(v)isl_sioimath_clear((v)); |
3042 | isl_aff_free(aff); |
3043 | return NULL((void*)0); |
3044 | } |
3045 | |
3046 | /* Convert the "n" rows starting at "first" of node->sched into a multi_aff |
3047 | * and return this multi_aff. |
3048 | * |
3049 | * The result is defined over the uncompressed node domain. |
3050 | */ |
3051 | static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff( |
3052 | struct isl_sched_node *node, int first, int n) |
3053 | { |
3054 | int i; |
3055 | isl_space *space; |
3056 | isl_local_space *ls; |
3057 | isl_aff *aff; |
3058 | isl_multi_aff *ma; |
3059 | int nrow; |
3060 | |
3061 | if (!node) |
3062 | return NULL((void*)0); |
3063 | nrow = isl_mat_rows(node->sched); |
Value stored to 'nrow' is never read | |
3064 | if (node->compressed) |
3065 | space = isl_multi_aff_get_domain_space(node->decompress); |
3066 | else |
3067 | space = isl_space_copy(node->space); |
3068 | ls = isl_local_space_from_space(isl_space_copy(space)); |
3069 | space = isl_space_from_domain(space); |
3070 | space = isl_space_add_dims(space, isl_dim_out, n); |
3071 | ma = isl_multi_aff_zero(space); |
3072 | |
3073 | for (i = first; i < first + n; ++i) { |
3074 | aff = extract_schedule_row(isl_local_space_copy(ls), node, i); |
3075 | ma = isl_multi_aff_set_aff(ma, i - first, aff); |
3076 | } |
3077 | |
3078 | isl_local_space_free(ls); |
3079 | |
3080 | if (node->compressed) |
3081 | ma = isl_multi_aff_pullback_multi_aff(ma, |
3082 | isl_multi_aff_copy(node->compress)); |
3083 | |
3084 | return ma; |
3085 | } |
3086 | |
3087 | /* Convert node->sched into a multi_aff and return this multi_aff. |
3088 | * |
3089 | * The result is defined over the uncompressed node domain. |
3090 | */ |
3091 | static __isl_give isl_multi_aff *node_extract_schedule_multi_aff( |
3092 | struct isl_sched_node *node) |
3093 | { |
3094 | int nrow; |
3095 | |
3096 | nrow = isl_mat_rows(node->sched); |
3097 | return node_extract_partial_schedule_multi_aff(node, 0, nrow); |
3098 | } |
3099 | |
3100 | /* Convert node->sched into a map and return this map. |
3101 | * |
3102 | * The result is cached in node->sched_map, which needs to be released |
3103 | * whenever node->sched is updated. |
3104 | * It is defined over the uncompressed node domain. |
3105 | */ |
3106 | static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node) |
3107 | { |
3108 | if (!node->sched_map) { |
3109 | isl_multi_aff *ma; |
3110 | |
3111 | ma = node_extract_schedule_multi_aff(node); |
3112 | node->sched_map = isl_map_from_multi_aff(ma); |
3113 | } |
3114 | |
3115 | return isl_map_copy(node->sched_map); |
3116 | } |
3117 | |
3118 | /* Construct a map that can be used to update a dependence relation |
3119 | * based on the current schedule. |
3120 | * That is, construct a map expressing that source and sink |
3121 | * are executed within the same iteration of the current schedule. |
3122 | * This map can then be intersected with the dependence relation. |
3123 | * This is not the most efficient way, but this shouldn't be a critical |
3124 | * operation. |
3125 | */ |
3126 | static __isl_give isl_map *specializer(struct isl_sched_node *src, |
3127 | struct isl_sched_node *dst) |
3128 | { |
3129 | isl_map *src_sched, *dst_sched; |
3130 | |
3131 | src_sched = node_extract_schedule(src); |
3132 | dst_sched = node_extract_schedule(dst); |
3133 | return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched)); |
3134 | } |
3135 | |
3136 | /* Intersect the domains of the nested relations in domain and range |
3137 | * of "umap" with "map". |
3138 | */ |
3139 | static __isl_give isl_union_map *intersect_domains( |
3140 | __isl_take isl_union_map *umap, __isl_keep isl_map *map) |
3141 | { |
3142 | isl_union_set *uset; |
3143 | |
3144 | umap = isl_union_map_zip(umap); |
3145 | uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map))); |
3146 | umap = isl_union_map_intersect_domain(umap, uset); |
3147 | umap = isl_union_map_zip(umap); |
3148 | return umap; |
3149 | } |
3150 | |
3151 | /* Update the dependence relation of the given edge based |
3152 | * on the current schedule. |
3153 | * If the dependence is carried completely by the current schedule, then |
3154 | * it is removed from the edge_tables. It is kept in the list of edges |
3155 | * as otherwise all edge_tables would have to be recomputed. |
3156 | * |
3157 | * If the edge is of a type that can appear multiple times |
3158 | * between the same pair of nodes, then it is added to |
3159 | * the edge table (again). This prevents the situation |
3160 | * where none of these edges is referenced from the edge table |
3161 | * because the one that was referenced turned out to be empty and |
3162 | * was therefore removed from the table. |
3163 | */ |
3164 | static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph, |
3165 | struct isl_sched_edge *edge) |
3166 | { |
3167 | int empty; |
3168 | isl_map *id; |
3169 | |
3170 | id = specializer(edge->src, edge->dst); |
3171 | edge->map = isl_map_intersect(edge->map, isl_map_copy(id)); |
3172 | if (!edge->map) |
3173 | goto error; |
3174 | |
3175 | if (edge->tagged_condition) { |
3176 | edge->tagged_condition = |
3177 | intersect_domains(edge->tagged_condition, id); |
3178 | if (!edge->tagged_condition) |
3179 | goto error; |
3180 | } |
3181 | if (edge->tagged_validity) { |
3182 | edge->tagged_validity = |
3183 | intersect_domains(edge->tagged_validity, id); |
3184 | if (!edge->tagged_validity) |
3185 | goto error; |
3186 | } |
3187 | |
3188 | empty = isl_map_plain_is_empty(edge->map); |
3189 | if (empty < 0) |
3190 | goto error; |
3191 | if (empty) { |
3192 | graph_remove_edge(graph, edge); |
3193 | } else if (is_multi_edge_type(edge)) { |
3194 | if (graph_edge_tables_add(ctx, graph, edge) < 0) |
3195 | goto error; |
3196 | } |
3197 | |
3198 | isl_map_free(id); |
3199 | return isl_stat_ok; |
3200 | error: |
3201 | isl_map_free(id); |
3202 | return isl_stat_error; |
3203 | } |
3204 | |
3205 | /* Does the domain of "umap" intersect "uset"? |
3206 | */ |
3207 | static int domain_intersects(__isl_keep isl_union_map *umap, |
3208 | __isl_keep isl_union_set *uset) |
3209 | { |
3210 | int empty; |
3211 | |
3212 | umap = isl_union_map_copy(umap); |
3213 | umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset)); |
3214 | empty = isl_union_map_is_empty(umap); |
3215 | isl_union_map_free(umap); |
3216 | |
3217 | return empty < 0 ? -1 : !empty; |
3218 | } |
3219 | |
3220 | /* Does the range of "umap" intersect "uset"? |
3221 | */ |
3222 | static int range_intersects(__isl_keep isl_union_map *umap, |
3223 | __isl_keep isl_union_set *uset) |
3224 | { |
3225 | int empty; |
3226 | |
3227 | umap = isl_union_map_copy(umap); |
3228 | umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset)); |
3229 | empty = isl_union_map_is_empty(umap); |
3230 | isl_union_map_free(umap); |
3231 | |
3232 | return empty < 0 ? -1 : !empty; |
3233 | } |
3234 | |
3235 | /* Are the condition dependences of "edge" local with respect to |
3236 | * the current schedule? |
3237 | * |
3238 | * That is, are domain and range of the condition dependences mapped |
3239 | * to the same point? |
3240 | * |
3241 | * In other words, is the condition false? |
3242 | */ |
3243 | static int is_condition_false(struct isl_sched_edge *edge) |
3244 | { |
3245 | isl_union_map *umap; |
3246 | isl_map *map, *sched, *test; |
3247 | int empty, local; |
3248 | |
3249 | empty = isl_union_map_is_empty(edge->tagged_condition); |
3250 | if (empty < 0 || empty) |
3251 | return empty; |
3252 | |
3253 | umap = isl_union_map_copy(edge->tagged_condition); |
3254 | umap = isl_union_map_zip(umap); |
3255 | umap = isl_union_set_unwrap(isl_union_map_domain(umap)); |
3256 | map = isl_map_from_union_map(umap); |
3257 | |
3258 | sched = node_extract_schedule(edge->src); |
3259 | map = isl_map_apply_domain(map, sched); |
3260 | sched = node_extract_schedule(edge->dst); |
3261 | map = isl_map_apply_range(map, sched); |
3262 | |
3263 | test = isl_map_identity(isl_map_get_space(map)); |
3264 | local = isl_map_is_subset(map, test); |
3265 | isl_map_free(map); |
3266 | isl_map_free(test); |
3267 | |
3268 | return local; |
3269 | } |
3270 | |
3271 | /* For each conditional validity constraint that is adjacent |
3272 | * to a condition with domain in condition_source or range in condition_sink, |
3273 | * turn it into an unconditional validity constraint. |
3274 | */ |
3275 | static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph, |
3276 | __isl_take isl_union_set *condition_source, |
3277 | __isl_take isl_union_set *condition_sink) |
3278 | { |
3279 | int i; |
3280 | |
3281 | condition_source = isl_union_set_coalesce(condition_source); |
3282 | condition_sink = isl_union_set_coalesce(condition_sink); |
3283 | |
3284 | for (i = 0; i < graph->n_edge; ++i) { |
3285 | int adjacent; |
3286 | isl_union_map *validity; |
3287 | |
3288 | if (!is_conditional_validity(&graph->edge[i])) |
3289 | continue; |
3290 | if (is_validity(&graph->edge[i])) |
3291 | continue; |
3292 | |
3293 | validity = graph->edge[i].tagged_validity; |
3294 | adjacent = domain_intersects(validity, condition_sink); |
3295 | if (adjacent >= 0 && !adjacent) |
3296 | adjacent = range_intersects(validity, condition_source); |
3297 | if (adjacent < 0) |
3298 | goto error; |
3299 | if (!adjacent) |
3300 | continue; |
3301 | |
3302 | set_validity(&graph->edge[i]); |
3303 | } |
3304 | |
3305 | isl_union_set_free(condition_source); |
3306 | isl_union_set_free(condition_sink); |
3307 | return 0; |
3308 | error: |
3309 | isl_union_set_free(condition_source); |
3310 | isl_union_set_free(condition_sink); |
3311 | return -1; |
3312 | } |
3313 | |
3314 | /* Update the dependence relations of all edges based on the current schedule |
3315 | * and enforce conditional validity constraints that are adjacent |
3316 | * to satisfied condition constraints. |
3317 | * |
3318 | * First check if any of the condition constraints are satisfied |
3319 | * (i.e., not local to the outer schedule) and keep track of |
3320 | * their domain and range. |
3321 | * Then update all dependence relations (which removes the non-local |
3322 | * constraints). |
3323 | * Finally, if any condition constraints turned out to be satisfied, |
3324 | * then turn all adjacent conditional validity constraints into |
3325 | * unconditional validity constraints. |
3326 | */ |
3327 | static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph) |
3328 | { |
3329 | int i; |
3330 | int any = 0; |
3331 | isl_union_set *source, *sink; |
3332 | |
3333 | source = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); |
3334 | sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); |
3335 | for (i = 0; i < graph->n_edge; ++i) { |
3336 | int local; |
3337 | isl_union_set *uset; |
3338 | isl_union_map *umap; |
3339 | |
3340 | if (!is_condition(&graph->edge[i])) |
3341 | continue; |
3342 | if (is_local(&graph->edge[i])) |
3343 | continue; |
3344 | local = is_condition_false(&graph->edge[i]); |
3345 | if (local < 0) |
3346 | goto error; |
3347 | if (local) |
3348 | continue; |
3349 | |
3350 | any = 1; |
3351 | |
3352 | umap = isl_union_map_copy(graph->edge[i].tagged_condition); |
3353 | uset = isl_union_map_domain(umap); |
3354 | source = isl_union_set_union(source, uset); |
3355 | |
3356 | umap = isl_union_map_copy(graph->edge[i].tagged_condition); |
3357 | uset = isl_union_map_range(umap); |
3358 | sink = isl_union_set_union(sink, uset); |
3359 | } |
3360 | |
3361 | for (i = 0; i < graph->n_edge; ++i) { |
3362 | if (update_edge(ctx, graph, &graph->edge[i]) < 0) |
3363 | goto error; |
3364 | } |
3365 | |
3366 | if (any) |
3367 | return unconditionalize_adjacent_validity(graph, source, sink); |
3368 | |
3369 | isl_union_set_free(source); |
3370 | isl_union_set_free(sink); |
3371 | return 0; |
3372 | error: |
3373 | isl_union_set_free(source); |
3374 | isl_union_set_free(sink); |
3375 | return -1; |
3376 | } |
3377 | |
3378 | static void next_band(struct isl_sched_graph *graph) |
3379 | { |
3380 | graph->band_start = graph->n_total_row; |
3381 | } |
3382 | |
3383 | /* Return the union of the universe domains of the nodes in "graph" |
3384 | * that satisfy "pred". |
3385 | */ |
3386 | static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx, |
3387 | struct isl_sched_graph *graph, |
3388 | int (*pred)(struct isl_sched_node *node, int data), int data) |
3389 | { |
3390 | int i; |
3391 | isl_setisl_map *set; |
3392 | isl_union_set *dom; |
3393 | |
3394 | for (i = 0; i < graph->n; ++i) |
3395 | if (pred(&graph->node[i], data)) |
3396 | break; |
3397 | |
3398 | if (i >= graph->n) |
3399 | isl_die(ctx, isl_error_internal,do { isl_handle_error(ctx, isl_error_internal, "empty component" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 3400); return ((void*)0); } while (0) |
3400 | "empty component", return NULL)do { isl_handle_error(ctx, isl_error_internal, "empty component" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 3400); return ((void*)0); } while (0); |
3401 | |
3402 | set = isl_set_universe(isl_space_copy(graph->node[i].space)); |
3403 | dom = isl_union_set_from_set(set); |
3404 | |
3405 | for (i = i + 1; i < graph->n; ++i) { |
3406 | if (!pred(&graph->node[i], data)) |
3407 | continue; |
3408 | set = isl_set_universe(isl_space_copy(graph->node[i].space)); |
3409 | dom = isl_union_set_union(dom, isl_union_set_from_set(set)); |
3410 | } |
3411 | |
3412 | return dom; |
3413 | } |
3414 | |
3415 | /* Return a list of unions of universe domains, where each element |
3416 | * in the list corresponds to an SCC (or WCC) indexed by node->scc. |
3417 | */ |
3418 | static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx, |
3419 | struct isl_sched_graph *graph) |
3420 | { |
3421 | int i; |
3422 | isl_union_set_list *filters; |
3423 | |
3424 | filters = isl_union_set_list_alloc(ctx, graph->scc); |
3425 | for (i = 0; i < graph->scc; ++i) { |
3426 | isl_union_set *dom; |
3427 | |
3428 | dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i); |
3429 | filters = isl_union_set_list_add(filters, dom); |
3430 | } |
3431 | |
3432 | return filters; |
3433 | } |
3434 | |
3435 | /* Return a list of two unions of universe domains, one for the SCCs up |
3436 | * to and including graph->src_scc and another for the other SCCs. |
3437 | */ |
3438 | static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx, |
3439 | struct isl_sched_graph *graph) |
3440 | { |
3441 | isl_union_set *dom; |
3442 | isl_union_set_list *filters; |
3443 | |
3444 | filters = isl_union_set_list_alloc(ctx, 2); |
3445 | dom = isl_sched_graph_domain(ctx, graph, |
3446 | &node_scc_at_most, graph->src_scc); |
3447 | filters = isl_union_set_list_add(filters, dom); |
3448 | dom = isl_sched_graph_domain(ctx, graph, |
3449 | &node_scc_at_least, graph->src_scc + 1); |
3450 | filters = isl_union_set_list_add(filters, dom); |
3451 | |
3452 | return filters; |
3453 | } |
3454 | |
3455 | /* Copy nodes that satisfy node_pred from the src dependence graph |
3456 | * to the dst dependence graph. |
3457 | */ |
3458 | static isl_stat copy_nodes(struct isl_sched_graph *dst, |
3459 | struct isl_sched_graph *src, |
3460 | int (*node_pred)(struct isl_sched_node *node, int data), int data) |
3461 | { |
3462 | int i; |
3463 | |
3464 | dst->n = 0; |
3465 | for (i = 0; i < src->n; ++i) { |
3466 | int j; |
3467 | |
3468 | if (!node_pred(&src->node[i], data)) |
3469 | continue; |
3470 | |
3471 | j = dst->n; |
3472 | dst->node[j].space = isl_space_copy(src->node[i].space); |
3473 | dst->node[j].compressed = src->node[i].compressed; |
3474 | dst->node[j].hull = isl_set_copy(src->node[i].hull); |
3475 | dst->node[j].compress = |
3476 | isl_multi_aff_copy(src->node[i].compress); |
3477 | dst->node[j].decompress = |
3478 | isl_multi_aff_copy(src->node[i].decompress); |
3479 | dst->node[j].nvar = src->node[i].nvar; |
3480 | dst->node[j].nparam = src->node[i].nparam; |
3481 | dst->node[j].sched = isl_mat_copy(src->node[i].sched); |
3482 | dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map); |
3483 | dst->node[j].coincident = src->node[i].coincident; |
3484 | dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes); |
3485 | dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds); |
3486 | dst->node[j].max = isl_vec_copy(src->node[i].max); |
3487 | dst->n++; |
3488 | |
3489 | if (!dst->node[j].space || !dst->node[j].sched) |
3490 | return isl_stat_error; |
3491 | if (dst->node[j].compressed && |
3492 | (!dst->node[j].hull || !dst->node[j].compress || |
3493 | !dst->node[j].decompress)) |
3494 | return isl_stat_error; |
3495 | } |
3496 | |
3497 | return isl_stat_ok; |
3498 | } |
3499 | |
3500 | /* Copy non-empty edges that satisfy edge_pred from the src dependence graph |
3501 | * to the dst dependence graph. |
3502 | * If the source or destination node of the edge is not in the destination |
3503 | * graph, then it must be a backward proximity edge and it should simply |
3504 | * be ignored. |
3505 | */ |
3506 | static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst, |
3507 | struct isl_sched_graph *src, |
3508 | int (*edge_pred)(struct isl_sched_edge *edge, int data), int data) |
3509 | { |
3510 | int i; |
3511 | |
3512 | dst->n_edge = 0; |
3513 | for (i = 0; i < src->n_edge; ++i) { |
3514 | struct isl_sched_edge *edge = &src->edge[i]; |
3515 | isl_map *map; |
3516 | isl_union_map *tagged_condition; |
3517 | isl_union_map *tagged_validity; |
3518 | struct isl_sched_node *dst_src, *dst_dst; |
3519 | |
3520 | if (!edge_pred(edge, data)) |
3521 | continue; |
3522 | |
3523 | if (isl_map_plain_is_empty(edge->map)) |
3524 | continue; |
3525 | |
3526 | dst_src = graph_find_node(ctx, dst, edge->src->space); |
3527 | dst_dst = graph_find_node(ctx, dst, edge->dst->space); |
3528 | if (!dst_src || !dst_dst) |
3529 | return isl_stat_error; |
3530 | if (!is_node(dst, dst_src) || !is_node(dst, dst_dst)) { |
3531 | if (is_validity(edge) || is_conditional_validity(edge)) |
3532 | isl_die(ctx, isl_error_internal,do { isl_handle_error(ctx, isl_error_internal, "backward (conditional) validity edge" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 3534); return isl_stat_error; } while (0) |
3533 | "backward (conditional) validity edge",do { isl_handle_error(ctx, isl_error_internal, "backward (conditional) validity edge" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 3534); return isl_stat_error; } while (0) |
3534 | return isl_stat_error)do { isl_handle_error(ctx, isl_error_internal, "backward (conditional) validity edge" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 3534); return isl_stat_error; } while (0); |
3535 | continue; |
3536 | } |
3537 | |
3538 | map = isl_map_copy(edge->map); |
3539 | tagged_condition = isl_union_map_copy(edge->tagged_condition); |
3540 | tagged_validity = isl_union_map_copy(edge->tagged_validity); |
3541 | |
3542 | dst->edge[dst->n_edge].src = dst_src; |
3543 | dst->edge[dst->n_edge].dst = dst_dst; |
3544 | dst->edge[dst->n_edge].map = map; |
3545 | dst->edge[dst->n_edge].tagged_condition = tagged_condition; |
3546 | dst->edge[dst->n_edge].tagged_validity = tagged_validity; |
3547 | dst->edge[dst->n_edge].types = edge->types; |
3548 | dst->n_edge++; |
3549 | |
3550 | if (edge->tagged_condition && !tagged_condition) |
3551 | return isl_stat_error; |
3552 | if (edge->tagged_validity && !tagged_validity) |
3553 | return isl_stat_error; |
3554 | |
3555 | if (graph_edge_tables_add(ctx, dst, |
3556 | &dst->edge[dst->n_edge - 1]) < 0) |
3557 | return isl_stat_error; |
3558 | } |
3559 | |
3560 | return isl_stat_ok; |
3561 | } |
3562 | |
3563 | /* Compute the maximal number of variables over all nodes. |
3564 | * This is the maximal number of linearly independent schedule |
3565 | * rows that we need to compute. |
3566 | * Just in case we end up in a part of the dependence graph |
3567 | * with only lower-dimensional domains, we make sure we will |
3568 | * compute the required amount of extra linearly independent rows. |
3569 | */ |
3570 | static int compute_maxvar(struct isl_sched_graph *graph) |
3571 | { |
3572 | int i; |
3573 | |
3574 | graph->maxvar = 0; |
3575 | for (i = 0; i < graph->n; ++i) { |
3576 | struct isl_sched_node *node = &graph->node[i]; |
3577 | int nvar; |
3578 | |
3579 | if (node_update_vmap(node) < 0) |
3580 | return -1; |
3581 | nvar = node->nvar + graph->n_row - node->rank; |
3582 | if (nvar > graph->maxvar) |
3583 | graph->maxvar = nvar; |
3584 | } |
3585 | |
3586 | return 0; |
3587 | } |
3588 | |
3589 | /* Extract the subgraph of "graph" that consists of the nodes satisfying |
3590 | * "node_pred" and the edges satisfying "edge_pred" and store |
3591 | * the result in "sub". |
3592 | */ |
3593 | static isl_stat extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph, |
3594 | int (*node_pred)(struct isl_sched_node *node, int data), |
3595 | int (*edge_pred)(struct isl_sched_edge *edge, int data), |
3596 | int data, struct isl_sched_graph *sub) |
3597 | { |
3598 | int i, n = 0, n_edge = 0; |
3599 | int t; |
3600 | |
3601 | for (i = 0; i < graph->n; ++i) |
3602 | if (node_pred(&graph->node[i], data)) |
3603 | ++n; |
3604 | for (i = 0; i < graph->n_edge; ++i) |
3605 | if (edge_pred(&graph->edge[i], data)) |
3606 | ++n_edge; |
3607 | if (graph_alloc(ctx, sub, n, n_edge) < 0) |
3608 | return isl_stat_error; |
3609 | sub->root = graph->root; |
3610 | if (copy_nodes(sub, graph, node_pred, data) < 0) |
3611 | return isl_stat_error; |
3612 | if (graph_init_table(ctx, sub) < 0) |
3613 | return isl_stat_error; |
3614 | for (t = 0; t <= isl_edge_last; ++t) |
3615 | sub->max_edge[t] = graph->max_edge[t]; |
3616 | if (graph_init_edge_tables(ctx, sub) < 0) |
3617 | return isl_stat_error; |
3618 | if (copy_edges(ctx, sub, graph, edge_pred, data) < 0) |
3619 | return isl_stat_error; |
3620 | sub->n_row = graph->n_row; |
3621 | sub->max_row = graph->max_row; |
3622 | sub->n_total_row = graph->n_total_row; |
3623 | sub->band_start = graph->band_start; |
3624 | |
3625 | return isl_stat_ok; |
3626 | } |
3627 | |
3628 | static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node, |
3629 | struct isl_sched_graph *graph); |
3630 | static __isl_give isl_schedule_node *compute_schedule_wcc( |
3631 | isl_schedule_node *node, struct isl_sched_graph *graph); |
3632 | |
3633 | /* Compute a schedule for a subgraph of "graph". In particular, for |
3634 | * the graph composed of nodes that satisfy node_pred and edges that |
3635 | * that satisfy edge_pred. |
3636 | * If the subgraph is known to consist of a single component, then wcc should |
3637 | * be set and then we call compute_schedule_wcc on the constructed subgraph. |
3638 | * Otherwise, we call compute_schedule, which will check whether the subgraph |
3639 | * is connected. |
3640 | * |
3641 | * The schedule is inserted at "node" and the updated schedule node |
3642 | * is returned. |
3643 | */ |
3644 | static __isl_give isl_schedule_node *compute_sub_schedule( |
3645 | __isl_take isl_schedule_node *node, isl_ctx *ctx, |
3646 | struct isl_sched_graph *graph, |
3647 | int (*node_pred)(struct isl_sched_node *node, int data), |
3648 | int (*edge_pred)(struct isl_sched_edge *edge, int data), |
3649 | int data, int wcc) |
3650 | { |
3651 | struct isl_sched_graph split = { 0 }; |
3652 | |
3653 | if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data, |
3654 | &split) < 0) |
3655 | goto error; |
3656 | |
3657 | if (wcc) |
3658 | node = compute_schedule_wcc(node, &split); |
3659 | else |
3660 | node = compute_schedule(node, &split); |
3661 | |
3662 | graph_free(ctx, &split); |
3663 | return node; |
3664 | error: |
3665 | graph_free(ctx, &split); |
3666 | return isl_schedule_node_free(node); |
3667 | } |
3668 | |
3669 | static int edge_scc_exactly(struct isl_sched_edge *edge, int scc) |
3670 | { |
3671 | return edge->src->scc == scc && edge->dst->scc == scc; |
3672 | } |
3673 | |
3674 | static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc) |
3675 | { |
3676 | return edge->dst->scc <= scc; |
3677 | } |
3678 | |
3679 | static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc) |
3680 | { |
3681 | return edge->src->scc >= scc; |
3682 | } |
3683 | |
3684 | /* Reset the current band by dropping all its schedule rows. |
3685 | */ |
3686 | static isl_stat reset_band(struct isl_sched_graph *graph) |
3687 | { |
3688 | int i; |
3689 | int drop; |
3690 | |
3691 | drop = graph->n_total_row - graph->band_start; |
3692 | graph->n_total_row -= drop; |
3693 | graph->n_row -= drop; |
3694 | |
3695 | for (i = 0; i < graph->n; ++i) { |
3696 | struct isl_sched_node *node = &graph->node[i]; |
3697 | |
3698 | isl_map_free(node->sched_map); |
3699 | node->sched_map = NULL((void*)0); |
3700 | |
3701 | node->sched = isl_mat_drop_rows(node->sched, |
3702 | graph->band_start, drop); |
3703 | |
3704 | if (!node->sched) |
3705 | return isl_stat_error; |
3706 | } |
3707 | |
3708 | return isl_stat_ok; |
3709 | } |
3710 | |
3711 | /* Split the current graph into two parts and compute a schedule for each |
3712 | * part individually. In particular, one part consists of all SCCs up |
3713 | * to and including graph->src_scc, while the other part contains the other |
3714 | * SCCs. The split is enforced by a sequence node inserted at position "node" |
3715 | * in the schedule tree. Return the updated schedule node. |
3716 | * If either of these two parts consists of a sequence, then it is spliced |
3717 | * into the sequence containing the two parts. |
3718 | * |
3719 | * The current band is reset. It would be possible to reuse |
3720 | * the previously computed rows as the first rows in the next |
3721 | * band, but recomputing them may result in better rows as we are looking |
3722 | * at a smaller part of the dependence graph. |
3723 | */ |
3724 | static __isl_give isl_schedule_node *compute_split_schedule( |
3725 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
3726 | { |
3727 | int is_seq; |
3728 | isl_ctx *ctx; |
3729 | isl_union_set_list *filters; |
3730 | |
3731 | if (!node) |
3732 | return NULL((void*)0); |
3733 | |
3734 | if (reset_band(graph) < 0) |
3735 | return isl_schedule_node_free(node); |
3736 | |
3737 | next_band(graph); |
3738 | |
3739 | ctx = isl_schedule_node_get_ctx(node); |
3740 | filters = extract_split(ctx, graph); |
3741 | node = isl_schedule_node_insert_sequence(node, filters); |
3742 | node = isl_schedule_node_child(node, 1); |
3743 | node = isl_schedule_node_child(node, 0); |
3744 | |
3745 | node = compute_sub_schedule(node, ctx, graph, |
3746 | &node_scc_at_least, &edge_src_scc_at_least, |
3747 | graph->src_scc + 1, 0); |
3748 | is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence; |
3749 | node = isl_schedule_node_parent(node); |
3750 | node = isl_schedule_node_parent(node); |
3751 | if (is_seq) |
3752 | node = isl_schedule_node_sequence_splice_child(node, 1); |
3753 | node = isl_schedule_node_child(node, 0); |
3754 | node = isl_schedule_node_child(node, 0); |
3755 | node = compute_sub_schedule(node, ctx, graph, |
3756 | &node_scc_at_most, &edge_dst_scc_at_most, |
3757 | graph->src_scc, 0); |
3758 | is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence; |
3759 | node = isl_schedule_node_parent(node); |
3760 | node = isl_schedule_node_parent(node); |
3761 | if (is_seq) |
3762 | node = isl_schedule_node_sequence_splice_child(node, 0); |
3763 | |
3764 | return node; |
3765 | } |
3766 | |
3767 | /* Insert a band node at position "node" in the schedule tree corresponding |
3768 | * to the current band in "graph". Mark the band node permutable |
3769 | * if "permutable" is set. |
3770 | * The partial schedules and the coincidence property are extracted |
3771 | * from the graph nodes. |
3772 | * Return the updated schedule node. |
3773 | */ |
3774 | static __isl_give isl_schedule_node *insert_current_band( |
3775 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
3776 | int permutable) |
3777 | { |
3778 | int i; |
3779 | int start, end, n; |
3780 | isl_multi_aff *ma; |
3781 | isl_multi_pw_aff *mpa; |
3782 | isl_multi_union_pw_aff *mupa; |
3783 | |
3784 | if (!node) |
3785 | return NULL((void*)0); |
3786 | |
3787 | if (graph->n < 1) |
3788 | isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,do { isl_handle_error(isl_schedule_node_get_ctx(node), isl_error_internal , "graph should have at least one node", "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 3790); return isl_schedule_node_free(node); } while (0) |
3789 | "graph should have at least one node",do { isl_handle_error(isl_schedule_node_get_ctx(node), isl_error_internal , "graph should have at least one node", "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 3790); return isl_schedule_node_free(node); } while (0) |
3790 | return isl_schedule_node_free(node))do { isl_handle_error(isl_schedule_node_get_ctx(node), isl_error_internal , "graph should have at least one node", "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 3790); return isl_schedule_node_free(node); } while (0); |
3791 | |
3792 | start = graph->band_start; |
3793 | end = graph->n_total_row; |
3794 | n = end - start; |
3795 | |
3796 | ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n); |
3797 | mpa = isl_multi_pw_aff_from_multi_aff(ma); |
3798 | mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa); |
3799 | |
3800 | for (i = 1; i < graph->n; ++i) { |
3801 | isl_multi_union_pw_aff *mupa_i; |
3802 | |
3803 | ma = node_extract_partial_schedule_multi_aff(&graph->node[i], |
3804 | start, n); |
3805 | mpa = isl_multi_pw_aff_from_multi_aff(ma); |
3806 | mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa); |
3807 | mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i); |
3808 | } |
3809 | node = isl_schedule_node_insert_partial_schedule(node, mupa); |
3810 | |
3811 | for (i = 0; i < n; ++i) |
3812 | node = isl_schedule_node_band_member_set_coincident(node, i, |
3813 | graph->node[0].coincident[start + i]); |
3814 | node = isl_schedule_node_band_set_permutable(node, permutable); |
3815 | |
3816 | return node; |
3817 | } |
3818 | |
3819 | /* Update the dependence relations based on the current schedule, |
3820 | * add the current band to "node" and then continue with the computation |
3821 | * of the next band. |
3822 | * Return the updated schedule node. |
3823 | */ |
3824 | static __isl_give isl_schedule_node *compute_next_band( |
3825 | __isl_take isl_schedule_node *node, |
3826 | struct isl_sched_graph *graph, int permutable) |
3827 | { |
3828 | isl_ctx *ctx; |
3829 | |
3830 | if (!node) |
3831 | return NULL((void*)0); |
3832 | |
3833 | ctx = isl_schedule_node_get_ctx(node); |
3834 | if (update_edges(ctx, graph) < 0) |
3835 | return isl_schedule_node_free(node); |
3836 | node = insert_current_band(node, graph, permutable); |
3837 | next_band(graph); |
3838 | |
3839 | node = isl_schedule_node_child(node, 0); |
3840 | node = compute_schedule(node, graph); |
3841 | node = isl_schedule_node_parent(node); |
3842 | |
3843 | return node; |
3844 | } |
3845 | |
3846 | /* Add the constraints "coef" derived from an edge from "node" to itself |
3847 | * to graph->lp in order to respect the dependences and to try and carry them. |
3848 | * "pos" is the sequence number of the edge that needs to be carried. |
3849 | * "coef" represents general constraints on coefficients (c_0, c_x) |
3850 | * of valid constraints for (y - x) with x and y instances of the node. |
3851 | * |
3852 | * The constraints added to graph->lp need to enforce |
3853 | * |
3854 | * (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x) |
3855 | * = c_j_x (y - x) >= e_i |
3856 | * |
3857 | * for each (x,y) in the dependence relation of the edge. |
3858 | * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x), |
3859 | * taking into account that each coefficient in c_j_x is represented |
3860 | * as a pair of non-negative coefficients. |
3861 | */ |
3862 | static isl_stat add_intra_constraints(struct isl_sched_graph *graph, |
3863 | struct isl_sched_node *node, __isl_take isl_basic_setisl_basic_map *coef, int pos) |
3864 | { |
3865 | int offset; |
3866 | isl_ctx *ctx; |
3867 | isl_dim_map *dim_map; |
3868 | |
3869 | if (!coef) |
3870 | return isl_stat_error; |
3871 | |
3872 | ctx = isl_basic_set_get_ctx(coef); |
3873 | offset = coef_var_offset(coef); |
3874 | dim_map = intra_dim_map(ctx, graph, node, offset, 1); |
3875 | isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1); |
3876 | graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); |
3877 | |
3878 | return isl_stat_ok; |
3879 | } |
3880 | |
3881 | /* Add the constraints "coef" derived from an edge from "src" to "dst" |
3882 | * to graph->lp in order to respect the dependences and to try and carry them. |
3883 | * "pos" is the sequence number of the edge that needs to be carried or |
3884 | * -1 if no attempt should be made to carry the dependences. |
3885 | * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y) |
3886 | * of valid constraints for (x, y) with x and y instances of "src" and "dst". |
3887 | * |
3888 | * The constraints added to graph->lp need to enforce |
3889 | * |
3890 | * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i |
3891 | * |
3892 | * for each (x,y) in the dependence relation of the edge or |
3893 | * |
3894 | * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0 |
3895 | * |
3896 | * if pos is -1. |
3897 | * That is, |
3898 | * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x) |
3899 | * or |
3900 | * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x) |
3901 | * needs to be plugged in for (c_0, c_n, c_x, c_y), |
3902 | * taking into account that each coefficient in c_j_x and c_k_x is represented |
3903 | * as a pair of non-negative coefficients. |
3904 | */ |
3905 | static isl_stat add_inter_constraints(struct isl_sched_graph *graph, |
3906 | struct isl_sched_node *src, struct isl_sched_node *dst, |
3907 | __isl_take isl_basic_setisl_basic_map *coef, int pos) |
3908 | { |
3909 | int offset; |
3910 | isl_ctx *ctx; |
3911 | isl_dim_map *dim_map; |
3912 | |
3913 | if (!coef) |
3914 | return isl_stat_error; |
3915 | |
3916 | ctx = isl_basic_set_get_ctx(coef); |
3917 | offset = coef_var_offset(coef); |
3918 | dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1); |
3919 | if (pos >= 0) |
3920 | isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1); |
3921 | graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); |
3922 | |
3923 | return isl_stat_ok; |
3924 | } |
3925 | |
3926 | /* Data structure for keeping track of the data needed |
3927 | * to exploit non-trivial lineality spaces. |
3928 | * |
3929 | * "any_non_trivial" is true if there are any non-trivial lineality spaces. |
3930 | * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL. |
3931 | * "equivalent" connects instances to other instances on the same line(s). |
3932 | * "mask" contains the domain spaces of "equivalent". |
3933 | * Any instance set not in "mask" does not have a non-trivial lineality space. |
3934 | */ |
3935 | struct isl_exploit_lineality_data { |
3936 | isl_bool any_non_trivial; |
3937 | isl_union_map *equivalent; |
3938 | isl_union_set *mask; |
3939 | }; |
3940 | |
3941 | /* Data structure collecting information used during the construction |
3942 | * of an LP for carrying dependences. |
3943 | * |
3944 | * "intra" is a sequence of coefficient constraints for intra-node edges. |
3945 | * "inter" is a sequence of coefficient constraints for inter-node edges. |
3946 | * "lineality" contains data used to exploit non-trivial lineality spaces. |
3947 | */ |
3948 | struct isl_carry { |
3949 | isl_basic_set_listisl_basic_map_list *intra; |
3950 | isl_basic_set_listisl_basic_map_list *inter; |
3951 | struct isl_exploit_lineality_data lineality; |
3952 | }; |
3953 | |
3954 | /* Free all the data stored in "carry". |
3955 | */ |
3956 | static void isl_carry_clear(struct isl_carry *carry) |
3957 | { |
3958 | isl_basic_set_list_free(carry->intra); |
3959 | isl_basic_set_list_free(carry->inter); |
3960 | isl_union_map_free(carry->lineality.equivalent); |
3961 | isl_union_set_free(carry->lineality.mask); |
3962 | } |
3963 | |
3964 | /* Return a pointer to the node in "graph" that lives in "space". |
3965 | * If the requested node has been compressed, then "space" |
3966 | * corresponds to the compressed space. |
3967 | * The graph is assumed to have such a node. |
3968 | * Return NULL in case of error. |
3969 | * |
3970 | * First try and see if "space" is the space of an uncompressed node. |
3971 | * If so, return that node. |
3972 | * Otherwise, "space" was constructed by construct_compressed_id and |
3973 | * contains a user pointer pointing to the node in the tuple id. |
3974 | * However, this node belongs to the original dependence graph. |
3975 | * If "graph" is a subgraph of this original dependence graph, |
3976 | * then the node with the same space still needs to be looked up |
3977 | * in the current graph. |
3978 | */ |
3979 | static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx, |
3980 | struct isl_sched_graph *graph, __isl_keep isl_space *space) |
3981 | { |
3982 | isl_id *id; |
3983 | struct isl_sched_node *node; |
3984 | |
3985 | if (!space) |
3986 | return NULL((void*)0); |
3987 | |
3988 | node = graph_find_node(ctx, graph, space); |
3989 | if (!node) |
3990 | return NULL((void*)0); |
3991 | if (is_node(graph, node)) |
3992 | return node; |
3993 | |
3994 | id = isl_space_get_tuple_id(space, isl_dim_set); |
3995 | node = isl_id_get_user(id); |
3996 | isl_id_free(id); |
3997 | |
3998 | if (!node) |
3999 | return NULL((void*)0); |
4000 | |
4001 | if (!is_node(graph->root, node)) |
4002 | isl_die(ctx, isl_error_internal,do { isl_handle_error(ctx, isl_error_internal, "space points to invalid node" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 4003); return ((void*)0); } while (0) |
4003 | "space points to invalid node", return NULL)do { isl_handle_error(ctx, isl_error_internal, "space points to invalid node" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 4003); return ((void*)0); } while (0); |
4004 | if (graph != graph->root) |
4005 | node = graph_find_node(ctx, graph, node->space); |
4006 | if (!is_node(graph, node)) |
4007 | isl_die(ctx, isl_error_internal,do { isl_handle_error(ctx, isl_error_internal, "unable to find node" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 4008); return ((void*)0); } while (0) |
4008 | "unable to find node", return NULL)do { isl_handle_error(ctx, isl_error_internal, "unable to find node" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 4008); return ((void*)0); } while (0); |
4009 | |
4010 | return node; |
4011 | } |
4012 | |
4013 | /* Internal data structure for add_all_constraints. |
4014 | * |
4015 | * "graph" is the schedule constraint graph for which an LP problem |
4016 | * is being constructed. |
4017 | * "carry_inter" indicates whether inter-node edges should be carried. |
4018 | * "pos" is the position of the next edge that needs to be carried. |
4019 | */ |
4020 | struct isl_add_all_constraints_data { |
4021 | isl_ctx *ctx; |
4022 | struct isl_sched_graph *graph; |
4023 | int carry_inter; |
4024 | int pos; |
4025 | }; |
4026 | |
4027 | /* Add the constraints "coef" derived from an edge from a node to itself |
4028 | * to data->graph->lp in order to respect the dependences and |
4029 | * to try and carry them. |
4030 | * |
4031 | * The space of "coef" is of the form |
4032 | * |
4033 | * coefficients[[c_cst] -> S[c_x]] |
4034 | * |
4035 | * with S[c_x] the (compressed) space of the node. |
4036 | * Extract the node from the space and call add_intra_constraints. |
4037 | */ |
4038 | static isl_stat lp_add_intra(__isl_take isl_basic_setisl_basic_map *coef, void *user) |
4039 | { |
4040 | struct isl_add_all_constraints_data *data = user; |
4041 | isl_space *space; |
4042 | struct isl_sched_node *node; |
4043 | |
4044 | space = isl_basic_set_get_space(coef); |
4045 | space = isl_space_range(isl_space_unwrap(space)); |
4046 | node = graph_find_compressed_node(data->ctx, data->graph, space); |
4047 | isl_space_free(space); |
4048 | return add_intra_constraints(data->graph, node, coef, data->pos++); |
4049 | } |
4050 | |
4051 | /* Add the constraints "coef" derived from an edge from a node j |
4052 | * to a node k to data->graph->lp in order to respect the dependences and |
4053 | * to try and carry them (provided data->carry_inter is set). |
4054 | * |
4055 | * The space of "coef" is of the form |
4056 | * |
4057 | * coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]] |
4058 | * |
4059 | * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes. |
4060 | * Extract the nodes from the space and call add_inter_constraints. |
4061 | */ |
4062 | static isl_stat lp_add_inter(__isl_take isl_basic_setisl_basic_map *coef, void *user) |
4063 | { |
4064 | struct isl_add_all_constraints_data *data = user; |
4065 | isl_space *space, *dom; |
4066 | struct isl_sched_node *src, *dst; |
4067 | int pos; |
4068 | |
4069 | space = isl_basic_set_get_space(coef); |
4070 | space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space))); |
4071 | dom = isl_space_domain(isl_space_copy(space)); |
4072 | src = graph_find_compressed_node(data->ctx, data->graph, dom); |
4073 | isl_space_free(dom); |
4074 | space = isl_space_range(space); |
4075 | dst = graph_find_compressed_node(data->ctx, data->graph, space); |
4076 | isl_space_free(space); |
4077 | |
4078 | pos = data->carry_inter ? data->pos++ : -1; |
4079 | return add_inter_constraints(data->graph, src, dst, coef, pos); |
4080 | } |
4081 | |
4082 | /* Add constraints to graph->lp that force all (conditional) validity |
4083 | * dependences to be respected and attempt to carry them. |
4084 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
4085 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
4086 | * "carry_inter" indicates whether inter-node edges should be carried or |
4087 | * only respected. |
4088 | */ |
4089 | static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph, |
4090 | __isl_keep isl_basic_set_listisl_basic_map_list *intra, |
4091 | __isl_keep isl_basic_set_listisl_basic_map_list *inter, int carry_inter) |
4092 | { |
4093 | struct isl_add_all_constraints_data data = { ctx, graph, carry_inter }; |
4094 | |
4095 | data.pos = 0; |
4096 | if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0) |
4097 | return isl_stat_error; |
4098 | if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0) |
4099 | return isl_stat_error; |
4100 | return isl_stat_ok; |
4101 | } |
4102 | |
/* Running totals used by count_all_constraints. */
struct isl_sched_count {
	/* Number of equality constraints seen so far. */
	int n_eq;
	/* Number of inequality constraints seen so far. */
	int n_ineq;
};
4110 | |
4111 | /* Add the number of equality and inequality constraints of "bset" |
4112 | * to data->n_eq and data->n_ineq. |
4113 | */ |
4114 | static isl_stat bset_update_count(__isl_take isl_basic_setisl_basic_map *bset, void *user) |
4115 | { |
4116 | struct isl_sched_count *data = user; |
4117 | |
4118 | return update_count(bset, 1, &data->n_eq, &data->n_ineq); |
4119 | } |
4120 | |
4121 | /* Count the number of equality and inequality constraints |
4122 | * that will be added to the carry_lp problem. |
4123 | * We count each edge exactly once. |
4124 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
4125 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
4126 | */ |
4127 | static isl_stat count_all_constraints(__isl_keep isl_basic_set_listisl_basic_map_list *intra, |
4128 | __isl_keep isl_basic_set_listisl_basic_map_list *inter, int *n_eq, int *n_ineq) |
4129 | { |
4130 | struct isl_sched_count data; |
4131 | |
4132 | data.n_eq = data.n_ineq = 0; |
4133 | if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0) |
4134 | return isl_stat_error; |
4135 | if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0) |
4136 | return isl_stat_error; |
4137 | |
4138 | *n_eq = data.n_eq; |
4139 | *n_ineq = data.n_ineq; |
4140 | |
4141 | return isl_stat_ok; |
4142 | } |
4143 | |
4144 | /* Construct an LP problem for finding schedule coefficients |
4145 | * such that the schedule carries as many validity dependences as possible. |
4146 | * In particular, for each dependence i, we bound the dependence distance |
4147 | * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum |
4148 | * of all e_i's. Dependences with e_i = 0 in the solution are simply |
4149 | * respected, while those with e_i > 0 (in practice e_i = 1) are carried. |
4150 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
4151 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
4152 | * "n_edge" is the total number of edges. |
4153 | * "carry_inter" indicates whether inter-node edges should be carried or |
4154 | * only respected. That is, if "carry_inter" is not set, then |
4155 | * no e_i variables are introduced for the inter-node edges. |
4156 | * |
4157 | * All variables of the LP are non-negative. The actual coefficients |
4158 | * may be negative, so each coefficient is represented as the difference |
4159 | * of two non-negative variables. The negative part always appears |
4160 | * immediately before the positive part. |
4161 | * Other than that, the variables have the following order |
4162 | * |
4163 | * - sum of (1 - e_i) over all edges |
4164 | * - sum of all c_n coefficients |
4165 | * (unconstrained when computing non-parametric schedules) |
4166 | * - sum of positive and negative parts of all c_x coefficients |
4167 | * - for each edge |
4168 | * - e_i |
4169 | * - for each node |
4170 | * - positive and negative parts of c_i_x, in opposite order |
4171 | * - c_i_n (if parametric) |
4172 | * - c_i_0 |
4173 | * |
4174 | * The constraints are those from the (validity) edges plus three equalities |
4175 | * to express the sums and n_edge inequalities to express e_i <= 1. |
4176 | */ |
4177 | static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph, |
4178 | int n_edge, __isl_keep isl_basic_set_listisl_basic_map_list *intra, |
4179 | __isl_keep isl_basic_set_listisl_basic_map_list *inter, int carry_inter) |
4180 | { |
4181 | int i; |
4182 | int k; |
4183 | isl_space *dim; |
4184 | unsigned total; |
4185 | int n_eq, n_ineq; |
4186 | |
4187 | total = 3 + n_edge; |
4188 | for (i = 0; i < graph->n; ++i) { |
4189 | struct isl_sched_node *node = &graph->node[graph->sorted[i]]; |
4190 | node->start = total; |
4191 | total += 1 + node->nparam + 2 * node->nvar; |
4192 | } |
4193 | |
4194 | if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0) |
4195 | return isl_stat_error; |
4196 | |
4197 | dim = isl_space_set_alloc(ctx, 0, total); |
4198 | isl_basic_set_free(graph->lp); |
4199 | n_eq += 3; |
4200 | n_ineq += n_edge; |
4201 | graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq); |
4202 | graph->lp = isl_basic_set_set_rational(graph->lp); |
4203 | |
4204 | k = isl_basic_set_alloc_equality(graph->lp); |
4205 | if (k < 0) |
4206 | return isl_stat_error; |
4207 | isl_seq_clr(graph->lp->eq[k], 1 + total); |
4208 | isl_int_set_si(graph->lp->eq[k][0], -n_edge)isl_sioimath_set_si((graph->lp->eq[k][0]), -n_edge); |
4209 | isl_int_set_si(graph->lp->eq[k][1], 1)isl_sioimath_set_si((graph->lp->eq[k][1]), 1); |
4210 | for (i = 0; i < n_edge; ++i) |
4211 | isl_int_set_si(graph->lp->eq[k][4 + i], 1)isl_sioimath_set_si((graph->lp->eq[k][4 + i]), 1); |
4212 | |
4213 | if (add_param_sum_constraint(graph, 1) < 0) |
4214 | return isl_stat_error; |
4215 | if (add_var_sum_constraint(graph, 2) < 0) |
4216 | return isl_stat_error; |
4217 | |
4218 | for (i = 0; i < n_edge; ++i) { |
4219 | k = isl_basic_set_alloc_inequality(graph->lp); |
4220 | if (k < 0) |
4221 | return isl_stat_error; |
4222 | isl_seq_clr(graph->lp->ineq[k], 1 + total); |
4223 | isl_int_set_si(graph->lp->ineq[k][4 + i], -1)isl_sioimath_set_si((graph->lp->ineq[k][4 + i]), -1); |
4224 | isl_int_set_si(graph->lp->ineq[k][0], 1)isl_sioimath_set_si((graph->lp->ineq[k][0]), 1); |
4225 | } |
4226 | |
4227 | if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0) |
4228 | return isl_stat_error; |
4229 | |
4230 | return isl_stat_ok; |
4231 | } |
4232 | |
4233 | static __isl_give isl_schedule_node *compute_component_schedule( |
4234 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
4235 | int wcc); |
4236 | |
4237 | /* If the schedule_split_scaled option is set and if the linear |
4238 | * parts of the scheduling rows for all nodes in the graphs have |
4239 | * a non-trivial common divisor, then remove this |
4240 | * common divisor from the linear part. |
4241 | * Otherwise, insert a band node directly and continue with |
4242 | * the construction of the schedule. |
4243 | * |
4244 | * If a non-trivial common divisor is found, then |
4245 | * the linear part is reduced and the remainder is ignored. |
4246 | * The pieces of the graph that are assigned different remainders |
4247 | * form (groups of) strongly connected components within |
4248 | * the scaled down band. If needed, they can therefore |
4249 | * be ordered along this remainder in a sequence node. |
4250 | * However, this ordering is not enforced here in order to allow |
4251 | * the scheduler to combine some of the strongly connected components. |
4252 | */ |
4253 | static __isl_give isl_schedule_node *split_scaled( |
4254 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
4255 | { |
4256 | int i; |
4257 | int row; |
4258 | isl_ctx *ctx; |
4259 | isl_int gcd, gcd_i; |
4260 | |
4261 | if (!node) |
4262 | return NULL((void*)0); |
4263 | |
4264 | ctx = isl_schedule_node_get_ctx(node); |
4265 | if (!ctx->opt->schedule_split_scaled) |
4266 | return compute_next_band(node, graph, 0); |
4267 | if (graph->n <= 1) |
4268 | return compute_next_band(node, graph, 0); |
4269 | |
4270 | isl_int_init(gcd)isl_sioimath_init((gcd)); |
4271 | isl_int_init(gcd_i)isl_sioimath_init((gcd_i)); |
4272 | |
4273 | isl_int_set_si(gcd, 0)isl_sioimath_set_si((gcd), 0); |
4274 | |
4275 | row = isl_mat_rows(graph->node[0].sched) - 1; |
4276 | |
4277 | for (i = 0; i < graph->n; ++i) { |
4278 | struct isl_sched_node *node = &graph->node[i]; |
4279 | int cols = isl_mat_cols(node->sched); |
4280 | |
4281 | isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i); |
4282 | isl_int_gcd(gcd, gcd, gcd_i)isl_sioimath_gcd((gcd), *(gcd), *(gcd_i)); |
4283 | } |
4284 | |
4285 | isl_int_clear(gcd_i)isl_sioimath_clear((gcd_i)); |
4286 | |
4287 | if (isl_int_cmp_si(gcd, 1)isl_sioimath_cmp_si(*(gcd), 1) <= 0) { |
4288 | isl_int_clear(gcd)isl_sioimath_clear((gcd)); |
4289 | return compute_next_band(node, graph, 0); |
4290 | } |
4291 | |
4292 | for (i = 0; i < graph->n; ++i) { |
4293 | struct isl_sched_node *node = &graph->node[i]; |
4294 | |
4295 | isl_int_fdiv_q(node->sched->row[row][0],isl_sioimath_fdiv_q((node->sched->row[row][0]), *(node-> sched->row[row][0]), *(gcd)) |
4296 | node->sched->row[row][0], gcd)isl_sioimath_fdiv_q((node->sched->row[row][0]), *(node-> sched->row[row][0]), *(gcd)); |
4297 | isl_int_mul(node->sched->row[row][0],isl_sioimath_mul((node->sched->row[row][0]), *(node-> sched->row[row][0]), *(gcd)) |
4298 | node->sched->row[row][0], gcd)isl_sioimath_mul((node->sched->row[row][0]), *(node-> sched->row[row][0]), *(gcd)); |
4299 | node->sched = isl_mat_scale_down_row(node->sched, row, gcd); |
4300 | if (!node->sched) |
4301 | goto error; |
4302 | } |
4303 | |
4304 | isl_int_clear(gcd)isl_sioimath_clear((gcd)); |
4305 | |
4306 | return compute_next_band(node, graph, 0); |
4307 | error: |
4308 | isl_int_clear(gcd)isl_sioimath_clear((gcd)); |
4309 | return isl_schedule_node_free(node); |
4310 | } |
4311 | |
4312 | /* Is the schedule row "sol" trivial on node "node"? |
4313 | * That is, is the solution zero on the dimensions linearly independent of |
4314 | * the previously found solutions? |
4315 | * Return 1 if the solution is trivial, 0 if it is not and -1 on error. |
4316 | * |
4317 | * Each coefficient is represented as the difference between |
4318 | * two non-negative values in "sol". |
4319 | * We construct the schedule row s and check if it is linearly |
4320 | * independent of previously computed schedule rows |
4321 | * by computing T s, with T the linear combinations that are zero |
4322 | * on linearly dependent schedule rows. |
4323 | * If the result consists of all zeros, then the solution is trivial. |
4324 | */ |
4325 | static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol) |
4326 | { |
4327 | int trivial; |
4328 | isl_vec *node_sol; |
4329 | |
4330 | if (!sol) |
4331 | return -1; |
4332 | if (node->nvar == node->rank) |
4333 | return 0; |
4334 | |
4335 | node_sol = extract_var_coef(node, sol); |
4336 | node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol); |
4337 | if (!node_sol) |
4338 | return -1; |
4339 | |
4340 | trivial = isl_seq_first_non_zero(node_sol->el, |
4341 | node->nvar - node->rank) == -1; |
4342 | |
4343 | isl_vec_free(node_sol); |
4344 | |
4345 | return trivial; |
4346 | } |
4347 | |
4348 | /* Is the schedule row "sol" trivial on any node where it should |
4349 | * not be trivial? |
4350 | * Return 1 if any solution is trivial, 0 if they are not and -1 on error. |
4351 | */ |
4352 | static int is_any_trivial(struct isl_sched_graph *graph, |
4353 | __isl_keep isl_vec *sol) |
4354 | { |
4355 | int i; |
4356 | |
4357 | for (i = 0; i < graph->n; ++i) { |
4358 | struct isl_sched_node *node = &graph->node[i]; |
4359 | int trivial; |
4360 | |
4361 | if (!needs_row(graph, node)) |
4362 | continue; |
4363 | trivial = is_trivial(node, sol); |
4364 | if (trivial < 0 || trivial) |
4365 | return trivial; |
4366 | } |
4367 | |
4368 | return 0; |
4369 | } |
4370 | |
4371 | /* Does the schedule represented by "sol" perform loop coalescing on "node"? |
4372 | * If so, return the position of the coalesced dimension. |
4373 | * Otherwise, return node->nvar or -1 on error. |
4374 | * |
4375 | * In particular, look for pairs of coefficients c_i and c_j such that |
4376 | * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|. |
4377 | * If any such pair is found, then return i. |
4378 | * If size_i is infinity, then no check on c_i needs to be performed. |
4379 | */ |
4380 | static int find_node_coalescing(struct isl_sched_node *node, |
4381 | __isl_keep isl_vec *sol) |
4382 | { |
4383 | int i, j; |
4384 | isl_int max; |
4385 | isl_vec *csol; |
4386 | |
4387 | if (node->nvar <= 1) |
4388 | return node->nvar; |
4389 | |
4390 | csol = extract_var_coef(node, sol); |
4391 | if (!csol) |
4392 | return -1; |
4393 | isl_int_init(max)isl_sioimath_init((max)); |
4394 | for (i = 0; i < node->nvar; ++i) { |
4395 | isl_val *v; |
4396 | |
4397 | if (isl_int_is_zero(csol->el[i])(isl_sioimath_sgn(*(csol->el[i])) == 0)) |
4398 | continue; |
4399 | v = isl_multi_val_get_val(node->sizes, i); |
4400 | if (!v) |
4401 | goto error; |
4402 | if (!isl_val_is_int(v)) { |
4403 | isl_val_free(v); |
4404 | continue; |
4405 | } |
4406 | v = isl_val_div_ui(v, 2); |
4407 | v = isl_val_ceil(v); |
4408 | if (!v) |
4409 | goto error; |
4410 | isl_int_mul(max, v->n, csol->el[i])isl_sioimath_mul((max), *(v->n), *(csol->el[i])); |
4411 | isl_val_free(v); |
4412 | |
4413 | for (j = 0; j < node->nvar; ++j) { |
4414 | if (j == i) |
4415 | continue; |
4416 | if (isl_int_abs_gt(csol->el[j], max)(isl_sioimath_abs_cmp(*(csol->el[j]), *(max)) > 0)) |
4417 | break; |
4418 | } |
4419 | if (j < node->nvar) |
4420 | break; |
4421 | } |
4422 | |
4423 | isl_int_clear(max)isl_sioimath_clear((max)); |
4424 | isl_vec_free(csol); |
4425 | return i; |
4426 | error: |
4427 | isl_int_clear(max)isl_sioimath_clear((max)); |
4428 | isl_vec_free(csol); |
4429 | return -1; |
4430 | } |
4431 | |
4432 | /* Force the schedule coefficient at position "pos" of "node" to be zero |
4433 | * in "tl". |
4434 | * The coefficient is encoded as the difference between two non-negative |
4435 | * variables. Force these two variables to have the same value. |
4436 | */ |
4437 | static __isl_give isl_tab_lexmin *zero_out_node_coef( |
4438 | __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos) |
4439 | { |
4440 | int dim; |
4441 | isl_ctx *ctx; |
4442 | isl_vec *eq; |
4443 | |
4444 | ctx = isl_space_get_ctx(node->space); |
4445 | dim = isl_tab_lexmin_dim(tl); |
4446 | if (dim < 0) |
4447 | return isl_tab_lexmin_free(tl); |
4448 | eq = isl_vec_alloc(ctx, 1 + dim); |
4449 | eq = isl_vec_clr(eq); |
4450 | if (!eq) |
4451 | return isl_tab_lexmin_free(tl); |
4452 | |
4453 | pos = 1 + node_var_coef_pos(node, pos); |
4454 | isl_int_set_si(eq->el[pos], 1)isl_sioimath_set_si((eq->el[pos]), 1); |
4455 | isl_int_set_si(eq->el[pos + 1], -1)isl_sioimath_set_si((eq->el[pos + 1]), -1); |
4456 | tl = isl_tab_lexmin_add_eq(tl, eq->el); |
4457 | isl_vec_free(eq); |
4458 | |
4459 | return tl; |
4460 | } |
4461 | |
4462 | /* Return the lexicographically smallest rational point in the basic set |
4463 | * from which "tl" was constructed, double checking that this input set |
4464 | * was not empty. |
4465 | */ |
4466 | static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl) |
4467 | { |
4468 | isl_vec *sol; |
4469 | |
4470 | sol = isl_tab_lexmin_get_solution(tl); |
4471 | if (!sol) |
4472 | return NULL((void*)0); |
4473 | if (sol->size == 0) |
4474 | isl_die(isl_vec_get_ctx(sol), isl_error_internal,do { isl_handle_error(isl_vec_get_ctx(sol), isl_error_internal , "error in schedule construction", "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 4476); return isl_vec_free(sol); } while (0) |
4475 | "error in schedule construction",do { isl_handle_error(isl_vec_get_ctx(sol), isl_error_internal , "error in schedule construction", "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 4476); return isl_vec_free(sol); } while (0) |
4476 | return isl_vec_free(sol))do { isl_handle_error(isl_vec_get_ctx(sol), isl_error_internal , "error in schedule construction", "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 4476); return isl_vec_free(sol); } while (0); |
4477 | return sol; |
4478 | } |
4479 | |
4480 | /* Does the solution "sol" of the LP problem constructed by setup_carry_lp |
4481 | * carry any of the "n_edge" groups of dependences? |
4482 | * The value in the first position is the sum of (1 - e_i) over all "n_edge" |
4483 | * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented |
4484 | * by the edge are carried by the solution. |
4485 | * If the sum of the (1 - e_i) is smaller than "n_edge" then at least |
4486 | * one of those is carried. |
4487 | * |
4488 | * Note that despite the fact that the problem is solved using a rational |
4489 | * solver, the solution is guaranteed to be integral. |
4490 | * Specifically, the dependence distance lower bounds e_i (and therefore |
4491 | * also their sum) are integers. See Lemma 5 of [1]. |
4492 | * |
4493 | * Any potential denominator of the sum is cleared by this function. |
4494 | * The denominator is not relevant for any of the other elements |
4495 | * in the solution. |
4496 | * |
4497 | * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling |
4498 | * Problem, Part II: Multi-Dimensional Time. |
4499 | * In Intl. Journal of Parallel Programming, 1992. |
4500 | */ |
4501 | static int carries_dependences(__isl_keep isl_vec *sol, int n_edge) |
4502 | { |
4503 | isl_int_divexact(sol->el[1], sol->el[1], sol->el[0])isl_sioimath_tdiv_q((sol->el[1]), *(sol->el[1]), *(sol-> el[0])); |
4504 | isl_int_set_si(sol->el[0], 1)isl_sioimath_set_si((sol->el[0]), 1); |
4505 | return isl_int_cmp_si(sol->el[1], n_edge)isl_sioimath_cmp_si(*(sol->el[1]), n_edge) < 0; |
4506 | } |
4507 | |
4508 | /* Return the lexicographically smallest rational point in "lp", |
4509 | * assuming that all variables are non-negative and performing some |
4510 | * additional sanity checks. |
4511 | * If "want_integral" is set, then compute the lexicographically smallest |
4512 | * integer point instead. |
4513 | * In particular, "lp" should not be empty by construction. |
4514 | * Double check that this is the case. |
4515 | * If dependences are not carried for any of the "n_edge" edges, |
4516 | * then return an empty vector. |
4517 | * |
4518 | * If the schedule_treat_coalescing option is set and |
4519 | * if the computed schedule performs loop coalescing on a given node, |
4520 | * i.e., if it is of the form |
4521 | * |
4522 | * c_i i + c_j j + ... |
4523 | * |
4524 | * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero |
4525 | * to cut out this solution. Repeat this process until no more loop |
4526 | * coalescing occurs or until no more dependences can be carried. |
4527 | * In the latter case, revert to the previously computed solution. |
4528 | * |
4529 | * If the caller requests an integral solution and if coalescing should |
4530 | * be treated, then perform the coalescing treatment first as |
4531 | * an integral solution computed before coalescing treatment |
4532 | * would carry the same number of edges and would therefore probably |
4533 | * also be coalescing. |
4534 | * |
4535 | * To allow the coalescing treatment to be performed first, |
4536 | * the initial solution is allowed to be rational and it is only |
4537 | * cut out (if needed) in the next iteration, if no coalescing measures |
4538 | * were taken. |
4539 | */ |
4540 | static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph, |
4541 | __isl_take isl_basic_setisl_basic_map *lp, int n_edge, int want_integral) |
4542 | { |
4543 | int i, pos, cut; |
4544 | isl_ctx *ctx; |
4545 | isl_tab_lexmin *tl; |
4546 | isl_vec *sol = NULL((void*)0), *prev; |
4547 | int treat_coalescing; |
4548 | int try_again; |
4549 | |
4550 | if (!lp) |
4551 | return NULL((void*)0); |
4552 | ctx = isl_basic_set_get_ctx(lp); |
4553 | treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx); |
4554 | tl = isl_tab_lexmin_from_basic_set(lp); |
4555 | |
4556 | cut = 0; |
4557 | do { |
4558 | int integral; |
4559 | |
4560 | try_again = 0; |
4561 | if (cut) |
4562 | tl = isl_tab_lexmin_cut_to_integer(tl); |
4563 | prev = sol; |
4564 | sol = non_empty_solution(tl); |
4565 | if (!sol) |
4566 | goto error; |
4567 | |
4568 | integral = isl_int_is_one(sol->el[0])(isl_sioimath_cmp_si(*(sol->el[0]), 1) == 0); |
4569 | if (!carries_dependences(sol, n_edge)) { |
4570 | if (!prev) |
4571 | prev = isl_vec_alloc(ctx, 0); |
4572 | isl_vec_free(sol); |
4573 | sol = prev; |
4574 | break; |
4575 | } |
4576 | prev = isl_vec_free(prev); |
4577 | cut = want_integral && !integral; |
4578 | if (cut) |
4579 | try_again = 1; |
4580 | if (!treat_coalescing) |
4581 | continue; |
4582 | for (i = 0; i < graph->n; ++i) { |
4583 | struct isl_sched_node *node = &graph->node[i]; |
4584 | |
4585 | pos = find_node_coalescing(node, sol); |
4586 | if (pos < 0) |
4587 | goto error; |
4588 | if (pos < node->nvar) |
4589 | break; |
4590 | } |
4591 | if (i < graph->n) { |
4592 | try_again = 1; |
4593 | tl = zero_out_node_coef(tl, &graph->node[i], pos); |
4594 | cut = 0; |
4595 | } |
4596 | } while (try_again); |
4597 | |
4598 | isl_tab_lexmin_free(tl); |
4599 | |
4600 | return sol; |
4601 | error: |
4602 | isl_tab_lexmin_free(tl); |
4603 | isl_vec_free(prev); |
4604 | isl_vec_free(sol); |
4605 | return NULL((void*)0); |
4606 | } |
4607 | |
4608 | /* If "edge" is an edge from a node to itself, then add the corresponding |
4609 | * dependence relation to "umap". |
4610 | * If "node" has been compressed, then the dependence relation |
4611 | * is also compressed first. |
4612 | */ |
4613 | static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap, |
4614 | struct isl_sched_edge *edge) |
4615 | { |
4616 | isl_map *map; |
4617 | struct isl_sched_node *node = edge->src; |
4618 | |
4619 | if (edge->src != edge->dst) |
4620 | return umap; |
4621 | |
4622 | map = isl_map_copy(edge->map); |
4623 | if (node->compressed) { |
4624 | map = isl_map_preimage_domain_multi_aff(map, |
4625 | isl_multi_aff_copy(node->decompress)); |
4626 | map = isl_map_preimage_range_multi_aff(map, |
4627 | isl_multi_aff_copy(node->decompress)); |
4628 | } |
4629 | umap = isl_union_map_add_map(umap, map); |
4630 | return umap; |
4631 | } |
4632 | |
4633 | /* If "edge" is an edge from a node to another node, then add the corresponding |
4634 | * dependence relation to "umap". |
4635 | * If the source or destination nodes of "edge" have been compressed, |
4636 | * then the dependence relation is also compressed first. |
4637 | */ |
4638 | static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap, |
4639 | struct isl_sched_edge *edge) |
4640 | { |
4641 | isl_map *map; |
4642 | |
4643 | if (edge->src == edge->dst) |
4644 | return umap; |
4645 | |
4646 | map = isl_map_copy(edge->map); |
4647 | if (edge->src->compressed) |
4648 | map = isl_map_preimage_domain_multi_aff(map, |
4649 | isl_multi_aff_copy(edge->src->decompress)); |
4650 | if (edge->dst->compressed) |
4651 | map = isl_map_preimage_range_multi_aff(map, |
4652 | isl_multi_aff_copy(edge->dst->decompress)); |
4653 | umap = isl_union_map_add_map(umap, map); |
4654 | return umap; |
4655 | } |
4656 | |
4657 | /* Internal data structure used by union_drop_coalescing_constraints |
4658 | * to collect bounds on all relevant statements. |
4659 | * |
4660 | * "graph" is the schedule constraint graph for which an LP problem |
4661 | * is being constructed. |
4662 | * "bounds" collects the bounds. |
4663 | */ |
4664 | struct isl_collect_bounds_data { |
4665 | isl_ctx *ctx; |
4666 | struct isl_sched_graph *graph; |
4667 | isl_union_set *bounds; |
4668 | }; |
4669 | |
4670 | /* Add the size bounds for the node with instance deltas in "set" |
4671 | * to data->bounds. |
4672 | */ |
4673 | static isl_stat collect_bounds(__isl_take isl_setisl_map *set, void *user) |
4674 | { |
4675 | struct isl_collect_bounds_data *data = user; |
4676 | struct isl_sched_node *node; |
4677 | isl_space *space; |
4678 | isl_setisl_map *bounds; |
4679 | |
4680 | space = isl_set_get_space(set); |
4681 | isl_set_free(set); |
4682 | |
4683 | node = graph_find_compressed_node(data->ctx, data->graph, space); |
4684 | isl_space_free(space); |
4685 | |
4686 | bounds = isl_set_from_basic_set(get_size_bounds(node)); |
4687 | data->bounds = isl_union_set_add_set(data->bounds, bounds); |
4688 | |
4689 | return isl_stat_ok; |
4690 | } |
4691 | |
4692 | /* Drop some constraints from "delta" that could be exploited |
4693 | * to construct loop coalescing schedules. |
4694 | * In particular, drop those constraint that bound the difference |
4695 | * to the size of the domain. |
4696 | * Do this for each set/node in "delta" separately. |
4697 | * The parameters are assumed to have been projected out by the caller. |
4698 | */ |
4699 | static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx, |
4700 | struct isl_sched_graph *graph, __isl_take isl_union_set *delta) |
4701 | { |
4702 | struct isl_collect_bounds_data data = { ctx, graph }; |
4703 | |
4704 | data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); |
4705 | if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0) |
4706 | data.bounds = isl_union_set_free(data.bounds); |
4707 | delta = isl_union_set_plain_gist(delta, data.bounds); |
4708 | |
4709 | return delta; |
4710 | } |
4711 | |
4712 | /* Given a non-trivial lineality space "lineality", add the corresponding |
4713 | * universe set to data->mask and add a map from elements to |
4714 | * other elements along the lines in "lineality" to data->equivalent. |
4715 | * If this is the first time this function gets called |
4716 | * (data->any_non_trivial is still false), then set data->any_non_trivial and |
4717 | * initialize data->mask and data->equivalent. |
4718 | * |
4719 | * In particular, if the lineality space is defined by equality constraints |
4720 | * |
4721 | * E x = 0 |
4722 | * |
4723 | * then construct an affine mapping |
4724 | * |
4725 | * f : x -> E x |
4726 | * |
4727 | * and compute the equivalence relation of having the same image under f: |
4728 | * |
4729 | * { x -> x' : E x = E x' } |
4730 | */ |
4731 | static isl_stat add_non_trivial_lineality(__isl_take isl_basic_setisl_basic_map *lineality, |
4732 | struct isl_exploit_lineality_data *data) |
4733 | { |
4734 | isl_mat *eq; |
4735 | isl_space *space; |
4736 | isl_setisl_map *univ; |
4737 | isl_multi_aff *ma; |
4738 | isl_multi_pw_aff *mpa; |
4739 | isl_map *map; |
4740 | int n; |
4741 | |
4742 | if (!lineality) |
4743 | return isl_stat_error; |
4744 | if (isl_basic_set_dim(lineality, isl_dim_div) != 0) |
4745 | isl_die(isl_basic_set_get_ctx(lineality), isl_error_internal,do { isl_handle_error(isl_basic_set_get_ctx(lineality), isl_error_internal , "local variables not allowed", "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 4746); goto error; } while (0) |
4746 | "local variables not allowed", goto error)do { isl_handle_error(isl_basic_set_get_ctx(lineality), isl_error_internal , "local variables not allowed", "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 4746); goto error; } while (0); |
4747 | |
4748 | space = isl_basic_set_get_space(lineality); |
4749 | if (!data->any_non_trivial) { |
4750 | data->equivalent = isl_union_map_empty(isl_space_copy(space)); |
4751 | data->mask = isl_union_set_empty(isl_space_copy(space)); |
4752 | } |
4753 | data->any_non_trivial = isl_bool_true; |
4754 | |
4755 | univ = isl_set_universe(isl_space_copy(space)); |
4756 | data->mask = isl_union_set_add_set(data->mask, univ); |
4757 | |
4758 | eq = isl_basic_set_extract_equalities(lineality); |
4759 | n = isl_mat_rows(eq); |
4760 | eq = isl_mat_insert_zero_rows(eq, 0, 1); |
4761 | eq = isl_mat_set_element_si(eq, 0, 0, 1); |
4762 | space = isl_space_from_domain(space); |
4763 | space = isl_space_add_dims(space, isl_dim_out, n); |
4764 | ma = isl_multi_aff_from_aff_mat(space, eq); |
4765 | mpa = isl_multi_pw_aff_from_multi_aff(ma); |
4766 | map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa)); |
4767 | data->equivalent = isl_union_map_add_map(data->equivalent, map); |
4768 | |
4769 | isl_basic_set_free(lineality); |
4770 | return isl_stat_ok; |
4771 | error: |
4772 | isl_basic_set_free(lineality); |
4773 | return isl_stat_error; |
4774 | } |
4775 | |
4776 | /* Check if the lineality space "set" is non-trivial (i.e., is not just |
4777 | * the origin or, in other words, satisfies a number of equality constraints |
4778 | * that is smaller than the dimension of the set). |
4779 | * If so, extend data->mask and data->equivalent accordingly. |
4780 | * |
4781 | * The input should not have any local variables already, but |
4782 | * isl_set_remove_divs is called to make sure it does not. |
4783 | */ |
4784 | static isl_stat add_lineality(__isl_take isl_setisl_map *set, void *user) |
4785 | { |
4786 | struct isl_exploit_lineality_data *data = user; |
4787 | isl_basic_setisl_basic_map *hull; |
4788 | int dim, n_eq; |
4789 | |
4790 | set = isl_set_remove_divs(set); |
4791 | hull = isl_set_unshifted_simple_hull(set); |
4792 | dim = isl_basic_set_dim(hull, isl_dim_set); |
4793 | n_eq = isl_basic_set_n_equality(hull); |
4794 | if (!hull) |
4795 | return isl_stat_error; |
4796 | if (dim != n_eq) |
4797 | return add_non_trivial_lineality(hull, data); |
4798 | isl_basic_set_free(hull); |
4799 | return isl_stat_ok; |
4800 | } |
4801 | |
4802 | /* Check if the difference set on intra-node schedule constraints "intra" |
4803 | * has any non-trivial lineality space. |
4804 | * If so, then extend the difference set to a difference set |
4805 | * on equivalent elements. That is, if "intra" is |
4806 | * |
4807 | * { y - x : (x,y) \in V } |
4808 | * |
4809 | * and elements are equivalent if they have the same image under f, |
4810 | * then return |
4811 | * |
4812 | * { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') } |
4813 | * |
4814 | * or, since f is linear, |
4815 | * |
4816 | * { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') } |
4817 | * |
4818 | * The results of the search for non-trivial lineality spaces is stored |
4819 | * in "data". |
4820 | */ |
4821 | static __isl_give isl_union_set *exploit_intra_lineality( |
4822 | __isl_take isl_union_set *intra, |
4823 | struct isl_exploit_lineality_data *data) |
4824 | { |
4825 | isl_union_set *lineality; |
4826 | isl_union_set *uset; |
4827 | |
4828 | data->any_non_trivial = isl_bool_false; |
4829 | lineality = isl_union_set_copy(intra); |
4830 | lineality = isl_union_set_combined_lineality_space(lineality); |
4831 | if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0) |
4832 | data->any_non_trivial = isl_bool_error; |
4833 | isl_union_set_free(lineality); |
4834 | |
4835 | if (data->any_non_trivial < 0) |
4836 | return isl_union_set_free(intra); |
4837 | if (!data->any_non_trivial) |
4838 | return intra; |
4839 | |
4840 | uset = isl_union_set_copy(intra); |
4841 | intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask)); |
4842 | uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent)); |
4843 | intra = isl_union_set_union(intra, uset); |
4844 | |
4845 | intra = isl_union_set_remove_divs(intra); |
4846 | |
4847 | return intra; |
4848 | } |
4849 | |
4850 | /* If the difference set on intra-node schedule constraints was found to have |
4851 | * any non-trivial lineality space by exploit_intra_lineality, |
4852 | * as recorded in "data", then extend the inter-node |
4853 | * schedule constraints "inter" to schedule constraints on equivalent elements. |
4854 | * That is, if "inter" is V and |
4855 | * elements are equivalent if they have the same image under f, then return |
4856 | * |
4857 | * { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') } |
4858 | */ |
4859 | static __isl_give isl_union_map *exploit_inter_lineality( |
4860 | __isl_take isl_union_map *inter, |
4861 | struct isl_exploit_lineality_data *data) |
4862 | { |
4863 | isl_union_map *umap; |
4864 | |
4865 | if (data->any_non_trivial < 0) |
4866 | return isl_union_map_free(inter); |
4867 | if (!data->any_non_trivial) |
4868 | return inter; |
4869 | |
4870 | umap = isl_union_map_copy(inter); |
4871 | inter = isl_union_map_subtract_range(inter, |
4872 | isl_union_set_copy(data->mask)); |
4873 | umap = isl_union_map_apply_range(umap, |
4874 | isl_union_map_copy(data->equivalent)); |
4875 | inter = isl_union_map_union(inter, umap); |
4876 | umap = isl_union_map_copy(inter); |
4877 | inter = isl_union_map_subtract_domain(inter, |
4878 | isl_union_set_copy(data->mask)); |
4879 | umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent), |
4880 | umap); |
4881 | inter = isl_union_map_union(inter, umap); |
4882 | |
4883 | inter = isl_union_map_remove_divs(inter); |
4884 | |
4885 | return inter; |
4886 | } |
4887 | |
4888 | /* For each (conditional) validity edge in "graph", |
4889 | * add the corresponding dependence relation using "add" |
4890 | * to a collection of dependence relations and return the result. |
4891 | * If "coincidence" is set, then coincidence edges are considered as well. |
4892 | */ |
4893 | static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph, |
4894 | __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap, |
4895 | struct isl_sched_edge *edge), int coincidence) |
4896 | { |
4897 | int i; |
4898 | isl_space *space; |
4899 | isl_union_map *umap; |
4900 | |
4901 | space = isl_space_copy(graph->node[0].space); |
4902 | umap = isl_union_map_empty(space); |
4903 | |
4904 | for (i = 0; i < graph->n_edge; ++i) { |
4905 | struct isl_sched_edge *edge = &graph->edge[i]; |
4906 | |
4907 | if (!is_any_validity(edge) && |
4908 | (!coincidence || !is_coincidence(edge))) |
4909 | continue; |
4910 | |
4911 | umap = add(umap, edge); |
4912 | } |
4913 | |
4914 | return umap; |
4915 | } |
4916 | |
4917 | /* Project out all parameters from "uset" and return the result. |
4918 | */ |
4919 | static __isl_give isl_union_set *union_set_drop_parameters( |
4920 | __isl_take isl_union_set *uset) |
4921 | { |
4922 | unsigned nparam; |
4923 | |
4924 | nparam = isl_union_set_dim(uset, isl_dim_param); |
4925 | return isl_union_set_project_out(uset, isl_dim_param, 0, nparam); |
4926 | } |
4927 | |
4928 | /* For each dependence relation on a (conditional) validity edge |
4929 | * from a node to itself, |
4930 | * construct the set of coefficients of valid constraints for elements |
4931 | * in that dependence relation and collect the results. |
4932 | * If "coincidence" is set, then coincidence edges are considered as well. |
4933 | * |
4934 | * In particular, for each dependence relation R, constraints |
4935 | * on coefficients (c_0, c_x) are constructed such that |
4936 | * |
4937 | * c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R } |
4938 | * |
4939 | * If the schedule_treat_coalescing option is set, then some constraints |
4940 | * that could be exploited to construct coalescing schedules |
4941 | * are removed before the dual is computed, but after the parameters |
4942 | * have been projected out. |
4943 | * The entire computation is essentially the same as that performed |
4944 | * by intra_coefficients, except that it operates on multiple |
4945 | * edges together and that the parameters are always projected out. |
4946 | * |
4947 | * Additionally, exploit any non-trivial lineality space |
4948 | * in the difference set after removing coalescing constraints and |
4949 | * store the results of the non-trivial lineality space detection in "data". |
4950 | * The procedure is currently run unconditionally, but it is unlikely |
4951 | * to find any non-trivial lineality spaces if no coalescing constraints |
4952 | * have been removed. |
4953 | * |
4954 | * Note that if a dependence relation is a union of basic maps, |
4955 | * then each basic map needs to be treated individually as it may only |
4956 | * be possible to carry the dependences expressed by some of those |
4957 | * basic maps and not all of them. |
4958 | * The collected validity constraints are therefore not coalesced and |
4959 | * it is assumed that they are not coalesced automatically. |
4960 | * Duplicate basic maps can be removed, however. |
4961 | * In particular, if the same basic map appears as a disjunct |
4962 | * in multiple edges, then it only needs to be carried once. |
4963 | */ |
4964 | static __isl_give isl_basic_set_listisl_basic_map_list *collect_intra_validity(isl_ctx *ctx, |
4965 | struct isl_sched_graph *graph, int coincidence, |
4966 | struct isl_exploit_lineality_data *data) |
4967 | { |
4968 | isl_union_map *intra; |
4969 | isl_union_set *delta; |
4970 | isl_basic_set_listisl_basic_map_list *list; |
4971 | |
4972 | intra = collect_validity(graph, &add_intra, coincidence); |
4973 | delta = isl_union_map_deltas(intra); |
4974 | delta = union_set_drop_parameters(delta); |
4975 | delta = isl_union_set_remove_divs(delta); |
4976 | if (isl_options_get_schedule_treat_coalescing(ctx)) |
4977 | delta = union_drop_coalescing_constraints(ctx, graph, delta); |
4978 | delta = exploit_intra_lineality(delta, data); |
4979 | list = isl_union_set_get_basic_set_list(delta); |
4980 | isl_union_set_free(delta); |
4981 | |
4982 | return isl_basic_set_list_coefficients(list); |
4983 | } |
4984 | |
4985 | /* For each dependence relation on a (conditional) validity edge |
4986 | * from a node to some other node, |
4987 | * construct the set of coefficients of valid constraints for elements |
4988 | * in that dependence relation and collect the results. |
4989 | * If "coincidence" is set, then coincidence edges are considered as well. |
4990 | * |
4991 | * In particular, for each dependence relation R, constraints |
4992 | * on coefficients (c_0, c_n, c_x, c_y) are constructed such that |
4993 | * |
4994 | * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R |
4995 | * |
4996 | * This computation is essentially the same as that performed |
4997 | * by inter_coefficients, except that it operates on multiple |
4998 | * edges together. |
4999 | * |
5000 | * Additionally, exploit any non-trivial lineality space |
5001 | * that may have been discovered by collect_intra_validity |
5002 | * (as stored in "data"). |
5003 | * |
5004 | * Note that if a dependence relation is a union of basic maps, |
5005 | * then each basic map needs to be treated individually as it may only |
5006 | * be possible to carry the dependences expressed by some of those |
5007 | * basic maps and not all of them. |
5008 | * The collected validity constraints are therefore not coalesced and |
5009 | * it is assumed that they are not coalesced automatically. |
5010 | * Duplicate basic maps can be removed, however. |
5011 | * In particular, if the same basic map appears as a disjunct |
5012 | * in multiple edges, then it only needs to be carried once. |
5013 | */ |
5014 | static __isl_give isl_basic_set_listisl_basic_map_list *collect_inter_validity( |
5015 | struct isl_sched_graph *graph, int coincidence, |
5016 | struct isl_exploit_lineality_data *data) |
5017 | { |
5018 | isl_union_map *inter; |
5019 | isl_union_set *wrap; |
5020 | isl_basic_set_listisl_basic_map_list *list; |
5021 | |
5022 | inter = collect_validity(graph, &add_inter, coincidence); |
5023 | inter = exploit_inter_lineality(inter, data); |
5024 | inter = isl_union_map_remove_divs(inter); |
5025 | wrap = isl_union_map_wrap(inter); |
5026 | list = isl_union_set_get_basic_set_list(wrap); |
5027 | isl_union_set_free(wrap); |
5028 | return isl_basic_set_list_coefficients(list); |
5029 | } |
5030 | |
5031 | /* Construct an LP problem for finding schedule coefficients |
5032 | * such that the schedule carries as many of the "n_edge" groups of |
5033 | * dependences as possible based on the corresponding coefficient |
5034 | * constraints and return the lexicographically smallest non-trivial solution. |
5035 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
5036 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
5037 | * If "want_integral" is set, then compute an integral solution |
5038 | * for the coefficients rather than using the numerators |
5039 | * of a rational solution. |
5040 | * "carry_inter" indicates whether inter-node edges should be carried or |
5041 | * only respected. |
5042 | * |
5043 | * If none of the "n_edge" groups can be carried |
5044 | * then return an empty vector. |
5045 | */ |
5046 | static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx, |
5047 | struct isl_sched_graph *graph, int n_edge, |
5048 | __isl_keep isl_basic_set_listisl_basic_map_list *intra, |
5049 | __isl_keep isl_basic_set_listisl_basic_map_list *inter, int want_integral, |
5050 | int carry_inter) |
5051 | { |
5052 | isl_basic_setisl_basic_map *lp; |
5053 | |
5054 | if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0) |
5055 | return NULL((void*)0); |
5056 | |
5057 | lp = isl_basic_set_copy(graph->lp); |
5058 | return non_neg_lexmin(graph, lp, n_edge, want_integral); |
5059 | } |
5060 | |
5061 | /* Construct an LP problem for finding schedule coefficients |
5062 | * such that the schedule carries as many of the validity dependences |
5063 | * as possible and |
5064 | * return the lexicographically smallest non-trivial solution. |
5065 | * If "fallback" is set, then the carrying is performed as a fallback |
5066 | * for the Pluto-like scheduler. |
5067 | * If "coincidence" is set, then try and carry coincidence edges as well. |
5068 | * |
5069 | * The variable "n_edge" stores the number of groups that should be carried. |
5070 | * If none of the "n_edge" groups can be carried |
5071 | * then return an empty vector. |
5072 | * If, moreover, "n_edge" is zero, then the LP problem does not even |
5073 | * need to be constructed. |
5074 | * |
5075 | * If a fallback solution is being computed, then compute an integral solution |
5076 | * for the coefficients rather than using the numerators |
5077 | * of a rational solution. |
5078 | * |
5079 | * If a fallback solution is being computed, if there are any intra-node |
5080 | * dependences, and if requested by the user, then first try |
5081 | * to only carry those intra-node dependences. |
5082 | * If this fails to carry any dependences, then try again |
5083 | * with the inter-node dependences included. |
5084 | */ |
5085 | static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx, |
5086 | struct isl_sched_graph *graph, int fallback, int coincidence) |
5087 | { |
5088 | int n_intra, n_inter; |
5089 | int n_edge; |
5090 | struct isl_carry carry = { 0 }; |
5091 | isl_vec *sol; |
5092 | |
5093 | carry.intra = collect_intra_validity(ctx, graph, coincidence, |
5094 | &carry.lineality); |
5095 | carry.inter = collect_inter_validity(graph, coincidence, |
5096 | &carry.lineality); |
5097 | if (!carry.intra || !carry.inter) |
5098 | goto error; |
5099 | n_intra = isl_basic_set_list_n_basic_set(carry.intra); |
5100 | n_inter = isl_basic_set_list_n_basic_set(carry.inter); |
5101 | |
5102 | if (fallback && n_intra > 0 && |
5103 | isl_options_get_schedule_carry_self_first(ctx)) { |
5104 | sol = compute_carrying_sol_coef(ctx, graph, n_intra, |
5105 | carry.intra, carry.inter, fallback, 0); |
5106 | if (!sol || sol->size != 0 || n_inter == 0) { |
5107 | isl_carry_clear(&carry); |
5108 | return sol; |
5109 | } |
5110 | isl_vec_free(sol); |
5111 | } |
5112 | |
5113 | n_edge = n_intra + n_inter; |
5114 | if (n_edge == 0) { |
5115 | isl_carry_clear(&carry); |
5116 | return isl_vec_alloc(ctx, 0); |
5117 | } |
5118 | |
5119 | sol = compute_carrying_sol_coef(ctx, graph, n_edge, |
5120 | carry.intra, carry.inter, fallback, 1); |
5121 | isl_carry_clear(&carry); |
5122 | return sol; |
5123 | error: |
5124 | isl_carry_clear(&carry); |
5125 | return NULL((void*)0); |
5126 | } |
5127 | |
5128 | /* Construct a schedule row for each node such that as many validity dependences |
5129 | * as possible are carried and then continue with the next band. |
5130 | * If "fallback" is set, then the carrying is performed as a fallback |
5131 | * for the Pluto-like scheduler. |
5132 | * If "coincidence" is set, then try and carry coincidence edges as well. |
5133 | * |
5134 | * If there are no validity dependences, then no dependence can be carried and |
5135 | * the procedure is guaranteed to fail. If there is more than one component, |
5136 | * then try computing a schedule on each component separately |
5137 | * to prevent or at least postpone this failure. |
5138 | * |
5139 | * If a schedule row is computed, then check that dependences are carried |
5140 | * for at least one of the edges. |
5141 | * |
5142 | * If the computed schedule row turns out to be trivial on one or |
5143 | * more nodes where it should not be trivial, then we throw it away |
5144 | * and try again on each component separately. |
5145 | * |
5146 | * If there is only one component, then we accept the schedule row anyway, |
5147 | * but we do not consider it as a complete row and therefore do not |
5148 | * increment graph->n_row. Note that the ranks of the nodes that |
5149 | * do get a non-trivial schedule part will get updated regardless and |
5150 | * graph->maxvar is computed based on these ranks. The test for |
5151 | * whether more schedule rows are required in compute_schedule_wcc |
5152 | * is therefore not affected. |
5153 | * |
5154 | * Insert a band corresponding to the schedule row at position "node" |
5155 | * of the schedule tree and continue with the construction of the schedule. |
5156 | * This insertion and the continued construction is performed by split_scaled |
5157 | * after optionally checking for non-trivial common divisors. |
5158 | */ |
5159 | static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node, |
5160 | struct isl_sched_graph *graph, int fallback, int coincidence) |
5161 | { |
5162 | int trivial; |
5163 | isl_ctx *ctx; |
5164 | isl_vec *sol; |
5165 | |
5166 | if (!node) |
5167 | return NULL((void*)0); |
5168 | |
5169 | ctx = isl_schedule_node_get_ctx(node); |
5170 | sol = compute_carrying_sol(ctx, graph, fallback, coincidence); |
5171 | if (!sol) |
5172 | return isl_schedule_node_free(node); |
5173 | if (sol->size == 0) { |
5174 | isl_vec_free(sol); |
5175 | if (graph->scc > 1) |
5176 | return compute_component_schedule(node, graph, 1); |
5177 | isl_die(ctx, isl_error_unknown, "unable to carry dependences",do { isl_handle_error(ctx, isl_error_unknown, "unable to carry dependences" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 5178); return isl_schedule_node_free(node); } while (0) |
5178 | return isl_schedule_node_free(node))do { isl_handle_error(ctx, isl_error_unknown, "unable to carry dependences" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 5178); return isl_schedule_node_free(node); } while (0); |
5179 | } |
5180 | |
5181 | trivial = is_any_trivial(graph, sol); |
5182 | if (trivial < 0) { |
5183 | sol = isl_vec_free(sol); |
5184 | } else if (trivial && graph->scc > 1) { |
5185 | isl_vec_free(sol); |
5186 | return compute_component_schedule(node, graph, 1); |
5187 | } |
5188 | |
5189 | if (update_schedule(graph, sol, 0) < 0) |
5190 | return isl_schedule_node_free(node); |
5191 | if (trivial) |
5192 | graph->n_row--; |
5193 | |
5194 | return split_scaled(node, graph); |
5195 | } |
5196 | |
5197 | /* Construct a schedule row for each node such that as many validity dependences |
5198 | * as possible are carried and then continue with the next band. |
5199 | * Do so as a fallback for the Pluto-like scheduler. |
5200 | * If "coincidence" is set, then try and carry coincidence edges as well. |
5201 | */ |
5202 | static __isl_give isl_schedule_node *carry_fallback( |
5203 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
5204 | int coincidence) |
5205 | { |
5206 | return carry(node, graph, 1, coincidence); |
5207 | } |
5208 | |
5209 | /* Construct a schedule row for each node such that as many validity dependences |
5210 | * as possible are carried and then continue with the next band. |
5211 | * Do so for the case where the Feautrier scheduler was selected |
5212 | * by the user. |
5213 | */ |
5214 | static __isl_give isl_schedule_node *carry_feautrier( |
5215 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
5216 | { |
5217 | return carry(node, graph, 0, 0); |
5218 | } |
5219 | |
5220 | /* Construct a schedule row for each node such that as many validity dependences |
5221 | * as possible are carried and then continue with the next band. |
5222 | * Do so as a fallback for the Pluto-like scheduler. |
5223 | */ |
5224 | static __isl_give isl_schedule_node *carry_dependences( |
5225 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
5226 | { |
5227 | return carry_fallback(node, graph, 0); |
5228 | } |
5229 | |
5230 | /* Construct a schedule row for each node such that as many validity or |
5231 | * coincidence dependences as possible are carried and |
5232 | * then continue with the next band. |
5233 | * Do so as a fallback for the Pluto-like scheduler. |
5234 | */ |
5235 | static __isl_give isl_schedule_node *carry_coincidence( |
5236 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
5237 | { |
5238 | return carry_fallback(node, graph, 1); |
5239 | } |
5240 | |
5241 | /* Topologically sort statements mapped to the same schedule iteration |
5242 | * and add insert a sequence node in front of "node" |
5243 | * corresponding to this order. |
5244 | * If "initialized" is set, then it may be assumed that compute_maxvar |
5245 | * has been called on the current band. Otherwise, call |
5246 | * compute_maxvar if and before carry_dependences gets called. |
5247 | * |
5248 | * If it turns out to be impossible to sort the statements apart, |
5249 | * because different dependences impose different orderings |
5250 | * on the statements, then we extend the schedule such that |
5251 | * it carries at least one more dependence. |
5252 | */ |
5253 | static __isl_give isl_schedule_node *sort_statements( |
5254 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
5255 | int initialized) |
5256 | { |
5257 | isl_ctx *ctx; |
5258 | isl_union_set_list *filters; |
5259 | |
5260 | if (!node) |
5261 | return NULL((void*)0); |
5262 | |
5263 | ctx = isl_schedule_node_get_ctx(node); |
5264 | if (graph->n < 1) |
5265 | isl_die(ctx, isl_error_internal,do { isl_handle_error(ctx, isl_error_internal, "graph should have at least one node" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 5267); return isl_schedule_node_free(node); } while (0) |
5266 | "graph should have at least one node",do { isl_handle_error(ctx, isl_error_internal, "graph should have at least one node" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 5267); return isl_schedule_node_free(node); } while (0) |
5267 | return isl_schedule_node_free(node))do { isl_handle_error(ctx, isl_error_internal, "graph should have at least one node" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 5267); return isl_schedule_node_free(node); } while (0); |
5268 | |
5269 | if (graph->n == 1) |
5270 | return node; |
5271 | |
5272 | if (update_edges(ctx, graph) < 0) |
5273 | return isl_schedule_node_free(node); |
5274 | |
5275 | if (graph->n_edge == 0) |
5276 | return node; |
5277 | |
5278 | if (detect_sccs(ctx, graph) < 0) |
5279 | return isl_schedule_node_free(node); |
5280 | |
5281 | next_band(graph); |
5282 | if (graph->scc < graph->n) { |
5283 | if (!initialized && compute_maxvar(graph) < 0) |
5284 | return isl_schedule_node_free(node); |
5285 | return carry_dependences(node, graph); |
5286 | } |
5287 | |
5288 | filters = extract_sccs(ctx, graph); |
5289 | node = isl_schedule_node_insert_sequence(node, filters); |
5290 | |
5291 | return node; |
5292 | } |
5293 | |
5294 | /* Are there any (non-empty) (conditional) validity edges in the graph? |
5295 | */ |
5296 | static int has_validity_edges(struct isl_sched_graph *graph) |
5297 | { |
5298 | int i; |
5299 | |
5300 | for (i = 0; i < graph->n_edge; ++i) { |
5301 | int empty; |
5302 | |
5303 | empty = isl_map_plain_is_empty(graph->edge[i].map); |
5304 | if (empty < 0) |
5305 | return -1; |
5306 | if (empty) |
5307 | continue; |
5308 | if (is_any_validity(&graph->edge[i])) |
5309 | return 1; |
5310 | } |
5311 | |
5312 | return 0; |
5313 | } |
5314 | |
5315 | /* Should we apply a Feautrier step? |
5316 | * That is, did the user request the Feautrier algorithm and are |
5317 | * there any validity dependences (left)? |
5318 | */ |
5319 | static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph) |
5320 | { |
5321 | if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER1) |
5322 | return 0; |
5323 | |
5324 | return has_validity_edges(graph); |
5325 | } |
5326 | |
5327 | /* Compute a schedule for a connected dependence graph using Feautrier's |
5328 | * multi-dimensional scheduling algorithm and return the updated schedule node. |
5329 | * |
5330 | * The original algorithm is described in [1]. |
5331 | * The main idea is to minimize the number of scheduling dimensions, by |
5332 | * trying to satisfy as many dependences as possible per scheduling dimension. |
5333 | * |
5334 | * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling |
5335 | * Problem, Part II: Multi-Dimensional Time. |
5336 | * In Intl. Journal of Parallel Programming, 1992. |
5337 | */ |
5338 | static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier( |
5339 | isl_schedule_node *node, struct isl_sched_graph *graph) |
5340 | { |
5341 | return carry_feautrier(node, graph); |
5342 | } |
5343 | |
5344 | /* Turn off the "local" bit on all (condition) edges. |
5345 | */ |
5346 | static void clear_local_edges(struct isl_sched_graph *graph) |
5347 | { |
5348 | int i; |
5349 | |
5350 | for (i = 0; i < graph->n_edge; ++i) |
5351 | if (is_condition(&graph->edge[i])) |
5352 | clear_local(&graph->edge[i]); |
5353 | } |
5354 | |
5355 | /* Does "graph" have both condition and conditional validity edges? |
5356 | */ |
5357 | static int need_condition_check(struct isl_sched_graph *graph) |
5358 | { |
5359 | int i; |
5360 | int any_condition = 0; |
5361 | int any_conditional_validity = 0; |
5362 | |
5363 | for (i = 0; i < graph->n_edge; ++i) { |
5364 | if (is_condition(&graph->edge[i])) |
5365 | any_condition = 1; |
5366 | if (is_conditional_validity(&graph->edge[i])) |
5367 | any_conditional_validity = 1; |
5368 | } |
5369 | |
5370 | return any_condition && any_conditional_validity; |
5371 | } |
5372 | |
5373 | /* Does "graph" contain any coincidence edge? |
5374 | */ |
5375 | static int has_any_coincidence(struct isl_sched_graph *graph) |
5376 | { |
5377 | int i; |
5378 | |
5379 | for (i = 0; i < graph->n_edge; ++i) |
5380 | if (is_coincidence(&graph->edge[i])) |
5381 | return 1; |
5382 | |
5383 | return 0; |
5384 | } |
5385 | |
5386 | /* Extract the final schedule row as a map with the iteration domain |
5387 | * of "node" as domain. |
5388 | */ |
5389 | static __isl_give isl_map *final_row(struct isl_sched_node *node) |
5390 | { |
5391 | isl_multi_aff *ma; |
5392 | int row; |
5393 | |
5394 | row = isl_mat_rows(node->sched) - 1; |
5395 | ma = node_extract_partial_schedule_multi_aff(node, row, 1); |
5396 | return isl_map_from_multi_aff(ma); |
5397 | } |
5398 | |
5399 | /* Is the conditional validity dependence in the edge with index "edge_index" |
5400 | * violated by the latest (i.e., final) row of the schedule? |
5401 | * That is, is i scheduled after j |
5402 | * for any conditional validity dependence i -> j? |
5403 | */ |
5404 | static int is_violated(struct isl_sched_graph *graph, int edge_index) |
5405 | { |
5406 | isl_map *src_sched, *dst_sched, *map; |
5407 | struct isl_sched_edge *edge = &graph->edge[edge_index]; |
5408 | int empty; |
5409 | |
5410 | src_sched = final_row(edge->src); |
5411 | dst_sched = final_row(edge->dst); |
5412 | map = isl_map_copy(edge->map); |
5413 | map = isl_map_apply_domain(map, src_sched); |
5414 | map = isl_map_apply_range(map, dst_sched); |
5415 | map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0); |
5416 | empty = isl_map_is_empty(map); |
5417 | isl_map_free(map); |
5418 | |
5419 | if (empty < 0) |
5420 | return -1; |
5421 | |
5422 | return !empty; |
5423 | } |
5424 | |
5425 | /* Does "graph" have any satisfied condition edges that |
5426 | * are adjacent to the conditional validity constraint with |
5427 | * domain "conditional_source" and range "conditional_sink"? |
5428 | * |
5429 | * A satisfied condition is one that is not local. |
5430 | * If a condition was forced to be local already (i.e., marked as local) |
5431 | * then there is no need to check if it is in fact local. |
5432 | * |
5433 | * Additionally, mark all adjacent condition edges found as local. |
5434 | */ |
5435 | static int has_adjacent_true_conditions(struct isl_sched_graph *graph, |
5436 | __isl_keep isl_union_set *conditional_source, |
5437 | __isl_keep isl_union_set *conditional_sink) |
5438 | { |
5439 | int i; |
5440 | int any = 0; |
5441 | |
5442 | for (i = 0; i < graph->n_edge; ++i) { |
5443 | int adjacent, local; |
5444 | isl_union_map *condition; |
5445 | |
5446 | if (!is_condition(&graph->edge[i])) |
5447 | continue; |
5448 | if (is_local(&graph->edge[i])) |
5449 | continue; |
5450 | |
5451 | condition = graph->edge[i].tagged_condition; |
5452 | adjacent = domain_intersects(condition, conditional_sink); |
5453 | if (adjacent >= 0 && !adjacent) |
5454 | adjacent = range_intersects(condition, |
5455 | conditional_source); |
5456 | if (adjacent < 0) |
5457 | return -1; |
5458 | if (!adjacent) |
5459 | continue; |
5460 | |
5461 | set_local(&graph->edge[i]); |
5462 | |
5463 | local = is_condition_false(&graph->edge[i]); |
5464 | if (local < 0) |
5465 | return -1; |
5466 | if (!local) |
5467 | any = 1; |
5468 | } |
5469 | |
5470 | return any; |
5471 | } |
5472 | |
5473 | /* Are there any violated conditional validity dependences with |
5474 | * adjacent condition dependences that are not local with respect |
5475 | * to the current schedule? |
5476 | * That is, is the conditional validity constraint violated? |
5477 | * |
5478 | * Additionally, mark all those adjacent condition dependences as local. |
5479 | * We also mark those adjacent condition dependences that were not marked |
5480 | * as local before, but just happened to be local already. This ensures |
5481 | * that they remain local if the schedule is recomputed. |
5482 | * |
5483 | * We first collect domain and range of all violated conditional validity |
5484 | * dependences and then check if there are any adjacent non-local |
5485 | * condition dependences. |
5486 | */ |
5487 | static int has_violated_conditional_constraint(isl_ctx *ctx, |
5488 | struct isl_sched_graph *graph) |
5489 | { |
5490 | int i; |
5491 | int any = 0; |
5492 | isl_union_set *source, *sink; |
5493 | |
5494 | source = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); |
5495 | sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0)); |
5496 | for (i = 0; i < graph->n_edge; ++i) { |
5497 | isl_union_set *uset; |
5498 | isl_union_map *umap; |
5499 | int violated; |
5500 | |
5501 | if (!is_conditional_validity(&graph->edge[i])) |
5502 | continue; |
5503 | |
5504 | violated = is_violated(graph, i); |
5505 | if (violated < 0) |
5506 | goto error; |
5507 | if (!violated) |
5508 | continue; |
5509 | |
5510 | any = 1; |
5511 | |
5512 | umap = isl_union_map_copy(graph->edge[i].tagged_validity); |
5513 | uset = isl_union_map_domain(umap); |
5514 | source = isl_union_set_union(source, uset); |
5515 | source = isl_union_set_coalesce(source); |
5516 | |
5517 | umap = isl_union_map_copy(graph->edge[i].tagged_validity); |
5518 | uset = isl_union_map_range(umap); |
5519 | sink = isl_union_set_union(sink, uset); |
5520 | sink = isl_union_set_coalesce(sink); |
5521 | } |
5522 | |
5523 | if (any) |
5524 | any = has_adjacent_true_conditions(graph, source, sink); |
5525 | |
5526 | isl_union_set_free(source); |
5527 | isl_union_set_free(sink); |
5528 | return any; |
5529 | error: |
5530 | isl_union_set_free(source); |
5531 | isl_union_set_free(sink); |
5532 | return -1; |
5533 | } |
5534 | |
5535 | /* Examine the current band (the rows between graph->band_start and |
5536 | * graph->n_total_row), deciding whether to drop it or add it to "node" |
5537 | * and then continue with the computation of the next band, if any. |
5538 | * If "initialized" is set, then it may be assumed that compute_maxvar |
5539 | * has been called on the current band. Otherwise, call |
5540 | * compute_maxvar if and before carry_dependences gets called. |
5541 | * |
5542 | * The caller keeps looking for a new row as long as |
5543 | * graph->n_row < graph->maxvar. If the latest attempt to find |
5544 | * such a row failed (i.e., we still have graph->n_row < graph->maxvar), |
5545 | * then we either |
5546 | * - split between SCCs and start over (assuming we found an interesting |
5547 | * pair of SCCs between which to split) |
5548 | * - continue with the next band (assuming the current band has at least |
5549 | * one row) |
5550 | * - if there is more than one SCC left, then split along all SCCs |
5551 | * - if outer coincidence needs to be enforced, then try to carry as many |
5552 | * validity or coincidence dependences as possible and |
5553 | * continue with the next band |
5554 | * - try to carry as many validity dependences as possible and |
5555 | * continue with the next band |
5556 | * In each case, we first insert a band node in the schedule tree |
5557 | * if any rows have been computed. |
5558 | * |
5559 | * If the caller managed to complete the schedule and the current band |
5560 | * is empty, then finish off by topologically |
5561 | * sorting the statements based on the remaining dependences. |
5562 | * If, on the other hand, the current band has at least one row, |
5563 | * then continue with the next band. Note that this next band |
5564 | * will necessarily be empty, but the graph may still be split up |
5565 | * into weakly connected components before arriving back here. |
5566 | */ |
5567 | static __isl_give isl_schedule_node *compute_schedule_finish_band( |
5568 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
5569 | int initialized) |
5570 | { |
5571 | int empty; |
5572 | |
5573 | if (!node) |
5574 | return NULL((void*)0); |
5575 | |
5576 | empty = graph->n_total_row == graph->band_start; |
5577 | if (graph->n_row < graph->maxvar) { |
5578 | isl_ctx *ctx; |
5579 | |
5580 | ctx = isl_schedule_node_get_ctx(node); |
5581 | if (!ctx->opt->schedule_maximize_band_depth && !empty) |
5582 | return compute_next_band(node, graph, 1); |
5583 | if (graph->src_scc >= 0) |
5584 | return compute_split_schedule(node, graph); |
5585 | if (!empty) |
5586 | return compute_next_band(node, graph, 1); |
5587 | if (graph->scc > 1) |
5588 | return compute_component_schedule(node, graph, 1); |
5589 | if (!initialized && compute_maxvar(graph) < 0) |
5590 | return isl_schedule_node_free(node); |
5591 | if (isl_options_get_schedule_outer_coincidence(ctx)) |
5592 | return carry_coincidence(node, graph); |
5593 | return carry_dependences(node, graph); |
5594 | } |
5595 | |
5596 | if (!empty) |
5597 | return compute_next_band(node, graph, 1); |
5598 | return sort_statements(node, graph, initialized); |
5599 | } |
5600 | |
5601 | /* Construct a band of schedule rows for a connected dependence graph. |
5602 | * The caller is responsible for determining the strongly connected |
5603 | * components and calling compute_maxvar first. |
5604 | * |
5605 | * We try to find a sequence of as many schedule rows as possible that result |
5606 | * in non-negative dependence distances (independent of the previous rows |
5607 | * in the sequence, i.e., such that the sequence is tilable), with as |
5608 | * many of the initial rows as possible satisfying the coincidence constraints. |
5609 | * The computation stops if we can't find any more rows or if we have found |
5610 | * all the rows we wanted to find. |
5611 | * |
5612 | * If ctx->opt->schedule_outer_coincidence is set, then we force the |
5613 | * outermost dimension to satisfy the coincidence constraints. If this |
5614 | * turns out to be impossible, we fall back on the general scheme above |
5615 | * and try to carry as many dependences as possible. |
5616 | * |
5617 | * If "graph" contains both condition and conditional validity dependences, |
5618 | * then we need to check that the conditional schedule constraint |
5619 | * is satisfied, i.e., there are no violated conditional validity dependences |
5620 | * that are adjacent to any non-local condition dependences. |
5621 | * If there are, then we mark all those adjacent condition dependences |
5622 | * as local and recompute the current band. Those dependences that |
5623 | * are marked local will then be forced to be local. |
5624 | * The initial computation is performed with no dependences marked as local. |
5625 | * If we are lucky, then there will be no violated conditional validity |
5626 | * dependences adjacent to any non-local condition dependences. |
5627 | * Otherwise, we mark some additional condition dependences as local and |
5628 | * recompute. We continue this process until there are no violations left or |
5629 | * until we are no longer able to compute a schedule. |
5630 | * Since there are only a finite number of dependences, |
5631 | * there will only be a finite number of iterations. |
5632 | */ |
5633 | static isl_stat compute_schedule_wcc_band(isl_ctx *ctx, |
5634 | struct isl_sched_graph *graph) |
5635 | { |
5636 | int has_coincidence; |
5637 | int use_coincidence; |
5638 | int force_coincidence = 0; |
5639 | int check_conditional; |
5640 | |
5641 | if (sort_sccs(graph) < 0) |
5642 | return isl_stat_error; |
5643 | |
5644 | clear_local_edges(graph); |
5645 | check_conditional = need_condition_check(graph); |
5646 | has_coincidence = has_any_coincidence(graph); |
5647 | |
5648 | if (ctx->opt->schedule_outer_coincidence) |
5649 | force_coincidence = 1; |
5650 | |
5651 | use_coincidence = has_coincidence; |
5652 | while (graph->n_row < graph->maxvar) { |
5653 | isl_vec *sol; |
5654 | int violated; |
5655 | int coincident; |
5656 | |
5657 | graph->src_scc = -1; |
5658 | graph->dst_scc = -1; |
5659 | |
5660 | if (setup_lp(ctx, graph, use_coincidence) < 0) |
5661 | return isl_stat_error; |
5662 | sol = solve_lp(ctx, graph); |
5663 | if (!sol) |
5664 | return isl_stat_error; |
5665 | if (sol->size == 0) { |
5666 | int empty = graph->n_total_row == graph->band_start; |
5667 | |
5668 | isl_vec_free(sol); |
5669 | if (use_coincidence && (!force_coincidence || !empty)) { |
5670 | use_coincidence = 0; |
5671 | continue; |
5672 | } |
5673 | return isl_stat_ok; |
5674 | } |
5675 | coincident = !has_coincidence || use_coincidence; |
5676 | if (update_schedule(graph, sol, coincident) < 0) |
5677 | return isl_stat_error; |
5678 | |
5679 | if (!check_conditional) |
5680 | continue; |
5681 | violated = has_violated_conditional_constraint(ctx, graph); |
5682 | if (violated < 0) |
5683 | return isl_stat_error; |
5684 | if (!violated) |
5685 | continue; |
5686 | if (reset_band(graph) < 0) |
5687 | return isl_stat_error; |
5688 | use_coincidence = has_coincidence; |
5689 | } |
5690 | |
5691 | return isl_stat_ok; |
5692 | } |
5693 | |
5694 | /* Compute a schedule for a connected dependence graph by considering |
5695 | * the graph as a whole and return the updated schedule node. |
5696 | * |
5697 | * The actual schedule rows of the current band are computed by |
5698 | * compute_schedule_wcc_band. compute_schedule_finish_band takes |
5699 | * care of integrating the band into "node" and continuing |
5700 | * the computation. |
5701 | */ |
5702 | static __isl_give isl_schedule_node *compute_schedule_wcc_whole( |
5703 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
5704 | { |
5705 | isl_ctx *ctx; |
5706 | |
5707 | if (!node) |
5708 | return NULL((void*)0); |
5709 | |
5710 | ctx = isl_schedule_node_get_ctx(node); |
5711 | if (compute_schedule_wcc_band(ctx, graph) < 0) |
5712 | return isl_schedule_node_free(node); |
5713 | |
5714 | return compute_schedule_finish_band(node, graph, 1); |
5715 | } |
5716 | |
/* Bookkeeping for compute_schedule_wcc_clustering.
 *
 * "n" is the number of SCCs in the original dependence graph.
 * "scc" holds "n" graphs, one per SCC of the original dependence graph.
 * All SCCs in the same cluster have the same number of schedule rows.
 * "cluster" holds the merged clusters of SCCs once clustering
 * has completed.
 * "scc_cluster" maps an SCC index to its cluster, where a cluster is
 * identified by the index of its first SCC.  Initially, every SCC
 * forms a cluster of its own.
 * "scc_node" is scratch space for copy_partial: per SCC, the number
 * of nodes in that SCC that have already been copied.
 * "scc_in_merge" is used by merge_clusters_along_edge to record
 * which SCCs need to be merged.
 */
struct isl_clustering {
	int n;				/* number of SCCs */
	struct isl_sched_graph *scc;	/* one graph per SCC */
	struct isl_sched_graph *cluster;	/* merged clusters */
	int *scc_cluster;		/* SCC index -> cluster index */
	int *scc_node;			/* per-SCC copied node count */
	int *scc_in_merge;		/* per-SCC merge marker */
};
5746 | |
5747 | /* Initialize the clustering data structure "c" from "graph". |
5748 | * |
5749 | * In particular, allocate memory, extract the SCCs from "graph" |
5750 | * into c->scc, initialize scc_cluster and construct |
5751 | * a band of schedule rows for each SCC. |
5752 | * Within each SCC, there is only one SCC by definition. |
5753 | * Each SCC initially belongs to a cluster containing only that SCC. |
5754 | */ |
5755 | static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c, |
5756 | struct isl_sched_graph *graph) |
5757 | { |
5758 | int i; |
5759 | |
5760 | c->n = graph->scc; |
5761 | c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n)((struct isl_sched_graph *)isl_calloc_or_die(ctx, c->n, sizeof (struct isl_sched_graph))); |
5762 | c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n)((struct isl_sched_graph *)isl_calloc_or_die(ctx, c->n, sizeof (struct isl_sched_graph))); |
5763 | c->scc_cluster = isl_calloc_array(ctx, int, c->n)((int *)isl_calloc_or_die(ctx, c->n, sizeof(int))); |
5764 | c->scc_node = isl_calloc_array(ctx, int, c->n)((int *)isl_calloc_or_die(ctx, c->n, sizeof(int))); |
5765 | c->scc_in_merge = isl_calloc_array(ctx, int, c->n)((int *)isl_calloc_or_die(ctx, c->n, sizeof(int))); |
5766 | if (!c->scc || !c->cluster || |
5767 | !c->scc_cluster || !c->scc_node || !c->scc_in_merge) |
5768 | return isl_stat_error; |
5769 | |
5770 | for (i = 0; i < c->n; ++i) { |
5771 | if (extract_sub_graph(ctx, graph, &node_scc_exactly, |
5772 | &edge_scc_exactly, i, &c->scc[i]) < 0) |
5773 | return isl_stat_error; |
5774 | c->scc[i].scc = 1; |
5775 | if (compute_maxvar(&c->scc[i]) < 0) |
5776 | return isl_stat_error; |
5777 | if (compute_schedule_wcc_band(ctx, &c->scc[i]) < 0) |
5778 | return isl_stat_error; |
5779 | c->scc_cluster[i] = i; |
5780 | } |
5781 | |
5782 | return isl_stat_ok; |
5783 | } |
5784 | |
5785 | /* Free all memory allocated for "c". |
5786 | */ |
5787 | static void clustering_free(isl_ctx *ctx, struct isl_clustering *c) |
5788 | { |
5789 | int i; |
5790 | |
5791 | if (c->scc) |
5792 | for (i = 0; i < c->n; ++i) |
5793 | graph_free(ctx, &c->scc[i]); |
5794 | free(c->scc); |
5795 | if (c->cluster) |
5796 | for (i = 0; i < c->n; ++i) |
5797 | graph_free(ctx, &c->cluster[i]); |
5798 | free(c->cluster); |
5799 | free(c->scc_cluster); |
5800 | free(c->scc_node); |
5801 | free(c->scc_in_merge); |
5802 | } |
5803 | |
5804 | /* Should we refrain from merging the cluster in "graph" with |
5805 | * any other cluster? |
5806 | * In particular, is its current schedule band empty and incomplete. |
5807 | */ |
5808 | static int bad_cluster(struct isl_sched_graph *graph) |
5809 | { |
5810 | return graph->n_row < graph->maxvar && |
5811 | graph->n_total_row == graph->band_start; |
5812 | } |
5813 | |
5814 | /* Is "edge" a proximity edge with a non-empty dependence relation? |
5815 | */ |
5816 | static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge) |
5817 | { |
5818 | if (!is_proximity(edge)) |
5819 | return isl_bool_false; |
5820 | return isl_bool_not(isl_map_plain_is_empty(edge->map)); |
5821 | } |
5822 | |
5823 | /* Return the index of an edge in "graph" that can be used to merge |
5824 | * two clusters in "c". |
5825 | * Return graph->n_edge if no such edge can be found. |
5826 | * Return -1 on error. |
5827 | * |
5828 | * In particular, return a proximity edge between two clusters |
5829 | * that is not marked "no_merge" and such that neither of the |
5830 | * two clusters has an incomplete, empty band. |
5831 | * |
5832 | * If there are multiple such edges, then try and find the most |
5833 | * appropriate edge to use for merging. In particular, pick the edge |
5834 | * with the greatest weight. If there are multiple of those, |
5835 | * then pick one with the shortest distance between |
5836 | * the two cluster representatives. |
5837 | */ |
5838 | static int find_proximity(struct isl_sched_graph *graph, |
5839 | struct isl_clustering *c) |
5840 | { |
5841 | int i, best = graph->n_edge, best_dist, best_weight; |
5842 | |
5843 | for (i = 0; i < graph->n_edge; ++i) { |
5844 | struct isl_sched_edge *edge = &graph->edge[i]; |
5845 | int dist, weight; |
5846 | isl_bool prox; |
5847 | |
5848 | prox = is_non_empty_proximity(edge); |
5849 | if (prox < 0) |
5850 | return -1; |
5851 | if (!prox) |
5852 | continue; |
5853 | if (edge->no_merge) |
5854 | continue; |
5855 | if (bad_cluster(&c->scc[edge->src->scc]) || |
5856 | bad_cluster(&c->scc[edge->dst->scc])) |
5857 | continue; |
5858 | dist = c->scc_cluster[edge->dst->scc] - |
5859 | c->scc_cluster[edge->src->scc]; |
5860 | if (dist == 0) |
5861 | continue; |
5862 | weight = edge->weight; |
5863 | if (best < graph->n_edge) { |
5864 | if (best_weight > weight) |
5865 | continue; |
5866 | if (best_weight == weight && best_dist <= dist) |
5867 | continue; |
5868 | } |
5869 | best = i; |
5870 | best_dist = dist; |
5871 | best_weight = weight; |
5872 | } |
5873 | |
5874 | return best; |
5875 | } |
5876 | |
/* User data passed to cluster_follows by mark_merge_sccs.
 *
 * "graph" is the dependence graph in which a strongly connected
 * component is being constructed.
 * "scc_cluster" maps an SCC index to the cluster it belongs to.
 * "src" and "dst" are the indices (in graph->node) of the two nodes
 * that are being merged.
 */
struct isl_mark_merge_sccs_data {
	struct isl_sched_graph *graph;	/* graph being traversed */
	int *scc_cluster;		/* SCC index -> cluster index */
	int src;			/* first node being merged */
	int dst;			/* second node being merged */
};
5890 | |
5891 | /* Check whether the cluster containing node "i" depends on the cluster |
5892 | * containing node "j". If "i" and "j" belong to the same cluster, |
5893 | * then they are taken to depend on each other to ensure that |
5894 | * the resulting strongly connected component consists of complete |
5895 | * clusters. Furthermore, if "i" and "j" are the two nodes that |
5896 | * are being merged, then they are taken to depend on each other as well. |
5897 | * Otherwise, check if there is a (conditional) validity dependence |
5898 | * from node[j] to node[i], forcing node[i] to follow node[j]. |
5899 | */ |
5900 | static isl_bool cluster_follows(int i, int j, void *user) |
5901 | { |
5902 | struct isl_mark_merge_sccs_data *data = user; |
5903 | struct isl_sched_graph *graph = data->graph; |
5904 | int *scc_cluster = data->scc_cluster; |
5905 | |
5906 | if (data->src == i && data->dst == j) |
5907 | return isl_bool_true; |
5908 | if (data->src == j && data->dst == i) |
5909 | return isl_bool_true; |
5910 | if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc]) |
5911 | return isl_bool_true; |
5912 | |
5913 | return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]); |
5914 | } |
5915 | |
5916 | /* Mark all SCCs that belong to either of the two clusters in "c" |
5917 | * connected by the edge in "graph" with index "edge", or to any |
5918 | * of the intermediate clusters. |
5919 | * The marking is recorded in c->scc_in_merge. |
5920 | * |
5921 | * The given edge has been selected for merging two clusters, |
5922 | * meaning that there is at least a proximity edge between the two nodes. |
5923 | * However, there may also be (indirect) validity dependences |
5924 | * between the two nodes. When merging the two clusters, all clusters |
5925 | * containing one or more of the intermediate nodes along the |
5926 | * indirect validity dependences need to be merged in as well. |
5927 | * |
5928 | * First collect all such nodes by computing the strongly connected |
5929 | * component (SCC) containing the two nodes connected by the edge, where |
5930 | * the two nodes are considered to depend on each other to make |
5931 | * sure they end up in the same SCC. Similarly, each node is considered |
5932 | * to depend on every other node in the same cluster to ensure |
5933 | * that the SCC consists of complete clusters. |
5934 | * |
5935 | * Then the original SCCs that contain any of these nodes are marked |
5936 | * in c->scc_in_merge. |
5937 | */ |
5938 | static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph, |
5939 | int edge, struct isl_clustering *c) |
5940 | { |
5941 | struct isl_mark_merge_sccs_data data; |
5942 | struct isl_tarjan_graph *g; |
5943 | int i; |
5944 | |
5945 | for (i = 0; i < c->n; ++i) |
5946 | c->scc_in_merge[i] = 0; |
5947 | |
5948 | data.graph = graph; |
5949 | data.scc_cluster = c->scc_cluster; |
5950 | data.src = graph->edge[edge].src - graph->node; |
5951 | data.dst = graph->edge[edge].dst - graph->node; |
5952 | |
5953 | g = isl_tarjan_graph_component(ctx, graph->n, data.dst, |
5954 | &cluster_follows, &data); |
5955 | if (!g) |
5956 | goto error; |
5957 | |
5958 | i = g->op; |
5959 | if (i < 3) |
5960 | isl_die(ctx, isl_error_internal,do { isl_handle_error(ctx, isl_error_internal, "expecting at least two nodes in component" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 5962); goto error; } while (0) |
5961 | "expecting at least two nodes in component",do { isl_handle_error(ctx, isl_error_internal, "expecting at least two nodes in component" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 5962); goto error; } while (0) |
5962 | goto error)do { isl_handle_error(ctx, isl_error_internal, "expecting at least two nodes in component" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 5962); goto error; } while (0); |
5963 | if (g->order[--i] != -1) |
5964 | isl_die(ctx, isl_error_internal,do { isl_handle_error(ctx, isl_error_internal, "expecting end of component marker" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 5965); goto error; } while (0) |
5965 | "expecting end of component marker", goto error)do { isl_handle_error(ctx, isl_error_internal, "expecting end of component marker" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 5965); goto error; } while (0); |
5966 | |
5967 | for (--i; i >= 0 && g->order[i] != -1; --i) { |
5968 | int scc = graph->node[g->order[i]].scc; |
5969 | c->scc_in_merge[scc] = 1; |
5970 | } |
5971 | |
5972 | isl_tarjan_graph_free(g); |
5973 | return isl_stat_ok; |
5974 | error: |
5975 | isl_tarjan_graph_free(g); |
5976 | return isl_stat_error; |
5977 | } |
5978 | |
5979 | /* Construct the identifier "cluster_i". |
5980 | */ |
5981 | static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i) |
5982 | { |
5983 | char name[40]; |
5984 | |
5985 | snprintf(name, sizeof(name), "cluster_%d", i); |
5986 | return isl_id_alloc(ctx, name, NULL((void*)0)); |
5987 | } |
5988 | |
5989 | /* Construct the space of the cluster with index "i" containing |
5990 | * the strongly connected component "scc". |
5991 | * |
5992 | * In particular, construct a space called cluster_i with dimension equal |
5993 | * to the number of schedule rows in the current band of "scc". |
5994 | */ |
5995 | static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i) |
5996 | { |
5997 | int nvar; |
5998 | isl_space *space; |
5999 | isl_id *id; |
6000 | |
6001 | nvar = scc->n_total_row - scc->band_start; |
6002 | space = isl_space_copy(scc->node[0].space); |
6003 | space = isl_space_params(space); |
6004 | space = isl_space_set_from_params(space); |
6005 | space = isl_space_add_dims(space, isl_dim_set, nvar); |
6006 | id = cluster_id(isl_space_get_ctx(space), i); |
6007 | space = isl_space_set_tuple_id(space, isl_dim_set, id); |
6008 | |
6009 | return space; |
6010 | } |
6011 | |
6012 | /* Collect the domain of the graph for merging clusters. |
6013 | * |
6014 | * In particular, for each cluster with first SCC "i", construct |
6015 | * a set in the space called cluster_i with dimension equal |
6016 | * to the number of schedule rows in the current band of the cluster. |
6017 | */ |
6018 | static __isl_give isl_union_set *collect_domain(isl_ctx *ctx, |
6019 | struct isl_sched_graph *graph, struct isl_clustering *c) |
6020 | { |
6021 | int i; |
6022 | isl_space *space; |
6023 | isl_union_set *domain; |
6024 | |
6025 | space = isl_space_params_alloc(ctx, 0); |
6026 | domain = isl_union_set_empty(space); |
6027 | |
6028 | for (i = 0; i < graph->scc; ++i) { |
6029 | isl_space *space; |
6030 | |
6031 | if (!c->scc_in_merge[i]) |
6032 | continue; |
6033 | if (c->scc_cluster[i] != i) |
6034 | continue; |
6035 | space = cluster_space(&c->scc[i], i); |
6036 | domain = isl_union_set_add_set(domain, isl_set_universe(space)); |
6037 | } |
6038 | |
6039 | return domain; |
6040 | } |
6041 | |
6042 | /* Construct a map from the original instances to the corresponding |
6043 | * cluster instance in the current bands of the clusters in "c". |
6044 | */ |
6045 | static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx, |
6046 | struct isl_sched_graph *graph, struct isl_clustering *c) |
6047 | { |
6048 | int i, j; |
6049 | isl_space *space; |
6050 | isl_union_map *cluster_map; |
6051 | |
6052 | space = isl_space_params_alloc(ctx, 0); |
6053 | cluster_map = isl_union_map_empty(space); |
6054 | for (i = 0; i < graph->scc; ++i) { |
6055 | int start, n; |
6056 | isl_id *id; |
6057 | |
6058 | if (!c->scc_in_merge[i]) |
6059 | continue; |
6060 | |
6061 | id = cluster_id(ctx, c->scc_cluster[i]); |
6062 | start = c->scc[i].band_start; |
6063 | n = c->scc[i].n_total_row - start; |
6064 | for (j = 0; j < c->scc[i].n; ++j) { |
6065 | isl_multi_aff *ma; |
6066 | isl_map *map; |
6067 | struct isl_sched_node *node = &c->scc[i].node[j]; |
6068 | |
6069 | ma = node_extract_partial_schedule_multi_aff(node, |
6070 | start, n); |
6071 | ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, |
6072 | isl_id_copy(id)); |
6073 | map = isl_map_from_multi_aff(ma); |
6074 | cluster_map = isl_union_map_add_map(cluster_map, map); |
6075 | } |
6076 | isl_id_free(id); |
6077 | } |
6078 | |
6079 | return cluster_map; |
6080 | } |
6081 | |
6082 | /* Add "umap" to the schedule constraints "sc" of all types of "edge" |
6083 | * that are not isl_edge_condition or isl_edge_conditional_validity. |
6084 | */ |
6085 | static __isl_give isl_schedule_constraints *add_non_conditional_constraints( |
6086 | struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, |
6087 | __isl_take isl_schedule_constraints *sc) |
6088 | { |
6089 | enum isl_edge_type t; |
6090 | |
6091 | if (!sc) |
6092 | return NULL((void*)0); |
6093 | |
6094 | for (t = isl_edge_first; t <= isl_edge_last; ++t) { |
6095 | if (t == isl_edge_condition || |
6096 | t == isl_edge_conditional_validity) |
6097 | continue; |
6098 | if (!is_type(edge, t)) |
6099 | continue; |
6100 | sc = isl_schedule_constraints_add(sc, t, |
6101 | isl_union_map_copy(umap)); |
6102 | } |
6103 | |
6104 | return sc; |
6105 | } |
6106 | |
6107 | /* Add schedule constraints of types isl_edge_condition and |
6108 | * isl_edge_conditional_validity to "sc" by applying "umap" to |
6109 | * the domains of the wrapped relations in domain and range |
6110 | * of the corresponding tagged constraints of "edge". |
6111 | */ |
6112 | static __isl_give isl_schedule_constraints *add_conditional_constraints( |
6113 | struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, |
6114 | __isl_take isl_schedule_constraints *sc) |
6115 | { |
6116 | enum isl_edge_type t; |
6117 | isl_union_map *tagged; |
6118 | |
6119 | for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) { |
6120 | if (!is_type(edge, t)) |
6121 | continue; |
6122 | if (t == isl_edge_condition) |
6123 | tagged = isl_union_map_copy(edge->tagged_condition); |
6124 | else |
6125 | tagged = isl_union_map_copy(edge->tagged_validity); |
6126 | tagged = isl_union_map_zip(tagged); |
6127 | tagged = isl_union_map_apply_domain(tagged, |
6128 | isl_union_map_copy(umap)); |
6129 | tagged = isl_union_map_zip(tagged); |
6130 | sc = isl_schedule_constraints_add(sc, t, tagged); |
6131 | if (!sc) |
6132 | return NULL((void*)0); |
6133 | } |
6134 | |
6135 | return sc; |
6136 | } |
6137 | |
6138 | /* Given a mapping "cluster_map" from the original instances to |
6139 | * the cluster instances, add schedule constraints on the clusters |
6140 | * to "sc" corresponding to the original constraints represented by "edge". |
6141 | * |
6142 | * For non-tagged dependence constraints, the cluster constraints |
6143 | * are obtained by applying "cluster_map" to the edge->map. |
6144 | * |
6145 | * For tagged dependence constraints, "cluster_map" needs to be applied |
6146 | * to the domains of the wrapped relations in domain and range |
6147 | * of the tagged dependence constraints. Pick out the mappings |
6148 | * from these domains from "cluster_map" and construct their product. |
6149 | * This mapping can then be applied to the pair of domains. |
6150 | */ |
6151 | static __isl_give isl_schedule_constraints *collect_edge_constraints( |
6152 | struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map, |
6153 | __isl_take isl_schedule_constraints *sc) |
6154 | { |
6155 | isl_union_map *umap; |
6156 | isl_space *space; |
6157 | isl_union_set *uset; |
6158 | isl_union_map *umap1, *umap2; |
6159 | |
6160 | if (!sc) |
6161 | return NULL((void*)0); |
6162 | |
6163 | umap = isl_union_map_from_map(isl_map_copy(edge->map)); |
6164 | umap = isl_union_map_apply_domain(umap, |
6165 | isl_union_map_copy(cluster_map)); |
6166 | umap = isl_union_map_apply_range(umap, |
6167 | isl_union_map_copy(cluster_map)); |
6168 | sc = add_non_conditional_constraints(edge, umap, sc); |
6169 | isl_union_map_free(umap); |
6170 | |
6171 | if (!sc || (!is_condition(edge) && !is_conditional_validity(edge))) |
6172 | return sc; |
6173 | |
6174 | space = isl_space_domain(isl_map_get_space(edge->map)); |
6175 | uset = isl_union_set_from_set(isl_set_universe(space)); |
6176 | umap1 = isl_union_map_copy(cluster_map); |
6177 | umap1 = isl_union_map_intersect_domain(umap1, uset); |
6178 | space = isl_space_range(isl_map_get_space(edge->map)); |
6179 | uset = isl_union_set_from_set(isl_set_universe(space)); |
6180 | umap2 = isl_union_map_copy(cluster_map); |
6181 | umap2 = isl_union_map_intersect_domain(umap2, uset); |
6182 | umap = isl_union_map_product(umap1, umap2); |
6183 | |
6184 | sc = add_conditional_constraints(edge, umap, sc); |
6185 | |
6186 | isl_union_map_free(umap); |
6187 | return sc; |
6188 | } |
6189 | |
6190 | /* Given a mapping "cluster_map" from the original instances to |
6191 | * the cluster instances, add schedule constraints on the clusters |
6192 | * to "sc" corresponding to all edges in "graph" between nodes that |
6193 | * belong to SCCs that are marked for merging in "scc_in_merge". |
6194 | */ |
6195 | static __isl_give isl_schedule_constraints *collect_constraints( |
6196 | struct isl_sched_graph *graph, int *scc_in_merge, |
6197 | __isl_keep isl_union_map *cluster_map, |
6198 | __isl_take isl_schedule_constraints *sc) |
6199 | { |
6200 | int i; |
6201 | |
6202 | for (i = 0; i < graph->n_edge; ++i) { |
6203 | struct isl_sched_edge *edge = &graph->edge[i]; |
6204 | |
6205 | if (!scc_in_merge[edge->src->scc]) |
6206 | continue; |
6207 | if (!scc_in_merge[edge->dst->scc]) |
6208 | continue; |
6209 | sc = collect_edge_constraints(edge, cluster_map, sc); |
6210 | } |
6211 | |
6212 | return sc; |
6213 | } |
6214 | |
6215 | /* Construct a dependence graph for scheduling clusters with respect |
6216 | * to each other and store the result in "merge_graph". |
6217 | * In particular, the nodes of the graph correspond to the schedule |
6218 | * dimensions of the current bands of those clusters that have been |
6219 | * marked for merging in "c". |
6220 | * |
6221 | * First construct an isl_schedule_constraints object for this domain |
6222 | * by transforming the edges in "graph" to the domain. |
6223 | * Then initialize a dependence graph for scheduling from these |
6224 | * constraints. |
6225 | */ |
6226 | static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph, |
6227 | struct isl_clustering *c, struct isl_sched_graph *merge_graph) |
6228 | { |
6229 | isl_union_set *domain; |
6230 | isl_union_map *cluster_map; |
6231 | isl_schedule_constraints *sc; |
6232 | isl_stat r; |
6233 | |
6234 | domain = collect_domain(ctx, graph, c); |
6235 | sc = isl_schedule_constraints_on_domain(domain); |
6236 | if (!sc) |
6237 | return isl_stat_error; |
6238 | cluster_map = collect_cluster_map(ctx, graph, c); |
6239 | sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc); |
6240 | isl_union_map_free(cluster_map); |
6241 | |
6242 | r = graph_init(merge_graph, sc); |
6243 | |
6244 | isl_schedule_constraints_free(sc); |
6245 | |
6246 | return r; |
6247 | } |
6248 | |
6249 | /* Compute the maximal number of remaining schedule rows that still need |
6250 | * to be computed for the nodes that belong to clusters with the maximal |
6251 | * dimension for the current band (i.e., the band that is to be merged). |
6252 | * Only clusters that are about to be merged are considered. |
6253 | * "maxvar" is the maximal dimension for the current band. |
6254 | * "c" contains information about the clusters. |
6255 | * |
6256 | * Return the maximal number of remaining schedule rows or -1 on error. |
6257 | */ |
6258 | static int compute_maxvar_max_slack(int maxvar, struct isl_clustering *c) |
6259 | { |
6260 | int i, j; |
6261 | int max_slack; |
6262 | |
6263 | max_slack = 0; |
6264 | for (i = 0; i < c->n; ++i) { |
6265 | int nvar; |
6266 | struct isl_sched_graph *scc; |
6267 | |
6268 | if (!c->scc_in_merge[i]) |
6269 | continue; |
6270 | scc = &c->scc[i]; |
6271 | nvar = scc->n_total_row - scc->band_start; |
6272 | if (nvar != maxvar) |
6273 | continue; |
6274 | for (j = 0; j < scc->n; ++j) { |
6275 | struct isl_sched_node *node = &scc->node[j]; |
6276 | int slack; |
6277 | |
6278 | if (node_update_vmap(node) < 0) |
6279 | return -1; |
6280 | slack = node->nvar - node->rank; |
6281 | if (slack > max_slack) |
6282 | max_slack = slack; |
6283 | } |
6284 | } |
6285 | |
6286 | return max_slack; |
6287 | } |
6288 | |
6289 | /* If there are any clusters where the dimension of the current band |
6290 | * (i.e., the band that is to be merged) is smaller than "maxvar" and |
6291 | * if there are any nodes in such a cluster where the number |
6292 | * of remaining schedule rows that still need to be computed |
6293 | * is greater than "max_slack", then return the smallest current band |
6294 | * dimension of all these clusters. Otherwise return the original value |
6295 | * of "maxvar". Return -1 in case of any error. |
6296 | * Only clusters that are about to be merged are considered. |
6297 | * "c" contains information about the clusters. |
6298 | */ |
6299 | static int limit_maxvar_to_slack(int maxvar, int max_slack, |
6300 | struct isl_clustering *c) |
6301 | { |
6302 | int i, j; |
6303 | |
6304 | for (i = 0; i < c->n; ++i) { |
6305 | int nvar; |
6306 | struct isl_sched_graph *scc; |
6307 | |
6308 | if (!c->scc_in_merge[i]) |
6309 | continue; |
6310 | scc = &c->scc[i]; |
6311 | nvar = scc->n_total_row - scc->band_start; |
6312 | if (nvar >= maxvar) |
6313 | continue; |
6314 | for (j = 0; j < scc->n; ++j) { |
6315 | struct isl_sched_node *node = &scc->node[j]; |
6316 | int slack; |
6317 | |
6318 | if (node_update_vmap(node) < 0) |
6319 | return -1; |
6320 | slack = node->nvar - node->rank; |
6321 | if (slack > max_slack) { |
6322 | maxvar = nvar; |
6323 | break; |
6324 | } |
6325 | } |
6326 | } |
6327 | |
6328 | return maxvar; |
6329 | } |
6330 | |
6331 | /* Adjust merge_graph->maxvar based on the number of remaining schedule rows |
6332 | * that still need to be computed. In particular, if there is a node |
6333 | * in a cluster where the dimension of the current band is smaller |
6334 | * than merge_graph->maxvar, but the number of remaining schedule rows |
6335 | * is greater than that of any node in a cluster with the maximal |
6336 | * dimension for the current band (i.e., merge_graph->maxvar), |
6337 | * then adjust merge_graph->maxvar to the (smallest) current band dimension |
6338 | * of those clusters. Without this adjustment, the total number of |
6339 | * schedule dimensions would be increased, resulting in a skewed view |
6340 | * of the number of coincident dimensions. |
6341 | * "c" contains information about the clusters. |
6342 | * |
6343 | * If the maximize_band_depth option is set and merge_graph->maxvar is reduced, |
6344 | * then there is no point in attempting any merge since it will be rejected |
6345 | * anyway. Set merge_graph->maxvar to zero in such cases. |
6346 | */ |
6347 | static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx, |
6348 | struct isl_sched_graph *merge_graph, struct isl_clustering *c) |
6349 | { |
6350 | int max_slack, maxvar; |
6351 | |
6352 | max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c); |
6353 | if (max_slack < 0) |
6354 | return isl_stat_error; |
6355 | maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c); |
6356 | if (maxvar < 0) |
6357 | return isl_stat_error; |
6358 | |
6359 | if (maxvar < merge_graph->maxvar) { |
6360 | if (isl_options_get_schedule_maximize_band_depth(ctx)) |
6361 | merge_graph->maxvar = 0; |
6362 | else |
6363 | merge_graph->maxvar = maxvar; |
6364 | } |
6365 | |
6366 | return isl_stat_ok; |
6367 | } |
6368 | |
6369 | /* Return the number of coincident dimensions in the current band of "graph", |
6370 | * where the nodes of "graph" are assumed to be scheduled by a single band. |
6371 | */ |
6372 | static int get_n_coincident(struct isl_sched_graph *graph) |
6373 | { |
6374 | int i; |
6375 | |
6376 | for (i = graph->band_start; i < graph->n_total_row; ++i) |
6377 | if (!graph->node[0].coincident[i]) |
6378 | break; |
6379 | |
6380 | return i - graph->band_start; |
6381 | } |
6382 | |
6383 | /* Should the clusters be merged based on the cluster schedule |
6384 | * in the current (and only) band of "merge_graph", given that |
6385 | * coincidence should be maximized? |
6386 | * |
6387 | * If the number of coincident schedule dimensions in the merged band |
6388 | * would be less than the maximal number of coincident schedule dimensions |
6389 | * in any of the merged clusters, then the clusters should not be merged. |
6390 | */ |
6391 | static isl_bool ok_to_merge_coincident(struct isl_clustering *c, |
6392 | struct isl_sched_graph *merge_graph) |
6393 | { |
6394 | int i; |
6395 | int n_coincident; |
6396 | int max_coincident; |
6397 | |
6398 | max_coincident = 0; |
6399 | for (i = 0; i < c->n; ++i) { |
6400 | if (!c->scc_in_merge[i]) |
6401 | continue; |
6402 | n_coincident = get_n_coincident(&c->scc[i]); |
6403 | if (n_coincident > max_coincident) |
6404 | max_coincident = n_coincident; |
6405 | } |
6406 | |
6407 | n_coincident = get_n_coincident(merge_graph); |
6408 | |
6409 | return n_coincident >= max_coincident; |
6410 | } |
6411 | |
6412 | /* Return the transformation on "node" expressed by the current (and only) |
6413 | * band of "merge_graph" applied to the clusters in "c". |
6414 | * |
6415 | * First find the representation of "node" in its SCC in "c" and |
6416 | * extract the transformation expressed by the current band. |
6417 | * Then extract the transformation applied by "merge_graph" |
6418 | * to the cluster to which this SCC belongs. |
6419 | * Combine the two to obtain the complete transformation on the node. |
6420 | * |
6421 | * Note that the range of the first transformation is an anonymous space, |
6422 | * while the domain of the second is named "cluster_X". The range |
6423 | * of the former therefore needs to be adjusted before the two |
6424 | * can be combined. |
6425 | */ |
6426 | static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx, |
6427 | struct isl_sched_node *node, struct isl_clustering *c, |
6428 | struct isl_sched_graph *merge_graph) |
6429 | { |
6430 | struct isl_sched_node *scc_node, *cluster_node; |
6431 | int start, n; |
6432 | isl_id *id; |
6433 | isl_space *space; |
6434 | isl_multi_aff *ma, *ma2; |
6435 | |
6436 | scc_node = graph_find_node(ctx, &c->scc[node->scc], node->space); |
6437 | if (scc_node && !is_node(&c->scc[node->scc], scc_node)) |
6438 | isl_die(ctx, isl_error_internal, "unable to find node",do { isl_handle_error(ctx, isl_error_internal, "unable to find node" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 6439); return ((void*)0); } while (0) |
6439 | return NULL)do { isl_handle_error(ctx, isl_error_internal, "unable to find node" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 6439); return ((void*)0); } while (0); |
6440 | start = c->scc[node->scc].band_start; |
6441 | n = c->scc[node->scc].n_total_row - start; |
6442 | ma = node_extract_partial_schedule_multi_aff(scc_node, start, n); |
6443 | space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]); |
6444 | cluster_node = graph_find_node(ctx, merge_graph, space); |
6445 | if (cluster_node && !is_node(merge_graph, cluster_node)) |
6446 | isl_die(ctx, isl_error_internal, "unable to find cluster",do { isl_handle_error(ctx, isl_error_internal, "unable to find cluster" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 6447); space = isl_space_free(space); } while (0) |
6447 | space = isl_space_free(space))do { isl_handle_error(ctx, isl_error_internal, "unable to find cluster" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 6447); space = isl_space_free(space); } while (0); |
6448 | id = isl_space_get_tuple_id(space, isl_dim_set); |
6449 | ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id); |
6450 | isl_space_free(space); |
6451 | n = merge_graph->n_total_row; |
6452 | ma2 = node_extract_partial_schedule_multi_aff(cluster_node, 0, n); |
6453 | ma = isl_multi_aff_pullback_multi_aff(ma2, ma); |
6454 | |
6455 | return isl_map_from_multi_aff(ma); |
6456 | } |
6457 | |
6458 | /* Give a set of distances "set", are they bounded by a small constant |
6459 | * in direction "pos"? |
6460 | * In practice, check if they are bounded by 2 by checking that there |
6461 | * are no elements with a value greater than or equal to 3 or |
6462 | * smaller than or equal to -3. |
6463 | */ |
6464 | static isl_bool distance_is_bounded(__isl_keep isl_setisl_map *set, int pos) |
6465 | { |
6466 | isl_bool bounded; |
6467 | isl_setisl_map *test; |
6468 | |
6469 | if (!set) |
6470 | return isl_bool_error; |
6471 | |
6472 | test = isl_set_copy(set); |
6473 | test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3); |
6474 | bounded = isl_set_is_empty(test); |
6475 | isl_set_free(test); |
6476 | |
6477 | if (bounded < 0 || !bounded) |
6478 | return bounded; |
6479 | |
6480 | test = isl_set_copy(set); |
6481 | test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3); |
6482 | bounded = isl_set_is_empty(test); |
6483 | isl_set_free(test); |
6484 | |
6485 | return bounded; |
6486 | } |
6487 | |
6488 | /* Does the set "set" have a fixed (but possible parametric) value |
6489 | * at dimension "pos"? |
6490 | */ |
6491 | static isl_bool has_single_value(__isl_keep isl_setisl_map *set, int pos) |
6492 | { |
6493 | int n; |
6494 | isl_bool single; |
6495 | |
6496 | if (!set) |
6497 | return isl_bool_error; |
6498 | set = isl_set_copy(set); |
6499 | n = isl_set_dim(set, isl_dim_set); |
6500 | set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1)); |
6501 | set = isl_set_project_out(set, isl_dim_set, 0, pos); |
6502 | single = isl_set_is_singleton(set); |
6503 | isl_set_free(set); |
6504 | |
6505 | return single; |
6506 | } |
6507 | |
6508 | /* Does "map" have a fixed (but possible parametric) value |
6509 | * at dimension "pos" of either its domain or its range? |
6510 | */ |
6511 | static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos) |
6512 | { |
6513 | isl_setisl_map *set; |
6514 | isl_bool single; |
6515 | |
6516 | set = isl_map_domain(isl_map_copy(map)); |
6517 | single = has_single_value(set, pos); |
6518 | isl_set_free(set); |
6519 | |
6520 | if (single < 0 || single) |
6521 | return single; |
6522 | |
6523 | set = isl_map_range(isl_map_copy(map)); |
6524 | single = has_single_value(set, pos); |
6525 | isl_set_free(set); |
6526 | |
6527 | return single; |
6528 | } |
6529 | |
6530 | /* Does the edge "edge" from "graph" have bounded dependence distances |
6531 | * in the merged graph "merge_graph" of a selection of clusters in "c"? |
6532 | * |
6533 | * Extract the complete transformations of the source and destination |
6534 | * nodes of the edge, apply them to the edge constraints and |
6535 | * compute the differences. Finally, check if these differences are bounded |
6536 | * in each direction. |
6537 | * |
6538 | * If the dimension of the band is greater than the number of |
6539 | * dimensions that can be expected to be optimized by the edge |
6540 | * (based on its weight), then also allow the differences to be unbounded |
6541 | * in the remaining dimensions, but only if either the source or |
6542 | * the destination has a fixed value in that direction. |
6543 | * This allows a statement that produces values that are used by |
6544 | * several instances of another statement to be merged with that |
6545 | * other statement. |
6546 | * However, merging such clusters will introduce an inherently |
6547 | * large proximity distance inside the merged cluster, meaning |
6548 | * that proximity distances will no longer be optimized in |
6549 | * subsequent merges. These merges are therefore only allowed |
6550 | * after all other possible merges have been tried. |
6551 | * The first time such a merge is encountered, the weight of the edge |
6552 | * is replaced by a negative weight. The second time (i.e., after |
6553 | * all merges over edges with a non-negative weight have been tried), |
6554 | * the merge is allowed. |
6555 | */ |
6556 | static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge, |
6557 | struct isl_sched_graph *graph, struct isl_clustering *c, |
6558 | struct isl_sched_graph *merge_graph) |
6559 | { |
6560 | int i, n, n_slack; |
6561 | isl_bool bounded; |
6562 | isl_map *map, *t; |
6563 | isl_setisl_map *dist; |
6564 | |
6565 | map = isl_map_copy(edge->map); |
6566 | t = extract_node_transformation(ctx, edge->src, c, merge_graph); |
6567 | map = isl_map_apply_domain(map, t); |
6568 | t = extract_node_transformation(ctx, edge->dst, c, merge_graph); |
6569 | map = isl_map_apply_range(map, t); |
6570 | dist = isl_map_deltas(isl_map_copy(map)); |
6571 | |
6572 | bounded = isl_bool_true; |
6573 | n = isl_set_dim(dist, isl_dim_set); |
6574 | n_slack = n - edge->weight; |
6575 | if (edge->weight < 0) |
6576 | n_slack -= graph->max_weight + 1; |
6577 | for (i = 0; i < n; ++i) { |
6578 | isl_bool bounded_i, singular_i; |
6579 | |
6580 | bounded_i = distance_is_bounded(dist, i); |
6581 | if (bounded_i < 0) |
6582 | goto error; |
6583 | if (bounded_i) |
6584 | continue; |
6585 | if (edge->weight >= 0) |
6586 | bounded = isl_bool_false; |
6587 | n_slack--; |
6588 | if (n_slack < 0) |
6589 | break; |
6590 | singular_i = has_singular_src_or_dst(map, i); |
6591 | if (singular_i < 0) |
6592 | goto error; |
6593 | if (singular_i) |
6594 | continue; |
6595 | bounded = isl_bool_false; |
6596 | break; |
6597 | } |
6598 | if (!bounded && i >= n && edge->weight >= 0) |
6599 | edge->weight -= graph->max_weight + 1; |
6600 | isl_map_free(map); |
6601 | isl_set_free(dist); |
6602 | |
6603 | return bounded; |
6604 | error: |
6605 | isl_map_free(map); |
6606 | isl_set_free(dist); |
6607 | return isl_bool_error; |
6608 | } |
6609 | |
6610 | /* Should the clusters be merged based on the cluster schedule |
6611 | * in the current (and only) band of "merge_graph"? |
6612 | * "graph" is the original dependence graph, while "c" records |
6613 | * which SCCs are involved in the latest merge. |
6614 | * |
6615 | * In particular, is there at least one proximity constraint |
6616 | * that is optimized by the merge? |
6617 | * |
6618 | * A proximity constraint is considered to be optimized |
6619 | * if the dependence distances are small. |
6620 | */ |
6621 | static isl_bool ok_to_merge_proximity(isl_ctx *ctx, |
6622 | struct isl_sched_graph *graph, struct isl_clustering *c, |
6623 | struct isl_sched_graph *merge_graph) |
6624 | { |
6625 | int i; |
6626 | |
6627 | for (i = 0; i < graph->n_edge; ++i) { |
6628 | struct isl_sched_edge *edge = &graph->edge[i]; |
6629 | isl_bool bounded; |
6630 | |
6631 | if (!is_proximity(edge)) |
6632 | continue; |
6633 | if (!c->scc_in_merge[edge->src->scc]) |
6634 | continue; |
6635 | if (!c->scc_in_merge[edge->dst->scc]) |
6636 | continue; |
6637 | if (c->scc_cluster[edge->dst->scc] == |
6638 | c->scc_cluster[edge->src->scc]) |
6639 | continue; |
6640 | bounded = has_bounded_distances(ctx, edge, graph, c, |
6641 | merge_graph); |
6642 | if (bounded < 0 || bounded) |
6643 | return bounded; |
6644 | } |
6645 | |
6646 | return isl_bool_false; |
6647 | } |
6648 | |
6649 | /* Should the clusters be merged based on the cluster schedule |
6650 | * in the current (and only) band of "merge_graph"? |
6651 | * "graph" is the original dependence graph, while "c" records |
6652 | * which SCCs are involved in the latest merge. |
6653 | * |
6654 | * If the current band is empty, then the clusters should not be merged. |
6655 | * |
6656 | * If the band depth should be maximized and the merge schedule |
6657 | * is incomplete (meaning that the dimension of some of the schedule |
6658 | * bands in the original schedule will be reduced), then the clusters |
6659 | * should not be merged. |
6660 | * |
6661 | * If the schedule_maximize_coincidence option is set, then check that |
6662 | * the number of coincident schedule dimensions is not reduced. |
6663 | * |
6664 | * Finally, only allow the merge if at least one proximity |
6665 | * constraint is optimized. |
6666 | */ |
6667 | static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph, |
6668 | struct isl_clustering *c, struct isl_sched_graph *merge_graph) |
6669 | { |
6670 | if (merge_graph->n_total_row == merge_graph->band_start) |
6671 | return isl_bool_false; |
6672 | |
6673 | if (isl_options_get_schedule_maximize_band_depth(ctx) && |
6674 | merge_graph->n_total_row < merge_graph->maxvar) |
6675 | return isl_bool_false; |
6676 | |
6677 | if (isl_options_get_schedule_maximize_coincidence(ctx)) { |
6678 | isl_bool ok; |
6679 | |
6680 | ok = ok_to_merge_coincident(c, merge_graph); |
6681 | if (ok < 0 || !ok) |
6682 | return ok; |
6683 | } |
6684 | |
6685 | return ok_to_merge_proximity(ctx, graph, c, merge_graph); |
6686 | } |
6687 | |
6688 | /* Apply the schedule in "t_node" to the "n" rows starting at "first" |
6689 | * of the schedule in "node" and return the result. |
6690 | * |
6691 | * That is, essentially compute |
6692 | * |
6693 | * T * N(first:first+n-1) |
6694 | * |
6695 | * taking into account the constant term and the parameter coefficients |
6696 | * in "t_node". |
6697 | */ |
6698 | static __isl_give isl_mat *node_transformation(isl_ctx *ctx, |
6699 | struct isl_sched_node *t_node, struct isl_sched_node *node, |
6700 | int first, int n) |
6701 | { |
6702 | int i, j; |
6703 | isl_mat *t; |
6704 | int n_row, n_col, n_param, n_var; |
6705 | |
6706 | n_param = node->nparam; |
6707 | n_var = node->nvar; |
6708 | n_row = isl_mat_rows(t_node->sched); |
6709 | n_col = isl_mat_cols(node->sched); |
6710 | t = isl_mat_alloc(ctx, n_row, n_col); |
6711 | if (!t) |
6712 | return NULL((void*)0); |
6713 | for (i = 0; i < n_row; ++i) { |
6714 | isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param); |
6715 | isl_seq_clr(t->row[i] + 1 + n_param, n_var); |
6716 | for (j = 0; j < n; ++j) |
6717 | isl_seq_addmul(t->row[i], |
6718 | t_node->sched->row[i][1 + n_param + j], |
6719 | node->sched->row[first + j], |
6720 | 1 + n_param + n_var); |
6721 | } |
6722 | return t; |
6723 | } |
6724 | |
6725 | /* Apply the cluster schedule in "t_node" to the current band |
6726 | * schedule of the nodes in "graph". |
6727 | * |
6728 | * In particular, replace the rows starting at band_start |
6729 | * by the result of applying the cluster schedule in "t_node" |
6730 | * to the original rows. |
6731 | * |
6732 | * The coincidence of the schedule is determined by the coincidence |
6733 | * of the cluster schedule. |
6734 | */ |
6735 | static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph, |
6736 | struct isl_sched_node *t_node) |
6737 | { |
6738 | int i, j; |
6739 | int n_new; |
6740 | int start, n; |
6741 | |
6742 | start = graph->band_start; |
6743 | n = graph->n_total_row - start; |
6744 | |
6745 | n_new = isl_mat_rows(t_node->sched); |
6746 | for (i = 0; i < graph->n; ++i) { |
6747 | struct isl_sched_node *node = &graph->node[i]; |
6748 | isl_mat *t; |
6749 | |
6750 | t = node_transformation(ctx, t_node, node, start, n); |
6751 | node->sched = isl_mat_drop_rows(node->sched, start, n); |
6752 | node->sched = isl_mat_concat(node->sched, t); |
6753 | node->sched_map = isl_map_free(node->sched_map); |
6754 | if (!node->sched) |
6755 | return isl_stat_error; |
6756 | for (j = 0; j < n_new; ++j) |
6757 | node->coincident[start + j] = t_node->coincident[j]; |
6758 | } |
6759 | graph->n_total_row -= n; |
6760 | graph->n_row -= n; |
6761 | graph->n_total_row += n_new; |
6762 | graph->n_row += n_new; |
6763 | |
6764 | return isl_stat_ok; |
6765 | } |
6766 | |
6767 | /* Merge the clusters marked for merging in "c" into a single |
6768 | * cluster using the cluster schedule in the current band of "merge_graph". |
6769 | * The representative SCC for the new cluster is the SCC with |
6770 | * the smallest index. |
6771 | * |
6772 | * The current band schedule of each SCC in the new cluster is obtained |
6773 | * by applying the schedule of the corresponding original cluster |
6774 | * to the original band schedule. |
6775 | * All SCCs in the new cluster have the same number of schedule rows. |
6776 | */ |
6777 | static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c, |
6778 | struct isl_sched_graph *merge_graph) |
6779 | { |
6780 | int i; |
6781 | int cluster = -1; |
6782 | isl_space *space; |
6783 | |
6784 | for (i = 0; i < c->n; ++i) { |
6785 | struct isl_sched_node *node; |
6786 | |
6787 | if (!c->scc_in_merge[i]) |
6788 | continue; |
6789 | if (cluster < 0) |
6790 | cluster = i; |
6791 | space = cluster_space(&c->scc[i], c->scc_cluster[i]); |
6792 | node = graph_find_node(ctx, merge_graph, space); |
6793 | isl_space_free(space); |
6794 | if (!node) |
6795 | return isl_stat_error; |
6796 | if (!is_node(merge_graph, node)) |
6797 | isl_die(ctx, isl_error_internal,do { isl_handle_error(ctx, isl_error_internal, "unable to find cluster" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 6799); return isl_stat_error; } while (0) |
6798 | "unable to find cluster",do { isl_handle_error(ctx, isl_error_internal, "unable to find cluster" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 6799); return isl_stat_error; } while (0) |
6799 | return isl_stat_error)do { isl_handle_error(ctx, isl_error_internal, "unable to find cluster" , "/build/llvm-toolchain-snapshot-10~+20200102111109+a2976c490da/polly/lib/External/isl/isl_scheduler.c" , 6799); return isl_stat_error; } while (0); |
6800 | if (transform(ctx, &c->scc[i], node) < 0) |
6801 | return isl_stat_error; |
6802 | c->scc_cluster[i] = cluster; |
6803 | } |
6804 | |
6805 | return isl_stat_ok; |
6806 | } |
6807 | |
6808 | /* Try and merge the clusters of SCCs marked in c->scc_in_merge |
6809 | * by scheduling the current cluster bands with respect to each other. |
6810 | * |
6811 | * Construct a dependence graph with a space for each cluster and |
6812 | * with the coordinates of each space corresponding to the schedule |
6813 | * dimensions of the current band of that cluster. |
6814 | * Construct a cluster schedule in this cluster dependence graph and |
6815 | * apply it to the current cluster bands if it is applicable |
6816 | * according to ok_to_merge. |
6817 | * |
6818 | * If the number of remaining schedule dimensions in a cluster |
6819 | * with a non-maximal current schedule dimension is greater than |
6820 | * the number of remaining schedule dimensions in clusters |
6821 | * with a maximal current schedule dimension, then restrict |
6822 | * the number of rows to be computed in the cluster schedule |
6823 | * to the minimal such non-maximal current schedule dimension. |
6824 | * Do this by adjusting merge_graph.maxvar. |
6825 | * |
6826 | * Return isl_bool_true if the clusters have effectively been merged |
6827 | * into a single cluster. |
6828 | * |
6829 | * Note that since the standard scheduling algorithm minimizes the maximal |
6830 | * distance over proximity constraints, the proximity constraints between |
6831 | * the merged clusters may not be optimized any further than what is |
6832 | * sufficient to bring the distances within the limits of the internal |
6833 | * proximity constraints inside the individual clusters. |
6834 | * It may therefore make sense to perform an additional translation step |
6835 | * to bring the clusters closer to each other, while maintaining |
6836 | * the linear part of the merging schedule found using the standard |
6837 | * scheduling algorithm. |
6838 | */ |
6839 | static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph, |
6840 | struct isl_clustering *c) |
6841 | { |
6842 | struct isl_sched_graph merge_graph = { 0 }; |
6843 | isl_bool merged; |
6844 | |
6845 | if (init_merge_graph(ctx, graph, c, &merge_graph) < 0) |
6846 | goto error; |
6847 | |
6848 | if (compute_maxvar(&merge_graph) < 0) |
6849 | goto error; |
6850 | if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0) |
6851 | goto error; |
6852 | if (compute_schedule_wcc_band(ctx, &merge_graph) < 0) |
6853 | goto error; |
6854 | merged = ok_to_merge(ctx, graph, c, &merge_graph); |
6855 | if (merged && merge(ctx, c, &merge_graph) < 0) |
6856 | goto error; |
6857 | |
6858 | graph_free(ctx, &merge_graph); |
6859 | return merged; |
6860 | error: |
6861 | graph_free(ctx, &merge_graph); |
6862 | return isl_bool_error; |
6863 | } |
6864 | |
6865 | /* Is there any edge marked "no_merge" between two SCCs that are |
6866 | * about to be merged (i.e., that are set in "scc_in_merge")? |
6867 | * "merge_edge" is the proximity edge along which the clusters of SCCs |
6868 | * are going to be merged. |
6869 | * |
6870 | * If there is any edge between two SCCs with a negative weight, |
6871 | * while the weight of "merge_edge" is non-negative, then this |
6872 | * means that the edge was postponed. "merge_edge" should then |
6873 | * also be postponed since merging along the edge with negative weight should |
6874 | * be postponed until all edges with non-negative weight have been tried. |
6875 | * Replace the weight of "merge_edge" by a negative weight as well and |
6876 | * tell the caller not to attempt a merge. |
6877 | */ |
6878 | static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge, |
6879 | struct isl_sched_edge *merge_edge) |
6880 | { |
6881 | int i; |
6882 | |
6883 | for (i = 0; i < graph->n_edge; ++i) { |
6884 | struct isl_sched_edge *edge = &graph->edge[i]; |
6885 | |
6886 | if (!scc_in_merge[edge->src->scc]) |
6887 | continue; |
6888 | if (!scc_in_merge[edge->dst->scc]) |
6889 | continue; |
6890 | if (edge->no_merge) |
6891 | return 1; |
6892 | if (merge_edge->weight >= 0 && edge->weight < 0) { |
6893 | merge_edge->weight -= graph->max_weight + 1; |
6894 | return 1; |
6895 | } |
6896 | } |
6897 | |
6898 | return 0; |
6899 | } |
6900 | |
6901 | /* Merge the two clusters in "c" connected by the edge in "graph" |
6902 | * with index "edge" into a single cluster. |
6903 | * If it turns out to be impossible to merge these two clusters, |
6904 | * then mark the edge as "no_merge" such that it will not be |
6905 | * considered again. |
6906 | * |
6907 | * First mark all SCCs that need to be merged. This includes the SCCs |
6908 | * in the two clusters, but it may also include the SCCs |
6909 | * of intermediate clusters. |
6910 | * If there is already a no_merge edge between any pair of such SCCs, |
6911 | * then simply mark the current edge as no_merge as well. |
6912 | * Likewise, if any of those edges was postponed by has_bounded_distances, |
6913 | * then postpone the current edge as well. |
6914 | * Otherwise, try and merge the clusters and mark "edge" as "no_merge" |
6915 | * if the clusters did not end up getting merged, unless the non-merge |
6916 | * is due to the fact that the edge was postponed. This postponement |
6917 | * can be recognized by a change in weight (from non-negative to negative). |
6918 | */ |
6919 | static isl_stat merge_clusters_along_edge(isl_ctx *ctx, |
6920 | struct isl_sched_graph *graph, int edge, struct isl_clustering *c) |
6921 | { |
6922 | isl_bool merged; |
6923 | int edge_weight = graph->edge[edge].weight; |
6924 | |
6925 | if (mark_merge_sccs(ctx, graph, edge, c) < 0) |
6926 | return isl_stat_error; |
6927 | |
6928 | if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge])) |
6929 | merged = isl_bool_false; |
6930 | else |
6931 | merged = try_merge(ctx, graph, c); |
6932 | if (merged < 0) |
6933 | return isl_stat_error; |
6934 | if (!merged && edge_weight == graph->edge[edge].weight) |
6935 | graph->edge[edge].no_merge = 1; |
6936 | |
6937 | return isl_stat_ok; |
6938 | } |
6939 | |
6940 | /* Does "node" belong to the cluster identified by "cluster"? |
6941 | */ |
6942 | static int node_cluster_exactly(struct isl_sched_node *node, int cluster) |
6943 | { |
6944 | return node->cluster == cluster; |
6945 | } |
6946 | |
6947 | /* Does "edge" connect two nodes belonging to the cluster |
6948 | * identified by "cluster"? |
6949 | */ |
6950 | static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster) |
6951 | { |
6952 | return edge->src->cluster == cluster && edge->dst->cluster == cluster; |
6953 | } |
6954 | |
6955 | /* Swap the schedule of "node1" and "node2". |
6956 | * Both nodes have been derived from the same node in a common parent graph. |
6957 | * Since the "coincident" field is shared with that node |
6958 | * in the parent graph, there is no need to also swap this field. |
6959 | */ |
6960 | static void swap_sched(struct isl_sched_node *node1, |
6961 | struct isl_sched_node *node2) |
6962 | { |
6963 | isl_mat *sched; |
6964 | isl_map *sched_map; |
6965 | |
6966 | sched = node1->sched; |
6967 | node1->sched = node2->sched; |
6968 | node2->sched = sched; |
6969 | |
6970 | sched_map = node1->sched_map; |
6971 | node1->sched_map = node2->sched_map; |
6972 | node2->sched_map = sched_map; |
6973 | } |
6974 | |
6975 | /* Copy the current band schedule from the SCCs that form the cluster |
6976 | * with index "pos" to the actual cluster at position "pos". |
6977 | * By construction, the index of the first SCC that belongs to the cluster |
6978 | * is also "pos". |
6979 | * |
6980 | * The order of the nodes inside both the SCCs and the cluster |
6981 | * is assumed to be same as the order in the original "graph". |
6982 | * |
6983 | * Since the SCC graphs will no longer be used after this function, |
6984 | * the schedules are actually swapped rather than copied. |
6985 | */ |
6986 | static isl_stat copy_partial(struct isl_sched_graph *graph, |
6987 | struct isl_clustering *c, int pos) |
6988 | { |
6989 | int i, j; |
6990 | |
6991 | c->cluster[pos].n_total_row = c->scc[pos].n_total_row; |
6992 | c->cluster[pos].n_row = c->scc[pos].n_row; |
6993 | c->cluster[pos].maxvar = c->scc[pos].maxvar; |
6994 | j = 0; |
6995 | for (i = 0; i < graph->n; ++i) { |
6996 | int k; |
6997 | int s; |
6998 | |
6999 | if (graph->node[i].cluster != pos) |
7000 | continue; |
7001 | s = graph->node[i].scc; |
7002 | k = c->scc_node[s]++; |
7003 | swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]); |
7004 | if (c->scc[s].maxvar > c->cluster[pos].maxvar) |
7005 | c->cluster[pos].maxvar = c->scc[s].maxvar; |
7006 | ++j; |
7007 | } |
7008 | |
7009 | return isl_stat_ok; |
7010 | } |
7011 | |
7012 | /* Is there a (conditional) validity dependence from node[j] to node[i], |
7013 | * forcing node[i] to follow node[j] or do the nodes belong to the same |
7014 | * cluster? |
7015 | */ |
7016 | static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user) |
7017 | { |
7018 | struct isl_sched_graph *graph = user; |
7019 | |
7020 | if (graph->node[i].cluster == graph->node[j].cluster) |
7021 | return isl_bool_true; |
7022 | return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]); |
7023 | } |
7024 | |
7025 | /* Extract the merged clusters of SCCs in "graph", sort them, and |
7026 | * store them in c->clusters. Update c->scc_cluster accordingly. |
7027 | * |
7028 | * First keep track of the cluster containing the SCC to which a node |
7029 | * belongs in the node itself. |
7030 | * Then extract the clusters into c->clusters, copying the current |
7031 | * band schedule from the SCCs that belong to the cluster. |
7032 | * Do this only once per cluster. |
7033 | * |
7034 | * Finally, topologically sort the clusters and update c->scc_cluster |
7035 | * to match the new scc numbering. While the SCCs were originally |
7036 | * sorted already, some SCCs that depend on some other SCCs may |
7037 | * have been merged with SCCs that appear before these other SCCs. |
7038 | * A reordering may therefore be required. |
7039 | */ |
7040 | static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph, |
7041 | struct isl_clustering *c) |
7042 | { |
7043 | int i; |
7044 | |
7045 | for (i = 0; i < graph->n; ++i) |
7046 | graph->node[i].cluster = c->scc_cluster[graph->node[i].scc]; |
7047 | |
7048 | for (i = 0; i < graph->scc; ++i) { |
7049 | if (c->scc_cluster[i] != i) |
7050 | continue; |
7051 | if (extract_sub_graph(ctx, graph, &node_cluster_exactly, |
7052 | &edge_cluster_exactly, i, &c->cluster[i]) < 0) |
7053 | return isl_stat_error; |
7054 | c->cluster[i].src_scc = -1; |
7055 | c->cluster[i].dst_scc = -1; |
7056 | if (copy_partial(graph, c, i) < 0) |
7057 | return isl_stat_error; |
7058 | } |
7059 | |
7060 | if (detect_ccs(ctx, graph, &node_follows_strong_or_same_cluster) < 0) |
7061 | return isl_stat_error; |
7062 | for (i = 0; i < graph->n; ++i) |
7063 | c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster; |
7064 | |
7065 | return isl_stat_ok; |
7066 | } |
7067 | |
7068 | /* Compute weights on the proximity edges of "graph" that can |
7069 | * be used by find_proximity to find the most appropriate |
7070 | * proximity edge to use to merge two clusters in "c". |
7071 | * The weights are also used by has_bounded_distances to determine |
7072 | * whether the merge should be allowed. |
7073 | * Store the maximum of the computed weights in graph->max_weight. |
7074 | * |
7075 | * The computed weight is a measure for the number of remaining schedule |
7076 | * dimensions that can still be completely aligned. |
7077 | * In particular, compute the number of equalities between |
7078 | * input dimensions and output dimensions in the proximity constraints. |
7079 | * The directions that are already handled by outer schedule bands |
7080 | * are projected out prior to determining this number. |
7081 | * |
7082 | * Edges that will never be considered by find_proximity are ignored. |
7083 | */ |
7084 | static isl_stat compute_weights(struct isl_sched_graph *graph, |
7085 | struct isl_clustering *c) |
7086 | { |
7087 | int i; |
7088 | |
7089 | graph->max_weight = 0; |
7090 | |
7091 | for (i = 0; i < graph->n_edge; ++i) { |
7092 | struct isl_sched_edge *edge = &graph->edge[i]; |
7093 | struct isl_sched_node *src = edge->src; |
7094 | struct isl_sched_node *dst = edge->dst; |
7095 | isl_basic_map *hull; |
7096 | isl_bool prox; |
7097 | int n_in, n_out; |
7098 | |
7099 | prox = is_non_empty_proximity(edge); |
7100 | if (prox < 0) |
7101 | return isl_stat_error; |
7102 | if (!prox) |
7103 | continue; |
7104 | if (bad_cluster(&c->scc[edge->src->scc]) || |
7105 | bad_cluster(&c->scc[edge->dst->scc])) |
7106 | continue; |
7107 | if (c->scc_cluster[edge->dst->scc] == |
7108 | c->scc_cluster[edge->src->scc]) |
7109 | continue; |
7110 | |
7111 | hull = isl_map_affine_hull(isl_map_copy(edge->map)); |
7112 | hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0, |
7113 | isl_mat_copy(src->vmap)); |
7114 | hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0, |
7115 | isl_mat_copy(dst->vmap)); |
7116 | hull = isl_basic_map_project_out(hull, |
7117 | isl_dim_in, 0, src->rank); |
7118 | hull = isl_basic_map_project_out(hull, |
7119 | isl_dim_out, 0, dst->rank); |
7120 | hull = isl_basic_map_remove_divs(hull); |
7121 | n_in = isl_basic_map_dim(hull, isl_dim_in); |
7122 | n_out = isl_basic_map_dim(hull, isl_dim_out); |
7123 | hull = isl_basic_map_drop_constraints_not_involving_dims(hull, |
7124 | isl_dim_in, 0, n_in); |
7125 | hull = isl_basic_map_drop_constraints_not_involving_dims(hull, |
7126 | isl_dim_out, 0, n_out); |
7127 | if (!hull) |
7128 | return isl_stat_error; |
7129 | edge->weight = isl_basic_map_n_equality(hull); |
7130 | isl_basic_map_free(hull); |
7131 | |
7132 | if (edge->weight > graph->max_weight) |
7133 | graph->max_weight = edge->weight; |
7134 | } |
7135 | |
7136 | return isl_stat_ok; |
7137 | } |
7138 | |
7139 | /* Call compute_schedule_finish_band on each of the clusters in "c" |
7140 | * in their topological order. This order is determined by the scc |
7141 | * fields of the nodes in "graph". |
7142 | * Combine the results in a sequence expressing the topological order. |
7143 | * |
7144 | * If there is only one cluster left, then there is no need to introduce |
7145 | * a sequence node. Also, in this case, the cluster necessarily contains |
7146 | * the SCC at position 0 in the original graph and is therefore also |
7147 | * stored in the first cluster of "c". |
7148 | */ |
7149 | static __isl_give isl_schedule_node *finish_bands_clustering( |
7150 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
7151 | struct isl_clustering *c) |
7152 | { |
7153 | int i; |
7154 | isl_ctx *ctx; |
7155 | isl_union_set_list *filters; |
7156 | |
7157 | if (graph->scc == 1) |
7158 | return compute_schedule_finish_band(node, &c->cluster[0], 0); |
7159 | |
7160 | ctx = isl_schedule_node_get_ctx(node); |
7161 | |
7162 | filters = extract_sccs(ctx, graph); |
7163 | node = isl_schedule_node_insert_sequence(node, filters); |
7164 | |
7165 | for (i = 0; i < graph->scc; ++i) { |
7166 | int j = c->scc_cluster[i]; |
7167 | node = isl_schedule_node_child(node, i); |
7168 | node = isl_schedule_node_child(node, 0); |
7169 | node = compute_schedule_finish_band(node, &c->cluster[j], 0); |
7170 | node = isl_schedule_node_parent(node); |
7171 | node = isl_schedule_node_parent(node); |
7172 | } |
7173 | |
7174 | return node; |
7175 | } |
7176 | |
7177 | /* Compute a schedule for a connected dependence graph by first considering |
7178 | * each strongly connected component (SCC) in the graph separately and then |
7179 | * incrementally combining them into clusters. |
7180 | * Return the updated schedule node. |
7181 | * |
7182 | * Initially, each cluster consists of a single SCC, each with its |
7183 | * own band schedule. The algorithm then tries to merge pairs |
7184 | * of clusters along a proximity edge until no more suitable |
7185 | * proximity edges can be found. During this merging, the schedule |
7186 | * is maintained in the individual SCCs. |
7187 | * After the merging is completed, the full resulting clusters |
7188 | * are extracted and in finish_bands_clustering, |
7189 | * compute_schedule_finish_band is called on each of them to integrate |
7190 | * the band into "node" and to continue the computation. |
7191 | * |
7192 | * compute_weights initializes the weights that are used by find_proximity. |
7193 | */ |
7194 | static __isl_give isl_schedule_node *compute_schedule_wcc_clustering( |
7195 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
7196 | { |
7197 | isl_ctx *ctx; |
7198 | struct isl_clustering c; |
7199 | int i; |
7200 | |
7201 | ctx = isl_schedule_node_get_ctx(node); |
7202 | |
7203 | if (clustering_init(ctx, &c, graph) < 0) |
7204 | goto error; |
7205 | |
7206 | if (compute_weights(graph, &c) < 0) |
7207 | goto error; |
7208 | |
7209 | for (;;) { |
7210 | i = find_proximity(graph, &c); |
7211 | if (i < 0) |
7212 | goto error; |
7213 | if (i >= graph->n_edge) |
7214 | break; |
7215 | if (merge_clusters_along_edge(ctx, graph, i, &c) < 0) |
7216 | goto error; |
7217 | } |
7218 | |
7219 | if (extract_clusters(ctx, graph, &c) < 0) |
7220 | goto error; |
7221 | |
7222 | node = finish_bands_clustering(node, graph, &c); |
7223 | |
7224 | clustering_free(ctx, &c); |
7225 | return node; |
7226 | error: |
7227 | clustering_free(ctx, &c); |
7228 | return isl_schedule_node_free(node); |
7229 | } |
7230 | |
7231 | /* Compute a schedule for a connected dependence graph and return |
7232 | * the updated schedule node. |
7233 | * |
7234 | * If Feautrier's algorithm is selected, we first recursively try to satisfy |
7235 | * as many validity dependences as possible. When all validity dependences |
7236 | * are satisfied we extend the schedule to a full-dimensional schedule. |
7237 | * |
7238 | * Call compute_schedule_wcc_whole or compute_schedule_wcc_clustering |
7239 | * depending on whether the user has selected the option to try and |
7240 | * compute a schedule for the entire (weakly connected) component first. |
7241 | * If there is only a single strongly connected component (SCC), then |
7242 | * there is no point in trying to combine SCCs |
7243 | * in compute_schedule_wcc_clustering, so compute_schedule_wcc_whole |
7244 | * is called instead. |
7245 | */ |
7246 | static __isl_give isl_schedule_node *compute_schedule_wcc( |
7247 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
7248 | { |
7249 | isl_ctx *ctx; |
7250 | |
7251 | if (!node) |
7252 | return NULL((void*)0); |
7253 | |
7254 | ctx = isl_schedule_node_get_ctx(node); |
7255 | if (detect_sccs(ctx, graph) < 0) |
7256 | return isl_schedule_node_free(node); |
7257 | |
7258 | if (compute_maxvar(graph) < 0) |
7259 | return isl_schedule_node_free(node); |
7260 | |
7261 | if (need_feautrier_step(ctx, graph)) |
7262 | return compute_schedule_wcc_feautrier(node, graph); |
7263 | |
7264 | if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx)) |
7265 | return compute_schedule_wcc_whole(node, graph); |
7266 | else |
7267 | return compute_schedule_wcc_clustering(node, graph); |
7268 | } |
7269 | |
7270 | /* Compute a schedule for each group of nodes identified by node->scc |
7271 | * separately and then combine them in a sequence node (or as set node |
7272 | * if graph->weak is set) inserted at position "node" of the schedule tree. |
7273 | * Return the updated schedule node. |
7274 | * |
7275 | * If "wcc" is set then each of the groups belongs to a single |
7276 | * weakly connected component in the dependence graph so that |
7277 | * there is no need for compute_sub_schedule to look for weakly |
7278 | * connected components. |
7279 | * |
7280 | * If a set node would be introduced and if the number of components |
7281 | * is equal to the number of nodes, then check if the schedule |
7282 | * is already complete. If so, a redundant set node would be introduced |
7283 | * (without any further descendants) stating that the statements |
7284 | * can be executed in arbitrary order, which is also expressed |
7285 | * by the absence of any node. Refrain from inserting any nodes |
7286 | * in this case and simply return. |
7287 | */ |
7288 | static __isl_give isl_schedule_node *compute_component_schedule( |
7289 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
7290 | int wcc) |
7291 | { |
7292 | int component; |
7293 | isl_ctx *ctx; |
7294 | isl_union_set_list *filters; |
7295 | |
7296 | if (!node) |
7297 | return NULL((void*)0); |
7298 | |
7299 | if (graph->weak && graph->scc == graph->n) { |
7300 | if (compute_maxvar(graph) < 0) |
7301 | return isl_schedule_node_free(node); |
7302 | if (graph->n_row >= graph->maxvar) |
7303 | return node; |
7304 | } |
7305 | |
7306 | ctx = isl_schedule_node_get_ctx(node); |
7307 | filters = extract_sccs(ctx, graph); |
7308 | if (graph->weak) |
7309 | node = isl_schedule_node_insert_set(node, filters); |
7310 | else |
7311 | node = isl_schedule_node_insert_sequence(node, filters); |
7312 | |
7313 | for (component = 0; component < graph->scc; ++component) { |
7314 | node = isl_schedule_node_child(node, component); |
7315 | node = isl_schedule_node_child(node, 0); |
7316 | node = compute_sub_schedule(node, ctx, graph, |
7317 | &node_scc_exactly, |
7318 | &edge_scc_exactly, component, wcc); |
7319 | node = isl_schedule_node_parent(node); |
7320 | node = isl_schedule_node_parent(node); |
7321 | } |
7322 | |
7323 | return node; |
7324 | } |
7325 | |
7326 | /* Compute a schedule for the given dependence graph and insert it at "node". |
7327 | * Return the updated schedule node. |
7328 | * |
7329 | * We first check if the graph is connected (through validity and conditional |
7330 | * validity dependences) and, if not, compute a schedule |
7331 | * for each component separately. |
7332 | * If the schedule_serialize_sccs option is set, then we check for strongly |
7333 | * connected components instead and compute a separate schedule for |
7334 | * each such strongly connected component. |
7335 | */ |
7336 | static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node, |
7337 | struct isl_sched_graph *graph) |
7338 | { |
7339 | isl_ctx *ctx; |
7340 | |
7341 | if (!node) |
7342 | return NULL((void*)0); |
7343 | |
7344 | ctx = isl_schedule_node_get_ctx(node); |
7345 | if (isl_options_get_schedule_serialize_sccs(ctx)) { |
7346 | if (detect_sccs(ctx, graph) < 0) |
7347 | return isl_schedule_node_free(node); |
7348 | } else { |
7349 | if (detect_wccs(ctx, graph) < 0) |
7350 | return isl_schedule_node_free(node); |
7351 | } |
7352 | |
7353 | if (graph->scc > 1) |
7354 | return compute_component_schedule(node, graph, 1); |
7355 | |
7356 | return compute_schedule_wcc(node, graph); |
7357 | } |
7358 | |
7359 | /* Compute a schedule on sc->domain that respects the given schedule |
7360 | * constraints. |
7361 | * |
7362 | * In particular, the schedule respects all the validity dependences. |
7363 | * If the default isl scheduling algorithm is used, it tries to minimize |
7364 | * the dependence distances over the proximity dependences. |
7365 | * If Feautrier's scheduling algorithm is used, the proximity dependence |
7366 | * distances are only minimized during the extension to a full-dimensional |
7367 | * schedule. |
7368 | * |
7369 | * If there are any condition and conditional validity dependences, |
7370 | * then the conditional validity dependences may be violated inside |
7371 | * a tilable band, provided they have no adjacent non-local |
7372 | * condition dependences. |
7373 | */ |
7374 | __isl_give isl_schedule *isl_schedule_constraints_compute_schedule( |
7375 | __isl_take isl_schedule_constraints *sc) |
7376 | { |
7377 | isl_ctx *ctx = isl_schedule_constraints_get_ctx(sc); |
7378 | struct isl_sched_graph graph = { 0 }; |
7379 | isl_schedule *sched; |
7380 | isl_schedule_node *node; |
7381 | isl_union_set *domain; |
7382 | |
7383 | sc = isl_schedule_constraints_align_params(sc); |
7384 | |
7385 | domain = isl_schedule_constraints_get_domain(sc); |
7386 | if (isl_union_set_n_set(domain) == 0) { |
7387 | isl_schedule_constraints_free(sc); |
7388 | return isl_schedule_from_domain(domain); |
7389 | } |
7390 | |
7391 | if (graph_init(&graph, sc) < 0) |
7392 | domain = isl_union_set_free(domain); |
7393 | |
7394 | node = isl_schedule_node_from_domain(domain); |
7395 | node = isl_schedule_node_child(node, 0); |
7396 | if (graph.n > 0) |
7397 | node = compute_schedule(node, &graph); |
7398 | sched = isl_schedule_node_get_schedule(node); |
7399 | isl_schedule_node_free(node); |
7400 | |
7401 | graph_free(ctx, &graph); |
7402 | isl_schedule_constraints_free(sc); |
7403 | |
7404 | return sched; |
7405 | } |
7406 | |
7407 | /* Compute a schedule for the given union of domains that respects |
7408 | * all the validity dependences and minimizes |
7409 | * the dependence distances over the proximity dependences. |
7410 | * |
7411 | * This function is kept for backward compatibility. |
7412 | */ |
7413 | __isl_give isl_schedule *isl_union_set_compute_schedule( |
7414 | __isl_take isl_union_set *domain, |
7415 | __isl_take isl_union_map *validity, |
7416 | __isl_take isl_union_map *proximity) |
7417 | { |
7418 | isl_schedule_constraints *sc; |
7419 | |
7420 | sc = isl_schedule_constraints_on_domain(domain); |
7421 | sc = isl_schedule_constraints_set_validity(sc, validity); |
7422 | sc = isl_schedule_constraints_set_proximity(sc, proximity); |
7423 | |
7424 | return isl_schedule_constraints_compute_schedule(sc); |
7425 | } |