File: | polly/lib/CodeGen/IslNodeBuilder.cpp |
Warning: | line 1490, column 26 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file contains the IslNodeBuilder, a class to translate an isl AST into | |||
10 | // a LLVM-IR AST. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "polly/CodeGen/IslNodeBuilder.h" | |||
15 | #include "polly/CodeGen/BlockGenerators.h" | |||
16 | #include "polly/CodeGen/CodeGeneration.h" | |||
17 | #include "polly/CodeGen/IslAst.h" | |||
18 | #include "polly/CodeGen/IslExprBuilder.h" | |||
19 | #include "polly/CodeGen/LoopGeneratorsGOMP.h" | |||
20 | #include "polly/CodeGen/LoopGeneratorsKMP.h" | |||
21 | #include "polly/CodeGen/RuntimeDebugBuilder.h" | |||
22 | #include "polly/Options.h" | |||
23 | #include "polly/ScopInfo.h" | |||
24 | #include "polly/Support/ISLTools.h" | |||
25 | #include "polly/Support/SCEVValidator.h" | |||
26 | #include "polly/Support/ScopHelper.h" | |||
27 | #include "llvm/ADT/APInt.h" | |||
28 | #include "llvm/ADT/PostOrderIterator.h" | |||
29 | #include "llvm/ADT/SetVector.h" | |||
30 | #include "llvm/ADT/SmallPtrSet.h" | |||
31 | #include "llvm/ADT/Statistic.h" | |||
32 | #include "llvm/Analysis/LoopInfo.h" | |||
33 | #include "llvm/Analysis/RegionInfo.h" | |||
34 | #include "llvm/Analysis/ScalarEvolution.h" | |||
35 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | |||
36 | #include "llvm/IR/BasicBlock.h" | |||
37 | #include "llvm/IR/Constant.h" | |||
38 | #include "llvm/IR/Constants.h" | |||
39 | #include "llvm/IR/DataLayout.h" | |||
40 | #include "llvm/IR/DerivedTypes.h" | |||
41 | #include "llvm/IR/Dominators.h" | |||
42 | #include "llvm/IR/Function.h" | |||
43 | #include "llvm/IR/InstrTypes.h" | |||
44 | #include "llvm/IR/Instruction.h" | |||
45 | #include "llvm/IR/Instructions.h" | |||
46 | #include "llvm/IR/Type.h" | |||
47 | #include "llvm/IR/Value.h" | |||
48 | #include "llvm/Support/Casting.h" | |||
49 | #include "llvm/Support/CommandLine.h" | |||
50 | #include "llvm/Support/ErrorHandling.h" | |||
51 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |||
52 | #include "isl/aff.h" | |||
53 | #include "isl/aff_type.h" | |||
54 | #include "isl/ast.h" | |||
55 | #include "isl/ast_build.h" | |||
56 | #include "isl/isl-noexceptions.h" | |||
57 | #include "isl/map.h" | |||
58 | #include "isl/set.h" | |||
59 | #include "isl/union_map.h" | |||
60 | #include "isl/union_set.h" | |||
61 | #include "isl/val.h" | |||
62 | #include <algorithm> | |||
63 | #include <cassert> | |||
64 | #include <cstdint> | |||
65 | #include <cstring> | |||
66 | #include <string> | |||
67 | #include <utility> | |||
68 | #include <vector> | |||
69 | ||||
70 | using namespace llvm; | |||
71 | using namespace polly; | |||
72 | ||||
73 | #define DEBUG_TYPE"polly-codegen" "polly-codegen" | |||
74 | ||||
75 | STATISTIC(VersionedScops, "Number of SCoPs that required versioning.")static llvm::Statistic VersionedScops = {"polly-codegen", "VersionedScops" , "Number of SCoPs that required versioning."}; | |||
76 | ||||
77 | STATISTIC(SequentialLoops, "Number of generated sequential for-loops")static llvm::Statistic SequentialLoops = {"polly-codegen", "SequentialLoops" , "Number of generated sequential for-loops"}; | |||
78 | STATISTIC(ParallelLoops, "Number of generated parallel for-loops")static llvm::Statistic ParallelLoops = {"polly-codegen", "ParallelLoops" , "Number of generated parallel for-loops"}; | |||
79 | STATISTIC(VectorLoops, "Number of generated vector for-loops")static llvm::Statistic VectorLoops = {"polly-codegen", "VectorLoops" , "Number of generated vector for-loops"}; | |||
80 | STATISTIC(IfConditions, "Number of generated if-conditions")static llvm::Statistic IfConditions = {"polly-codegen", "IfConditions" , "Number of generated if-conditions"}; | |||
81 | ||||
82 | /// OpenMP backend options | |||
83 | enum class OpenMPBackend { GNU, LLVM }; | |||
84 | ||||
85 | static cl::opt<bool> PollyGenerateRTCPrint( | |||
86 | "polly-codegen-emit-rtc-print", | |||
87 | cl::desc("Emit code that prints the runtime check result dynamically."), | |||
88 | cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); | |||
89 | ||||
90 | // If this option is set we always use the isl AST generator to regenerate | |||
91 | // memory accesses. Without this option set we regenerate expressions using the | |||
92 | // original SCEV expressions and only generate new expressions in case the | |||
93 | // access relation has been changed and consequently must be regenerated. | |||
94 | static cl::opt<bool> PollyGenerateExpressions( | |||
95 | "polly-codegen-generate-expressions", | |||
96 | cl::desc("Generate AST expressions for unmodified and modified accesses"), | |||
97 | cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); | |||
98 | ||||
99 | static cl::opt<int> PollyTargetFirstLevelCacheLineSize( | |||
100 | "polly-target-first-level-cache-line-size", | |||
101 | cl::desc("The size of the first level cache line size specified in bytes."), | |||
102 | cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory)); | |||
103 | ||||
104 | static cl::opt<OpenMPBackend> PollyOmpBackend( | |||
105 | "polly-omp-backend", cl::desc("Choose the OpenMP library to use:"), | |||
106 | cl::values(clEnumValN(OpenMPBackend::GNU, "GNU", "GNU OpenMP")llvm::cl::OptionEnumValue { "GNU", int(OpenMPBackend::GNU), "GNU OpenMP" }, | |||
107 | clEnumValN(OpenMPBackend::LLVM, "LLVM", "LLVM OpenMP")llvm::cl::OptionEnumValue { "LLVM", int(OpenMPBackend::LLVM), "LLVM OpenMP" }), | |||
108 | cl::Hidden, cl::init(OpenMPBackend::GNU), cl::cat(PollyCategory)); | |||
109 | ||||
110 | isl::ast_expr IslNodeBuilder::getUpperBound(isl::ast_node_for For, | |||
111 | ICmpInst::Predicate &Predicate) { | |||
112 | isl::ast_expr Cond = For.cond(); | |||
113 | isl::ast_expr Iterator = For.iterator(); | |||
114 | assert(isl_ast_expr_get_type(Cond.get()) == isl_ast_expr_op &&(static_cast<void> (0)) | |||
115 | "conditional expression is not an atomic upper bound")(static_cast<void> (0)); | |||
116 | ||||
117 | isl_ast_op_typeisl_ast_expr_op_type OpType = isl_ast_expr_get_op_type(Cond.get()); | |||
118 | ||||
119 | switch (OpType) { | |||
120 | case isl_ast_op_leisl_ast_expr_op_le: | |||
121 | Predicate = ICmpInst::ICMP_SLE; | |||
122 | break; | |||
123 | case isl_ast_op_ltisl_ast_expr_op_lt: | |||
124 | Predicate = ICmpInst::ICMP_SLT; | |||
125 | break; | |||
126 | default: | |||
127 | llvm_unreachable("Unexpected comparison type in loop condition")__builtin_unreachable(); | |||
128 | } | |||
129 | ||||
130 | isl::ast_expr Arg0 = Cond.get_op_arg(0); | |||
131 | ||||
132 | assert(isl_ast_expr_get_type(Arg0.get()) == isl_ast_expr_id &&(static_cast<void> (0)) | |||
133 | "conditional expression is not an atomic upper bound")(static_cast<void> (0)); | |||
134 | ||||
135 | isl::id UBID = Arg0.get_id(); | |||
136 | ||||
137 | assert(isl_ast_expr_get_type(Iterator.get()) == isl_ast_expr_id &&(static_cast<void> (0)) | |||
138 | "Could not get the iterator")(static_cast<void> (0)); | |||
139 | ||||
140 | isl::id IteratorID = Iterator.get_id(); | |||
141 | ||||
142 | assert(UBID.get() == IteratorID.get() &&(static_cast<void> (0)) | |||
143 | "conditional expression is not an atomic upper bound")(static_cast<void> (0)); | |||
144 | ||||
145 | return Cond.get_op_arg(1); | |||
146 | } | |||
147 | ||||
148 | /// Return true if a return value of Predicate is true for the value represented | |||
149 | /// by passed isl_ast_expr_int. | |||
150 | static bool checkIslAstExprInt(__isl_take isl_ast_expr *Expr, | |||
151 | isl_bool (*Predicate)(__isl_keep isl_val *)) { | |||
152 | if (isl_ast_expr_get_type(Expr) != isl_ast_expr_int) { | |||
153 | isl_ast_expr_free(Expr); | |||
154 | return false; | |||
155 | } | |||
156 | auto ExprVal = isl_ast_expr_get_val(Expr); | |||
157 | isl_ast_expr_free(Expr); | |||
158 | if (Predicate(ExprVal) != isl_bool_true) { | |||
159 | isl_val_free(ExprVal); | |||
160 | return false; | |||
161 | } | |||
162 | isl_val_free(ExprVal); | |||
163 | return true; | |||
164 | } | |||
165 | ||||
166 | int IslNodeBuilder::getNumberOfIterations(isl::ast_node_for For) { | |||
167 | assert(isl_ast_node_get_type(For.get()) == isl_ast_node_for)(static_cast<void> (0)); | |||
168 | isl::ast_node Body = For.body(); | |||
169 | ||||
170 | // First, check if we can actually handle this code. | |||
171 | switch (isl_ast_node_get_type(Body.get())) { | |||
172 | case isl_ast_node_user: | |||
173 | break; | |||
174 | case isl_ast_node_block: { | |||
175 | isl::ast_node_block BodyBlock = Body.as<isl::ast_node_block>(); | |||
176 | isl::ast_node_list List = BodyBlock.children(); | |||
177 | for (isl::ast_node Node : List) { | |||
178 | isl_ast_node_type NodeType = isl_ast_node_get_type(Node.get()); | |||
179 | if (NodeType != isl_ast_node_user) | |||
180 | return -1; | |||
181 | } | |||
182 | break; | |||
183 | } | |||
184 | default: | |||
185 | return -1; | |||
186 | } | |||
187 | ||||
188 | isl::ast_expr Init = For.init(); | |||
189 | if (!checkIslAstExprInt(Init.release(), isl_val_is_zero)) | |||
190 | return -1; | |||
191 | isl::ast_expr Inc = For.inc(); | |||
192 | if (!checkIslAstExprInt(Inc.release(), isl_val_is_one)) | |||
193 | return -1; | |||
194 | CmpInst::Predicate Predicate; | |||
195 | isl::ast_expr UB = getUpperBound(For, Predicate); | |||
196 | if (isl_ast_expr_get_type(UB.get()) != isl_ast_expr_int) | |||
197 | return -1; | |||
198 | isl::val UpVal = UB.get_val(); | |||
199 | int NumberIterations = UpVal.get_num_si(); | |||
200 | if (NumberIterations < 0) | |||
201 | return -1; | |||
202 | if (Predicate == CmpInst::ICMP_SLT) | |||
203 | return NumberIterations; | |||
204 | else | |||
205 | return NumberIterations + 1; | |||
206 | } | |||
207 | ||||
208 | /// Extract the values and SCEVs needed to generate code for a block. | |||
209 | static int findReferencesInBlock(struct SubtreeReferences &References, | |||
210 | const ScopStmt *Stmt, BasicBlock *BB) { | |||
211 | for (Instruction &Inst : *BB) { | |||
212 | // Include invariant loads | |||
213 | if (isa<LoadInst>(Inst)) | |||
214 | if (Value *InvariantLoad = References.GlobalMap.lookup(&Inst)) | |||
215 | References.Values.insert(InvariantLoad); | |||
216 | ||||
217 | for (Value *SrcVal : Inst.operands()) { | |||
218 | auto *Scope = References.LI.getLoopFor(BB); | |||
219 | if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) { | |||
220 | References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope)); | |||
221 | continue; | |||
222 | } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal)) | |||
223 | References.Values.insert(NewVal); | |||
224 | } | |||
225 | } | |||
226 | return 0; | |||
227 | } | |||
228 | ||||
229 | void polly::addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr, | |||
230 | bool CreateScalarRefs) { | |||
231 | auto &References = *static_cast<struct SubtreeReferences *>(UserPtr); | |||
232 | ||||
233 | if (Stmt->isBlockStmt()) | |||
234 | findReferencesInBlock(References, Stmt, Stmt->getBasicBlock()); | |||
235 | else if (Stmt->isRegionStmt()) { | |||
236 | for (BasicBlock *BB : Stmt->getRegion()->blocks()) | |||
237 | findReferencesInBlock(References, Stmt, BB); | |||
238 | } else { | |||
239 | assert(Stmt->isCopyStmt())(static_cast<void> (0)); | |||
240 | // Copy Stmts have no instructions that we need to consider. | |||
241 | } | |||
242 | ||||
243 | for (auto &Access : *Stmt) { | |||
244 | if (References.ParamSpace) { | |||
245 | isl::space ParamSpace = Access->getLatestAccessRelation().get_space(); | |||
246 | (*References.ParamSpace) = | |||
247 | References.ParamSpace->align_params(ParamSpace); | |||
248 | } | |||
249 | ||||
250 | if (Access->isLatestArrayKind()) { | |||
251 | auto *BasePtr = Access->getLatestScopArrayInfo()->getBasePtr(); | |||
252 | if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr)) | |||
253 | if (Stmt->getParent()->contains(OpInst)) | |||
254 | continue; | |||
255 | ||||
256 | References.Values.insert(BasePtr); | |||
257 | continue; | |||
258 | } | |||
259 | ||||
260 | if (CreateScalarRefs) | |||
261 | References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access)); | |||
262 | } | |||
263 | } | |||
264 | ||||
265 | /// Extract the out-of-scop values and SCEVs referenced from a set describing | |||
266 | /// a ScopStmt. | |||
267 | /// | |||
268 | /// This includes the SCEVUnknowns referenced by the SCEVs used in the | |||
269 | /// statement and the base pointers of the memory accesses. For scalar | |||
270 | /// statements we force the generation of alloca memory locations and list | |||
271 | /// these locations in the set of out-of-scop values as well. | |||
272 | /// | |||
273 | /// @param Set A set which references the ScopStmt we are interested in. | |||
274 | /// @param UserPtr A void pointer that can be casted to a SubtreeReferences | |||
275 | /// structure. | |||
276 | static void addReferencesFromStmtSet(isl::set Set, | |||
277 | struct SubtreeReferences *UserPtr) { | |||
278 | isl::id Id = Set.get_tuple_id(); | |||
279 | auto *Stmt = static_cast<const ScopStmt *>(Id.get_user()); | |||
280 | return addReferencesFromStmt(Stmt, UserPtr); | |||
281 | } | |||
282 | ||||
283 | /// Extract the out-of-scop values and SCEVs referenced from a union set | |||
284 | /// referencing multiple ScopStmts. | |||
285 | /// | |||
286 | /// This includes the SCEVUnknowns referenced by the SCEVs used in the | |||
287 | /// statement and the base pointers of the memory accesses. For scalar | |||
288 | /// statements we force the generation of alloca memory locations and list | |||
289 | /// these locations in the set of out-of-scop values as well. | |||
290 | /// | |||
291 | /// @param USet A union set referencing the ScopStmts we are interested | |||
292 | /// in. | |||
293 | /// @param References The SubtreeReferences data structure through which | |||
294 | /// results are returned and further information is | |||
295 | /// provided. | |||
296 | static void | |||
297 | addReferencesFromStmtUnionSet(isl::union_set USet, | |||
298 | struct SubtreeReferences &References) { | |||
299 | ||||
300 | for (isl::set Set : USet.get_set_list()) | |||
301 | addReferencesFromStmtSet(Set, &References); | |||
302 | } | |||
303 | ||||
304 | isl::union_map | |||
305 | IslNodeBuilder::getScheduleForAstNode(const isl::ast_node &Node) { | |||
306 | return IslAstInfo::getSchedule(Node); | |||
307 | } | |||
308 | ||||
309 | void IslNodeBuilder::getReferencesInSubtree(const isl::ast_node &For, | |||
310 | SetVector<Value *> &Values, | |||
311 | SetVector<const Loop *> &Loops) { | |||
312 | SetVector<const SCEV *> SCEVs; | |||
313 | struct SubtreeReferences References = { | |||
314 | LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr}; | |||
315 | ||||
316 | for (const auto &I : IDToValue) | |||
317 | Values.insert(I.second); | |||
318 | ||||
319 | // NOTE: this is populated in IslNodeBuilder::addParameters | |||
320 | for (const auto &I : OutsideLoopIterations) | |||
321 | Values.insert(cast<SCEVUnknown>(I.second)->getValue()); | |||
322 | ||||
323 | isl::union_set Schedule = getScheduleForAstNode(For).domain(); | |||
324 | addReferencesFromStmtUnionSet(Schedule, References); | |||
325 | ||||
326 | for (const SCEV *Expr : SCEVs) { | |||
327 | findValues(Expr, SE, Values); | |||
328 | findLoops(Expr, Loops); | |||
329 | } | |||
330 | ||||
331 | Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); }); | |||
332 | ||||
333 | /// Note: Code generation of induction variables of loops outside Scops | |||
334 | /// | |||
335 | /// Remove loops that contain the scop or that are part of the scop, as they | |||
336 | /// are considered local. This leaves only loops that are before the scop, but | |||
337 | /// do not contain the scop itself. | |||
338 | /// We ignore loops perfectly contained in the Scop because these are already | |||
339 | /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops | |||
340 | /// whose induction variables are referred to by the Scop, but the Scop is not | |||
341 | /// fully contained in these Loops. Since there can be many of these, | |||
342 | /// we choose to codegen these on-demand. | |||
343 | /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable. | |||
344 | Loops.remove_if([this](const Loop *L) { | |||
345 | return S.contains(L) || L->contains(S.getEntry()); | |||
346 | }); | |||
347 | ||||
348 | // Contains Values that may need to be replaced with other values | |||
349 | // due to replacements from the ValueMap. We should make sure | |||
350 | // that we return correctly remapped values. | |||
351 | // NOTE: this code path is tested by: | |||
352 | // 1. test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll | |||
353 | // 2. test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll | |||
354 | SetVector<Value *> ReplacedValues; | |||
355 | for (Value *V : Values) { | |||
356 | ReplacedValues.insert(getLatestValue(V)); | |||
357 | } | |||
358 | Values = ReplacedValues; | |||
359 | } | |||
360 | ||||
361 | void IslNodeBuilder::updateValues(ValueMapT &NewValues) { | |||
362 | SmallPtrSet<Value *, 5> Inserted; | |||
363 | ||||
364 | for (const auto &I : IDToValue) { | |||
365 | IDToValue[I.first] = NewValues[I.second]; | |||
366 | Inserted.insert(I.second); | |||
367 | } | |||
368 | ||||
369 | for (const auto &I : NewValues) { | |||
370 | if (Inserted.count(I.first)) | |||
371 | continue; | |||
372 | ||||
373 | ValueMap[I.first] = I.second; | |||
374 | } | |||
375 | } | |||
376 | ||||
377 | Value *IslNodeBuilder::getLatestValue(Value *Original) const { | |||
378 | auto It = ValueMap.find(Original); | |||
379 | if (It == ValueMap.end()) | |||
380 | return Original; | |||
381 | return It->second; | |||
382 | } | |||
383 | ||||
384 | void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, | |||
385 | std::vector<Value *> &IVS, | |||
386 | __isl_take isl_id *IteratorID, | |||
387 | __isl_take isl_union_map *Schedule) { | |||
388 | isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); | |||
389 | isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); | |||
390 | isl_id *Id = isl_ast_expr_get_id(StmtExpr); | |||
391 | isl_ast_expr_free(StmtExpr); | |||
392 | ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id); | |||
393 | std::vector<LoopToScevMapT> VLTS(IVS.size()); | |||
394 | ||||
395 | isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain().release()); | |||
396 | Schedule = isl_union_map_intersect_domain(Schedule, Domain); | |||
397 | isl_map *S = isl_map_from_union_map(Schedule); | |||
398 | ||||
399 | auto *NewAccesses = createNewAccesses(Stmt, User); | |||
400 | createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID); | |||
401 | VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses); | |||
402 | isl_id_to_ast_expr_free(NewAccesses); | |||
403 | isl_map_free(S); | |||
404 | isl_id_free(Id); | |||
405 | isl_ast_node_free(User); | |||
406 | } | |||
407 | ||||
408 | void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) { | |||
409 | auto *Id = isl_ast_node_mark_get_id(Node); | |||
410 | auto Child = isl_ast_node_mark_get_node(Node); | |||
411 | isl_ast_node_free(Node); | |||
412 | // If a child node of a 'SIMD mark' is a loop that has a single iteration, | |||
413 | // it will be optimized away and we should skip it. | |||
414 | if (strcmp(isl_id_get_name(Id), "SIMD") == 0 && | |||
415 | isl_ast_node_get_type(Child) == isl_ast_node_for) { | |||
416 | bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; | |||
417 | int VectorWidth = | |||
418 | getNumberOfIterations(isl::manage_copy(Child).as<isl::ast_node_for>()); | |||
419 | if (Vector && 1 < VectorWidth && VectorWidth <= 16) | |||
420 | createForVector(Child, VectorWidth); | |||
421 | else | |||
422 | createForSequential(isl::manage(Child).as<isl::ast_node_for>(), true); | |||
423 | isl_id_free(Id); | |||
424 | return; | |||
425 | } | |||
426 | if (strcmp(isl_id_get_name(Id), "Inter iteration alias-free") == 0) { | |||
427 | auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id)); | |||
428 | Annotator.addInterIterationAliasFreeBasePtr(BasePtr); | |||
429 | } | |||
430 | ||||
431 | BandAttr *ChildLoopAttr = getLoopAttr(isl::manage_copy(Id)); | |||
432 | BandAttr *AncestorLoopAttr; | |||
433 | if (ChildLoopAttr) { | |||
434 | // Save current LoopAttr environment to restore again when leaving this | |||
435 | // subtree. This means there was no loop between the ancestor LoopAttr and | |||
436 | // this mark, i.e. the ancestor LoopAttr did not directly mark a loop. This | |||
437 | // can happen e.g. if the AST build peeled or unrolled the loop. | |||
438 | AncestorLoopAttr = Annotator.getStagingAttrEnv(); | |||
439 | ||||
440 | Annotator.getStagingAttrEnv() = ChildLoopAttr; | |||
441 | } | |||
442 | ||||
443 | create(Child); | |||
444 | ||||
445 | if (ChildLoopAttr) { | |||
446 | assert(Annotator.getStagingAttrEnv() == ChildLoopAttr &&(static_cast<void> (0)) | |||
447 | "Nest must not overwrite loop attr environment")(static_cast<void> (0)); | |||
448 | Annotator.getStagingAttrEnv() = AncestorLoopAttr; | |||
449 | } | |||
450 | ||||
451 | isl_id_free(Id); | |||
452 | } | |||
453 | ||||
454 | void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For, | |||
455 | int VectorWidth) { | |||
456 | isl_ast_node *Body = isl_ast_node_for_get_body(For); | |||
457 | isl_ast_expr *Init = isl_ast_node_for_get_init(For); | |||
458 | isl_ast_expr *Inc = isl_ast_node_for_get_inc(For); | |||
459 | isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For); | |||
460 | isl_id *IteratorID = isl_ast_expr_get_id(Iterator); | |||
461 | ||||
462 | Value *ValueLB = ExprBuilder.create(Init); | |||
463 | Value *ValueInc = ExprBuilder.create(Inc); | |||
464 | ||||
465 | Type *MaxType = ExprBuilder.getType(Iterator); | |||
466 | MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); | |||
467 | MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); | |||
468 | ||||
469 | if (MaxType != ValueLB->getType()) | |||
470 | ValueLB = Builder.CreateSExt(ValueLB, MaxType); | |||
471 | if (MaxType != ValueInc->getType()) | |||
472 | ValueInc = Builder.CreateSExt(ValueInc, MaxType); | |||
473 | ||||
474 | std::vector<Value *> IVS(VectorWidth); | |||
475 | IVS[0] = ValueLB; | |||
476 | ||||
477 | for (int i = 1; i < VectorWidth; i++) | |||
478 | IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv"); | |||
479 | ||||
480 | isl::union_map Schedule = getScheduleForAstNode(isl::manage_copy(For)); | |||
481 | assert(!Schedule.is_null() &&(static_cast<void> (0)) | |||
482 | "For statement annotation does not contain its schedule")(static_cast<void> (0)); | |||
483 | ||||
484 | IDToValue[IteratorID] = ValueLB; | |||
485 | ||||
486 | switch (isl_ast_node_get_type(Body)) { | |||
487 | case isl_ast_node_user: | |||
488 | createUserVector(Body, IVS, isl_id_copy(IteratorID), Schedule.copy()); | |||
489 | break; | |||
490 | case isl_ast_node_block: { | |||
491 | isl_ast_node_list *List = isl_ast_node_block_get_children(Body); | |||
492 | ||||
493 | for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) | |||
494 | createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS, | |||
495 | isl_id_copy(IteratorID), Schedule.copy()); | |||
496 | ||||
497 | isl_ast_node_free(Body); | |||
498 | isl_ast_node_list_free(List); | |||
499 | break; | |||
500 | } | |||
501 | default: | |||
502 | isl_ast_node_dump(Body); | |||
503 | llvm_unreachable("Unhandled isl_ast_node in vectorizer")__builtin_unreachable(); | |||
504 | } | |||
505 | ||||
506 | IDToValue.erase(IDToValue.find(IteratorID)); | |||
507 | isl_id_free(IteratorID); | |||
508 | ||||
509 | isl_ast_node_free(For); | |||
510 | isl_ast_expr_free(Iterator); | |||
511 | ||||
512 | VectorLoops++; | |||
513 | } | |||
514 | ||||
515 | /// Restore the initial ordering of dimensions of the band node | |||
516 | /// | |||
517 | /// In case the band node represents all the dimensions of the iteration | |||
518 | /// domain, recreate the band node to restore the initial ordering of the | |||
519 | /// dimensions. | |||
520 | /// | |||
521 | /// @param Node The band node to be modified. | |||
522 | /// @return The modified schedule node. | |||
523 | static bool IsLoopVectorizerDisabled(isl::ast_node_for Node) { | |||
524 | assert(isl_ast_node_get_type(Node.get()) == isl_ast_node_for)(static_cast<void> (0)); | |||
525 | isl::ast_node Body = Node.body(); | |||
526 | if (isl_ast_node_get_type(Body.get()) != isl_ast_node_mark) | |||
527 | return false; | |||
528 | ||||
529 | isl::ast_node_mark BodyMark = Body.as<isl::ast_node_mark>(); | |||
530 | auto Id = BodyMark.id(); | |||
531 | if (strcmp(Id.get_name().c_str(), "Loop Vectorizer Disabled") == 0) | |||
532 | return true; | |||
533 | return false; | |||
534 | } | |||
535 | ||||
536 | void IslNodeBuilder::createForSequential(isl::ast_node_for For, | |||
537 | bool MarkParallel) { | |||
538 | Value *ValueLB, *ValueUB, *ValueInc; | |||
539 | Type *MaxType; | |||
540 | BasicBlock *ExitBlock; | |||
541 | Value *IV; | |||
542 | CmpInst::Predicate Predicate; | |||
543 | ||||
544 | bool LoopVectorizerDisabled = IsLoopVectorizerDisabled(For); | |||
545 | ||||
546 | isl::ast_node Body = For.body(); | |||
547 | ||||
548 | // isl_ast_node_for_is_degenerate(For) | |||
549 | // | |||
550 | // TODO: For degenerated loops we could generate a plain assignment. | |||
551 | // However, for now we just reuse the logic for normal loops, which will | |||
552 | // create a loop with a single iteration. | |||
553 | ||||
554 | isl::ast_expr Init = For.init(); | |||
555 | isl::ast_expr Inc = For.inc(); | |||
556 | isl::ast_expr Iterator = For.iterator(); | |||
557 | isl::id IteratorID = Iterator.get_id(); | |||
558 | isl::ast_expr UB = getUpperBound(For, Predicate); | |||
559 | ||||
560 | ValueLB = ExprBuilder.create(Init.release()); | |||
561 | ValueUB = ExprBuilder.create(UB.release()); | |||
562 | ValueInc = ExprBuilder.create(Inc.release()); | |||
563 | ||||
564 | MaxType = ExprBuilder.getType(Iterator.get()); | |||
565 | MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); | |||
566 | MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); | |||
567 | MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); | |||
568 | ||||
569 | if (MaxType != ValueLB->getType()) | |||
570 | ValueLB = Builder.CreateSExt(ValueLB, MaxType); | |||
571 | if (MaxType != ValueUB->getType()) | |||
572 | ValueUB = Builder.CreateSExt(ValueUB, MaxType); | |||
573 | if (MaxType != ValueInc->getType()) | |||
574 | ValueInc = Builder.CreateSExt(ValueInc, MaxType); | |||
575 | ||||
576 | // If we can show that LB <Predicate> UB holds at least once, we can | |||
577 | // omit the GuardBB in front of the loop. | |||
578 | bool UseGuardBB = | |||
579 | !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB)); | |||
580 | IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock, | |||
581 | Predicate, &Annotator, MarkParallel, UseGuardBB, | |||
582 | LoopVectorizerDisabled); | |||
583 | IDToValue[IteratorID.get()] = IV; | |||
584 | ||||
585 | create(Body.release()); | |||
586 | ||||
587 | Annotator.popLoop(MarkParallel); | |||
588 | ||||
589 | IDToValue.erase(IDToValue.find(IteratorID.get())); | |||
590 | ||||
591 | Builder.SetInsertPoint(&ExitBlock->front()); | |||
592 | ||||
593 | SequentialLoops++; | |||
594 | } | |||
595 | ||||
596 | /// Remove the BBs contained in a (sub)function from the dominator tree. | |||
597 | /// | |||
598 | /// This function removes the basic blocks that are part of a subfunction from | |||
599 | /// the dominator tree. Specifically, when generating code it may happen that at | |||
600 | /// some point the code generation continues in a new sub-function (e.g., when | |||
601 | /// generating OpenMP code). The basic blocks that are created in this | |||
602 | /// sub-function are then still part of the dominator tree of the original | |||
603 | /// function, such that the dominator tree reaches over function boundaries. | |||
604 | /// This is not only incorrect, but also causes crashes. This function now | |||
605 | /// removes from the dominator tree all basic blocks that are dominated (and | |||
606 | /// consequently reachable) from the entry block of this (sub)function. | |||
607 | /// | |||
608 | /// FIXME: A LLVM (function or region) pass should not touch anything outside of | |||
609 | /// the function/region it runs on. Hence, the pure need for this function shows | |||
610 | /// that we do not comply to this rule. At the moment, this does not cause any | |||
611 | /// issues, but we should be aware that such issues may appear. Unfortunately | |||
612 | /// the current LLVM pass infrastructure does not allow to make Polly a module | |||
613 | /// or call-graph pass to solve this issue, as such a pass would not have access | |||
614 | /// to the per-function analyses passes needed by Polly. A future pass manager | |||
615 | /// infrastructure is supposed to enable such kind of access possibly allowing | |||
616 | /// us to create a cleaner solution here. | |||
617 | /// | |||
618 | /// FIXME: Instead of adding the dominance information and then dropping it | |||
619 | /// later on, we should try to just not add it in the first place. This requires | |||
620 | /// some careful testing to make sure this does not break in interaction with | |||
621 | /// the SCEVBuilder and SplitBlock which may rely on the dominator tree or | |||
622 | /// which may try to update it. | |||
623 | /// | |||
624 | /// @param F The function which contains the BBs to removed. | |||
625 | /// @param DT The dominator tree from which to remove the BBs. | |||
626 | static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) { | |||
627 | DomTreeNode *N = DT.getNode(&F->getEntryBlock()); | |||
628 | std::vector<BasicBlock *> Nodes; | |||
629 | ||||
630 | // We can only remove an element from the dominator tree, if all its children | |||
631 | // have been removed. To ensure this we obtain the list of nodes to remove | |||
632 | // using a post-order tree traversal. | |||
633 | for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I) | |||
634 | Nodes.push_back(I->getBlock()); | |||
635 | ||||
636 | for (BasicBlock *BB : Nodes) | |||
637 | DT.eraseNode(BB); | |||
638 | } | |||
639 | ||||
640 | void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) { | |||
641 | isl_ast_node *Body; | |||
642 | isl_ast_expr *Init, *Inc, *Iterator, *UB; | |||
643 | isl_id *IteratorID; | |||
644 | Value *ValueLB, *ValueUB, *ValueInc; | |||
645 | Type *MaxType; | |||
646 | Value *IV; | |||
647 | CmpInst::Predicate Predicate; | |||
648 | ||||
649 | // The preamble of parallel code interacts different than normal code with | |||
650 | // e.g., scalar initialization. Therefore, we ensure the parallel code is | |||
651 | // separated from the last basic block. | |||
652 | BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(), | |||
653 | &*Builder.GetInsertPoint(), &DT, &LI); | |||
654 | ParBB->setName("polly.parallel.for"); | |||
655 | Builder.SetInsertPoint(&ParBB->front()); | |||
656 | ||||
657 | Body = isl_ast_node_for_get_body(For); | |||
658 | Init = isl_ast_node_for_get_init(For); | |||
659 | Inc = isl_ast_node_for_get_inc(For); | |||
660 | Iterator = isl_ast_node_for_get_iterator(For); | |||
661 | IteratorID = isl_ast_expr_get_id(Iterator); | |||
662 | UB = getUpperBound(isl::manage_copy(For).as<isl::ast_node_for>(), Predicate) | |||
663 | .release(); | |||
664 | ||||
665 | ValueLB = ExprBuilder.create(Init); | |||
666 | ValueUB = ExprBuilder.create(UB); | |||
667 | ValueInc = ExprBuilder.create(Inc); | |||
668 | ||||
669 | // OpenMP always uses SLE. In case the isl generated AST uses a SLT | |||
670 | // expression, we need to adjust the loop bound by one. | |||
671 | if (Predicate == CmpInst::ICMP_SLT) | |||
672 | ValueUB = Builder.CreateAdd( | |||
673 | ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType())); | |||
674 | ||||
675 | MaxType = ExprBuilder.getType(Iterator); | |||
676 | MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); | |||
677 | MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); | |||
678 | MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); | |||
679 | ||||
680 | if (MaxType != ValueLB->getType()) | |||
681 | ValueLB = Builder.CreateSExt(ValueLB, MaxType); | |||
682 | if (MaxType != ValueUB->getType()) | |||
683 | ValueUB = Builder.CreateSExt(ValueUB, MaxType); | |||
684 | if (MaxType != ValueInc->getType()) | |||
685 | ValueInc = Builder.CreateSExt(ValueInc, MaxType); | |||
686 | ||||
687 | BasicBlock::iterator LoopBody; | |||
688 | ||||
689 | SetVector<Value *> SubtreeValues; | |||
690 | SetVector<const Loop *> Loops; | |||
691 | ||||
692 | getReferencesInSubtree(isl::manage_copy(For), SubtreeValues, Loops); | |||
693 | ||||
694 | // Create for all loops we depend on values that contain the current loop | |||
695 | // iteration. These values are necessary to generate code for SCEVs that | |||
696 | // depend on such loops. As a result we need to pass them to the subfunction. | |||
697 | // See [Code generation of induction variables of loops outside Scops] | |||
698 | for (const Loop *L : Loops) { | |||
699 | Value *LoopInductionVar = materializeNonScopLoopInductionVariable(L); | |||
700 | SubtreeValues.insert(LoopInductionVar); | |||
701 | } | |||
702 | ||||
703 | ValueMapT NewValues; | |||
704 | ||||
705 | std::unique_ptr<ParallelLoopGenerator> ParallelLoopGenPtr; | |||
706 | ||||
707 | switch (PollyOmpBackend) { | |||
708 | case OpenMPBackend::GNU: | |||
709 | ParallelLoopGenPtr.reset( | |||
710 | new ParallelLoopGeneratorGOMP(Builder, LI, DT, DL)); | |||
711 | break; | |||
712 | case OpenMPBackend::LLVM: | |||
713 | ParallelLoopGenPtr.reset(new ParallelLoopGeneratorKMP(Builder, LI, DT, DL)); | |||
714 | break; | |||
715 | } | |||
716 | ||||
717 | IV = ParallelLoopGenPtr->createParallelLoop( | |||
718 | ValueLB, ValueUB, ValueInc, SubtreeValues, NewValues, &LoopBody); | |||
719 | BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); | |||
720 | Builder.SetInsertPoint(&*LoopBody); | |||
721 | ||||
722 | // Remember the parallel subfunction | |||
723 | ParallelSubfunctions.push_back(LoopBody->getFunction()); | |||
724 | ||||
725 | // Save the current values. | |||
726 | auto ValueMapCopy = ValueMap; | |||
727 | IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue; | |||
728 | ||||
729 | updateValues(NewValues); | |||
730 | IDToValue[IteratorID] = IV; | |||
731 | ||||
732 | ValueMapT NewValuesReverse; | |||
733 | ||||
734 | for (auto P : NewValues) | |||
735 | NewValuesReverse[P.second] = P.first; | |||
736 | ||||
737 | Annotator.addAlternativeAliasBases(NewValuesReverse); | |||
738 | ||||
739 | create(Body); | |||
740 | ||||
741 | Annotator.resetAlternativeAliasBases(); | |||
742 | // Restore the original values. | |||
743 | ValueMap = ValueMapCopy; | |||
744 | IDToValue = IDToValueCopy; | |||
745 | ||||
746 | Builder.SetInsertPoint(&*AfterLoop); | |||
747 | removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT); | |||
748 | ||||
749 | for (const Loop *L : Loops) | |||
750 | OutsideLoopIterations.erase(L); | |||
751 | ||||
752 | isl_ast_node_free(For); | |||
753 | isl_ast_expr_free(Iterator); | |||
754 | isl_id_free(IteratorID); | |||
755 | ||||
756 | ParallelLoops++; | |||
757 | } | |||
758 | ||||
759 | /// Return whether any of @p Node's statements contain partial accesses. | |||
760 | /// | |||
761 | /// Partial accesses are not supported by Polly's vector code generator. | |||
762 | static bool hasPartialAccesses(__isl_take isl_ast_node *Node) { | |||
763 | return isl_ast_node_foreach_descendant_top_down( | |||
764 | Node, | |||
765 | [](isl_ast_node *Node, void *User) -> isl_bool { | |||
766 | if (isl_ast_node_get_type(Node) != isl_ast_node_user) | |||
767 | return isl_bool_true; | |||
768 | ||||
769 | isl::ast_expr Expr = | |||
770 | isl::manage(isl_ast_node_user_get_expr(Node)); | |||
771 | isl::ast_expr StmtExpr = Expr.get_op_arg(0); | |||
772 | isl::id Id = StmtExpr.get_id(); | |||
773 | ||||
774 | ScopStmt *Stmt = | |||
775 | static_cast<ScopStmt *>(isl_id_get_user(Id.get())); | |||
776 | isl::set StmtDom = Stmt->getDomain(); | |||
777 | for (auto *MA : *Stmt) { | |||
778 | if (MA->isLatestPartialAccess()) | |||
779 | return isl_bool_error; | |||
780 | } | |||
781 | return isl_bool_true; | |||
782 | }, | |||
783 | nullptr) == isl_stat_error; | |||
784 | } | |||
785 | ||||
786 | void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { | |||
787 | bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; | |||
788 | ||||
789 | if (Vector && IslAstInfo::isInnermostParallel(isl::manage_copy(For)) && | |||
790 | !IslAstInfo::isReductionParallel(isl::manage_copy(For))) { | |||
791 | int VectorWidth = | |||
792 | getNumberOfIterations(isl::manage_copy(For).as<isl::ast_node_for>()); | |||
793 | if (1 < VectorWidth && VectorWidth <= 16 && !hasPartialAccesses(For)) { | |||
794 | createForVector(For, VectorWidth); | |||
795 | return; | |||
796 | } | |||
797 | } | |||
798 | ||||
799 | if (IslAstInfo::isExecutedInParallel(isl::manage_copy(For))) { | |||
800 | createForParallel(For); | |||
801 | return; | |||
802 | } | |||
803 | bool Parallel = (IslAstInfo::isParallel(isl::manage_copy(For)) && | |||
804 | !IslAstInfo::isReductionParallel(isl::manage_copy(For))); | |||
805 | createForSequential(isl::manage(For).as<isl::ast_node_for>(), Parallel); | |||
806 | } | |||
807 | ||||
808 | void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) { | |||
809 | isl_ast_expr *Cond = isl_ast_node_if_get_cond(If); | |||
810 | ||||
811 | Function *F = Builder.GetInsertBlock()->getParent(); | |||
812 | LLVMContext &Context = F->getContext(); | |||
813 | ||||
814 | BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), | |||
815 | &*Builder.GetInsertPoint(), &DT, &LI); | |||
816 | CondBB->setName("polly.cond"); | |||
817 | BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI); | |||
818 | MergeBB->setName("polly.merge"); | |||
819 | BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); | |||
820 | BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F); | |||
821 | ||||
822 | DT.addNewBlock(ThenBB, CondBB); | |||
823 | DT.addNewBlock(ElseBB, CondBB); | |||
824 | DT.changeImmediateDominator(MergeBB, CondBB); | |||
825 | ||||
826 | Loop *L = LI.getLoopFor(CondBB); | |||
827 | if (L) { | |||
828 | L->addBasicBlockToLoop(ThenBB, LI); | |||
829 | L->addBasicBlockToLoop(ElseBB, LI); | |||
830 | } | |||
831 | ||||
832 | CondBB->getTerminator()->eraseFromParent(); | |||
833 | ||||
834 | Builder.SetInsertPoint(CondBB); | |||
835 | Value *Predicate = ExprBuilder.create(Cond); | |||
836 | Builder.CreateCondBr(Predicate, ThenBB, ElseBB); | |||
837 | Builder.SetInsertPoint(ThenBB); | |||
838 | Builder.CreateBr(MergeBB); | |||
839 | Builder.SetInsertPoint(ElseBB); | |||
840 | Builder.CreateBr(MergeBB); | |||
841 | Builder.SetInsertPoint(&ThenBB->front()); | |||
842 | ||||
843 | create(isl_ast_node_if_get_then(If)); | |||
844 | ||||
845 | Builder.SetInsertPoint(&ElseBB->front()); | |||
846 | ||||
847 | if (isl_ast_node_if_has_else(If)) | |||
848 | create(isl_ast_node_if_get_else(If)); | |||
849 | ||||
850 | Builder.SetInsertPoint(&MergeBB->front()); | |||
851 | ||||
852 | isl_ast_node_free(If); | |||
853 | ||||
854 | IfConditions++; | |||
855 | } | |||
856 | ||||
857 | __isl_give isl_id_to_ast_expr * | |||
858 | IslNodeBuilder::createNewAccesses(ScopStmt *Stmt, | |||
859 | __isl_keep isl_ast_node *Node) { | |||
860 | isl::id_to_ast_expr NewAccesses = | |||
861 | isl::id_to_ast_expr::alloc(Stmt->getParent()->getIslCtx(), 0); | |||
862 | ||||
863 | isl::ast_build Build = IslAstInfo::getBuild(isl::manage_copy(Node)); | |||
864 | assert(!Build.is_null() && "Could not obtain isl_ast_build from user node")(static_cast<void> (0)); | |||
865 | Stmt->setAstBuild(Build); | |||
866 | ||||
867 | for (auto *MA : *Stmt) { | |||
868 | if (!MA->hasNewAccessRelation()) { | |||
869 | if (PollyGenerateExpressions) { | |||
870 | if (!MA->isAffine()) | |||
871 | continue; | |||
872 | if (MA->getLatestScopArrayInfo()->getBasePtrOriginSAI()) | |||
873 | continue; | |||
874 | ||||
875 | auto *BasePtr = | |||
876 | dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr()); | |||
877 | if (BasePtr && Stmt->getParent()->getRegion().contains(BasePtr)) | |||
878 | continue; | |||
879 | } else { | |||
880 | continue; | |||
881 | } | |||
882 | } | |||
883 | assert(MA->isAffine() &&(static_cast<void> (0)) | |||
884 | "Only affine memory accesses can be code generated")(static_cast<void> (0)); | |||
885 | ||||
886 | isl::union_map Schedule = Build.get_schedule(); | |||
887 | ||||
888 | #ifndef NDEBUG1 | |||
889 | if (MA->isRead()) { | |||
890 | auto Dom = Stmt->getDomain().release(); | |||
891 | auto SchedDom = isl_set_from_union_set(Schedule.domain().release()); | |||
892 | auto AccDom = isl_map_domain(MA->getAccessRelation().release()); | |||
893 | Dom = isl_set_intersect_params(Dom, | |||
894 | Stmt->getParent()->getContext().release()); | |||
895 | SchedDom = isl_set_intersect_params( | |||
896 | SchedDom, Stmt->getParent()->getContext().release()); | |||
897 | assert(isl_set_is_subset(SchedDom, AccDom) &&(static_cast<void> (0)) | |||
898 | "Access relation not defined on full schedule domain")(static_cast<void> (0)); | |||
899 | assert(isl_set_is_subset(Dom, AccDom) &&(static_cast<void> (0)) | |||
900 | "Access relation not defined on full domain")(static_cast<void> (0)); | |||
901 | isl_set_free(AccDom); | |||
902 | isl_set_free(SchedDom); | |||
903 | isl_set_free(Dom); | |||
904 | } | |||
905 | #endif | |||
906 | ||||
907 | isl::pw_multi_aff PWAccRel = MA->applyScheduleToAccessRelation(Schedule); | |||
908 | ||||
909 | // isl cannot generate an index expression for access-nothing accesses. | |||
910 | isl::set AccDomain = PWAccRel.domain(); | |||
911 | isl::set Context = S.getContext(); | |||
912 | AccDomain = AccDomain.intersect_params(Context); | |||
913 | if (AccDomain.is_empty()) | |||
914 | continue; | |||
915 | ||||
916 | isl::ast_expr AccessExpr = Build.access_from(PWAccRel); | |||
917 | NewAccesses = NewAccesses.set(MA->getId(), AccessExpr); | |||
918 | } | |||
919 | ||||
920 | return NewAccesses.release(); | |||
921 | } | |||
922 | ||||
923 | void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr, | |||
924 | ScopStmt *Stmt, LoopToScevMapT <S) { | |||
925 | assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op &&(static_cast<void> (0)) | |||
926 | "Expression of type 'op' expected")(static_cast<void> (0)); | |||
927 | assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call &&(static_cast<void> (0)) | |||
928 | "Operation of type 'call' expected")(static_cast<void> (0)); | |||
929 | for (int i = 0; i < isl_ast_expr_get_op_n_arg(Expr) - 1; ++i) { | |||
930 | isl_ast_expr *SubExpr; | |||
931 | Value *V; | |||
932 | ||||
933 | SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1); | |||
934 | V = ExprBuilder.create(SubExpr); | |||
935 | ScalarEvolution *SE = Stmt->getParent()->getSE(); | |||
936 | LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V); | |||
937 | } | |||
938 | ||||
939 | isl_ast_expr_free(Expr); | |||
940 | } | |||
941 | ||||
942 | void IslNodeBuilder::createSubstitutionsVector( | |||
943 | __isl_take isl_ast_expr *Expr, ScopStmt *Stmt, | |||
944 | std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS, | |||
945 | __isl_take isl_id *IteratorID) { | |||
946 | int i = 0; | |||
947 | ||||
948 | Value *OldValue = IDToValue[IteratorID]; | |||
949 | for (Value *IV : IVS) { | |||
950 | IDToValue[IteratorID] = IV; | |||
951 | createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]); | |||
952 | i++; | |||
953 | } | |||
954 | ||||
955 | IDToValue[IteratorID] = OldValue; | |||
956 | isl_id_free(IteratorID); | |||
957 | isl_ast_expr_free(Expr); | |||
958 | } | |||
959 | ||||
960 | void IslNodeBuilder::generateCopyStmt( | |||
961 | ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) { | |||
962 | assert(Stmt->size() == 2)(static_cast<void> (0)); | |||
963 | auto ReadAccess = Stmt->begin(); | |||
964 | auto WriteAccess = ReadAccess++; | |||
965 | assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite())(static_cast<void> (0)); | |||
966 | assert((*ReadAccess)->getElementType() == (*WriteAccess)->getElementType() &&(static_cast<void> (0)) | |||
967 | "Accesses use the same data type")(static_cast<void> (0)); | |||
968 | assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind())(static_cast<void> (0)); | |||
969 | auto *AccessExpr = | |||
970 | isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId().release()); | |||
971 | auto *LoadValue = ExprBuilder.create(AccessExpr); | |||
972 | AccessExpr = | |||
973 | isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId().release()); | |||
974 | auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr).first; | |||
975 | Builder.CreateStore(LoadValue, StoreAddr); | |||
976 | } | |||
977 | ||||
978 | Value *IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop *L) { | |||
979 | assert(OutsideLoopIterations.find(L) == OutsideLoopIterations.end() &&(static_cast<void> (0)) | |||
980 | "trying to materialize loop induction variable twice")(static_cast<void> (0)); | |||
981 | const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), | |||
982 | SE.getUnknown(Builder.getInt64(1)), L, | |||
983 | SCEV::FlagAnyWrap); | |||
984 | Value *V = generateSCEV(OuterLIV); | |||
985 | OutsideLoopIterations[L] = SE.getUnknown(V); | |||
986 | return V; | |||
987 | } | |||
988 | ||||
989 | void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) { | |||
990 | LoopToScevMapT LTS; | |||
991 | isl_id *Id; | |||
992 | ScopStmt *Stmt; | |||
993 | ||||
994 | isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); | |||
995 | isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); | |||
996 | Id = isl_ast_expr_get_id(StmtExpr); | |||
997 | isl_ast_expr_free(StmtExpr); | |||
998 | ||||
999 | LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end()); | |||
1000 | ||||
1001 | Stmt = (ScopStmt *)isl_id_get_user(Id); | |||
1002 | auto *NewAccesses = createNewAccesses(Stmt, User); | |||
1003 | if (Stmt->isCopyStmt()) { | |||
1004 | generateCopyStmt(Stmt, NewAccesses); | |||
1005 | isl_ast_expr_free(Expr); | |||
1006 | } else { | |||
1007 | createSubstitutions(Expr, Stmt, LTS); | |||
1008 | ||||
1009 | if (Stmt->isBlockStmt()) | |||
1010 | BlockGen.copyStmt(*Stmt, LTS, NewAccesses); | |||
1011 | else | |||
1012 | RegionGen.copyStmt(*Stmt, LTS, NewAccesses); | |||
1013 | } | |||
1014 | ||||
1015 | isl_id_to_ast_expr_free(NewAccesses); | |||
1016 | isl_ast_node_free(User); | |||
1017 | isl_id_free(Id); | |||
1018 | } | |||
1019 | ||||
1020 | void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) { | |||
1021 | isl_ast_node_list *List = isl_ast_node_block_get_children(Block); | |||
1022 | ||||
1023 | for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) | |||
1024 | create(isl_ast_node_list_get_ast_node(List, i)); | |||
1025 | ||||
1026 | isl_ast_node_free(Block); | |||
1027 | isl_ast_node_list_free(List); | |||
1028 | } | |||
1029 | ||||
1030 | void IslNodeBuilder::create(__isl_take isl_ast_node *Node) { | |||
1031 | switch (isl_ast_node_get_type(Node)) { | |||
1032 | case isl_ast_node_error: | |||
1033 | llvm_unreachable("code generation error")__builtin_unreachable(); | |||
1034 | case isl_ast_node_mark: | |||
1035 | createMark(Node); | |||
1036 | return; | |||
1037 | case isl_ast_node_for: | |||
1038 | createFor(Node); | |||
1039 | return; | |||
1040 | case isl_ast_node_if: | |||
1041 | createIf(Node); | |||
1042 | return; | |||
1043 | case isl_ast_node_user: | |||
1044 | createUser(Node); | |||
1045 | return; | |||
1046 | case isl_ast_node_block: | |||
1047 | createBlock(Node); | |||
1048 | return; | |||
1049 | } | |||
1050 | ||||
1051 | llvm_unreachable("Unknown isl_ast_node type")__builtin_unreachable(); | |||
1052 | } | |||
1053 | ||||
1054 | bool IslNodeBuilder::materializeValue(isl_id *Id) { | |||
1055 | // If the Id is already mapped, skip it. | |||
1056 | if (!IDToValue.count(Id)) { | |||
1057 | auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id); | |||
1058 | Value *V = nullptr; | |||
1059 | ||||
1060 | // Parameters could refer to invariant loads that need to be | |||
1061 | // preloaded before we can generate code for the parameter. Thus, | |||
1062 | // check if any value referred to in ParamSCEV is an invariant load | |||
1063 | // and if so make sure its equivalence class is preloaded. | |||
1064 | SetVector<Value *> Values; | |||
1065 | findValues(ParamSCEV, SE, Values); | |||
1066 | for (auto *Val : Values) { | |||
1067 | // Check if the value is an instruction in a dead block within the SCoP | |||
1068 | // and if so do not code generate it. | |||
1069 | if (auto *Inst = dyn_cast<Instruction>(Val)) { | |||
1070 | if (S.contains(Inst)) { | |||
1071 | bool IsDead = true; | |||
1072 | ||||
1073 | // Check for "undef" loads first, then if there is a statement for | |||
1074 | // the parent of Inst and lastly if the parent of Inst has an empty | |||
1075 | // domain. In the first and last case the instruction is dead but if | |||
1076 | // there is a statement or the domain is not empty Inst is not dead. | |||
1077 | auto MemInst = MemAccInst::dyn_cast(Inst); | |||
1078 | auto Address = MemInst ? MemInst.getPointerOperand() : nullptr; | |||
1079 | if (Address && SE.getUnknown(UndefValue::get(Address->getType())) == | |||
1080 | SE.getPointerBase(SE.getSCEV(Address))) { | |||
1081 | } else if (S.getStmtFor(Inst)) { | |||
1082 | IsDead = false; | |||
1083 | } else { | |||
1084 | auto *Domain = S.getDomainConditions(Inst->getParent()).release(); | |||
1085 | IsDead = isl_set_is_empty(Domain); | |||
1086 | isl_set_free(Domain); | |||
1087 | } | |||
1088 | ||||
1089 | if (IsDead) { | |||
1090 | V = UndefValue::get(ParamSCEV->getType()); | |||
1091 | break; | |||
1092 | } | |||
1093 | } | |||
1094 | } | |||
1095 | ||||
1096 | if (auto *IAClass = S.lookupInvariantEquivClass(Val)) { | |||
1097 | // Check if this invariant access class is empty, hence if we never | |||
1098 | // actually added a loads instruction to it. In that case it has no | |||
1099 | // (meaningful) users and we should not try to code generate it. | |||
1100 | if (IAClass->InvariantAccesses.empty()) | |||
1101 | V = UndefValue::get(ParamSCEV->getType()); | |||
1102 | ||||
1103 | if (!preloadInvariantEquivClass(*IAClass)) { | |||
1104 | isl_id_free(Id); | |||
1105 | return false; | |||
1106 | } | |||
1107 | } | |||
1108 | } | |||
1109 | ||||
1110 | V = V ? V : generateSCEV(ParamSCEV); | |||
1111 | IDToValue[Id] = V; | |||
1112 | } | |||
1113 | ||||
1114 | isl_id_free(Id); | |||
1115 | return true; | |||
1116 | } | |||
1117 | ||||
1118 | bool IslNodeBuilder::materializeParameters(isl_set *Set) { | |||
1119 | for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); i < e; ++i) { | |||
1120 | if (!isl_set_involves_dims(Set, isl_dim_param, i, 1)) | |||
1121 | continue; | |||
1122 | isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i); | |||
1123 | if (!materializeValue(Id)) | |||
1124 | return false; | |||
1125 | } | |||
1126 | return true; | |||
1127 | } | |||
1128 | ||||
1129 | bool IslNodeBuilder::materializeParameters() { | |||
1130 | for (const SCEV *Param : S.parameters()) { | |||
1131 | isl_id *Id = S.getIdForParam(Param).release(); | |||
1132 | if (!materializeValue(Id)) | |||
1133 | return false; | |||
1134 | } | |||
1135 | return true; | |||
1136 | } | |||
1137 | ||||
1138 | /// Generate the computation of the size of the outermost dimension from the | |||
1139 | /// Fortran array descriptor (in this case, `@g_arr`). The final `%size` | |||
1140 | /// contains the size of the array. | |||
1141 | /// | |||
1142 | /// %arrty = type { i8*, i64, i64, [3 x %desc.dimensionty] } | |||
1143 | /// %desc.dimensionty = type { i64, i64, i64 } | |||
1144 | /// @g_arr = global %arrty zeroinitializer, align 32 | |||
1145 | /// ... | |||
1146 | /// %0 = load i64, i64* getelementptr inbounds | |||
1147 | /// (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 2) | |||
1148 | /// %1 = load i64, i64* getelementptr inbounds | |||
1149 | /// (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 1) | |||
1150 | /// %2 = sub nsw i64 %0, %1 | |||
1151 | /// %size = add nsw i64 %2, 1 | |||
1152 | static Value *buildFADOutermostDimensionLoad(Value *GlobalDescriptor, | |||
1153 | PollyIRBuilder &Builder, | |||
1154 | std::string ArrayName) { | |||
1155 | assert(GlobalDescriptor && "invalid global descriptor given")(static_cast<void> (0)); | |||
1156 | Type *Ty = GlobalDescriptor->getType()->getPointerElementType(); | |||
1157 | ||||
1158 | Value *endIdx[4] = {Builder.getInt64(0), Builder.getInt32(3), | |||
1159 | Builder.getInt64(0), Builder.getInt32(2)}; | |||
1160 | Value *endPtr = Builder.CreateInBoundsGEP(Ty, GlobalDescriptor, endIdx, | |||
1161 | ArrayName + "_end_ptr"); | |||
1162 | Type *type = cast<GEPOperator>(endPtr)->getResultElementType(); | |||
1163 | assert(isa<IntegerType>(type) && "expected type of end to be integral")(static_cast<void> (0)); | |||
1164 | ||||
1165 | Value *end = Builder.CreateLoad(type, endPtr, ArrayName + "_end"); | |||
1166 | ||||
1167 | Value *beginIdx[4] = {Builder.getInt64(0), Builder.getInt32(3), | |||
1168 | Builder.getInt64(0), Builder.getInt32(1)}; | |||
1169 | Value *beginPtr = Builder.CreateInBoundsGEP(Ty, GlobalDescriptor, beginIdx, | |||
1170 | ArrayName + "_begin_ptr"); | |||
1171 | Value *begin = Builder.CreateLoad(type, beginPtr, ArrayName + "_begin"); | |||
1172 | ||||
1173 | Value *size = | |||
1174 | Builder.CreateNSWSub(end, begin, ArrayName + "_end_begin_delta"); | |||
1175 | ||||
1176 | size = Builder.CreateNSWAdd( | |||
1177 | end, ConstantInt::get(type, 1, /* signed = */ true), ArrayName + "_size"); | |||
1178 | ||||
1179 | return size; | |||
1180 | } | |||
1181 | ||||
1182 | bool IslNodeBuilder::materializeFortranArrayOutermostDimension() { | |||
1183 | for (ScopArrayInfo *Array : S.arrays()) { | |||
1184 | if (Array->getNumberOfDimensions() == 0) | |||
1185 | continue; | |||
1186 | ||||
1187 | Value *FAD = Array->getFortranArrayDescriptor(); | |||
1188 | if (!FAD) | |||
1189 | continue; | |||
1190 | ||||
1191 | isl_pw_aff *ParametricPwAff = Array->getDimensionSizePw(0).release(); | |||
1192 | assert(ParametricPwAff && "parametric pw_aff corresponding "(static_cast<void> (0)) | |||
1193 | "to outermost dimension does not "(static_cast<void> (0)) | |||
1194 | "exist")(static_cast<void> (0)); | |||
1195 | ||||
1196 | isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0); | |||
1197 | isl_pw_aff_free(ParametricPwAff); | |||
1198 | ||||
1199 | assert(Id && "pw_aff is not parametric")(static_cast<void> (0)); | |||
1200 | ||||
1201 | if (IDToValue.count(Id)) { | |||
1202 | isl_id_free(Id); | |||
1203 | continue; | |||
1204 | } | |||
1205 | ||||
1206 | Value *FinalValue = | |||
1207 | buildFADOutermostDimensionLoad(FAD, Builder, Array->getName()); | |||
1208 | assert(FinalValue && "unable to build Fortran array "(static_cast<void> (0)) | |||
1209 | "descriptor load of outermost dimension")(static_cast<void> (0)); | |||
1210 | IDToValue[Id] = FinalValue; | |||
1211 | isl_id_free(Id); | |||
1212 | } | |||
1213 | return true; | |||
1214 | } | |||
1215 | ||||
1216 | Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange, | |||
1217 | isl_ast_build *Build, | |||
1218 | Instruction *AccInst) { | |||
1219 | isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange); | |||
1220 | isl_ast_expr *Access = | |||
1221 | isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); | |||
1222 | auto *Address = isl_ast_expr_address_of(Access); | |||
1223 | auto *AddressValue = ExprBuilder.create(Address); | |||
1224 | Value *PreloadVal; | |||
1225 | ||||
1226 | // Correct the type as the SAI might have a different type than the user | |||
1227 | // expects, especially if the base pointer is a struct. | |||
1228 | Type *Ty = AccInst->getType(); | |||
1229 | ||||
1230 | auto *Ptr = AddressValue; | |||
1231 | auto Name = Ptr->getName(); | |||
1232 | auto AS = Ptr->getType()->getPointerAddressSpace(); | |||
1233 | Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(AS), Name + ".cast"); | |||
1234 | PreloadVal = Builder.CreateLoad(Ty, Ptr, Name + ".load"); | |||
1235 | if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal)) | |||
1236 | PreloadInst->setAlignment(cast<LoadInst>(AccInst)->getAlign()); | |||
1237 | ||||
1238 | // TODO: This is only a hot fix for SCoP sequences that use the same load | |||
1239 | // instruction contained and hoisted by one of the SCoPs. | |||
1240 | if (SE.isSCEVable(Ty)) | |||
1241 | SE.forgetValue(AccInst); | |||
1242 | ||||
1243 | return PreloadVal; | |||
1244 | } | |||
1245 | ||||
1246 | Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, | |||
1247 | isl_set *Domain) { | |||
1248 | isl_set *AccessRange = isl_map_range(MA.getAddressFunction().release()); | |||
1249 | AccessRange = isl_set_gist_params(AccessRange, S.getContext().release()); | |||
1250 | ||||
1251 | if (!materializeParameters(AccessRange)) { | |||
1252 | isl_set_free(AccessRange); | |||
1253 | isl_set_free(Domain); | |||
1254 | return nullptr; | |||
1255 | } | |||
1256 | ||||
1257 | auto *Build = | |||
1258 | isl_ast_build_from_context(isl_set_universe(S.getParamSpace().release())); | |||
1259 | isl_set *Universe = isl_set_universe(isl_set_get_space(Domain)); | |||
1260 | bool AlwaysExecuted = isl_set_is_equal(Domain, Universe); | |||
1261 | isl_set_free(Universe); | |||
1262 | ||||
1263 | Instruction *AccInst = MA.getAccessInstruction(); | |||
1264 | Type *AccInstTy = AccInst->getType(); | |||
1265 | ||||
1266 | Value *PreloadVal = nullptr; | |||
1267 | if (AlwaysExecuted) { | |||
1268 | PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst); | |||
1269 | isl_ast_build_free(Build); | |||
1270 | isl_set_free(Domain); | |||
1271 | return PreloadVal; | |||
1272 | } | |||
1273 | ||||
1274 | if (!materializeParameters(Domain)) { | |||
1275 | isl_ast_build_free(Build); | |||
1276 | isl_set_free(AccessRange); | |||
1277 | isl_set_free(Domain); | |||
1278 | return nullptr; | |||
1279 | } | |||
1280 | ||||
1281 | isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); | |||
1282 | Domain = nullptr; | |||
1283 | ||||
1284 | ExprBuilder.setTrackOverflow(true); | |||
1285 | Value *Cond = ExprBuilder.create(DomainCond); | |||
1286 | Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(), | |||
1287 | "polly.preload.cond.overflown"); | |||
1288 | Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result"); | |||
1289 | ExprBuilder.setTrackOverflow(false); | |||
1290 | ||||
1291 | if (!Cond->getType()->isIntegerTy(1)) | |||
1292 | Cond = Builder.CreateIsNotNull(Cond); | |||
1293 | ||||
1294 | BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), | |||
1295 | &*Builder.GetInsertPoint(), &DT, &LI); | |||
1296 | CondBB->setName("polly.preload.cond"); | |||
1297 | ||||
1298 | BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI); | |||
1299 | MergeBB->setName("polly.preload.merge"); | |||
1300 | ||||
1301 | Function *F = Builder.GetInsertBlock()->getParent(); | |||
1302 | LLVMContext &Context = F->getContext(); | |||
1303 | BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F); | |||
1304 | ||||
1305 | DT.addNewBlock(ExecBB, CondBB); | |||
1306 | if (Loop *L = LI.getLoopFor(CondBB)) | |||
1307 | L->addBasicBlockToLoop(ExecBB, LI); | |||
1308 | ||||
1309 | auto *CondBBTerminator = CondBB->getTerminator(); | |||
1310 | Builder.SetInsertPoint(CondBBTerminator); | |||
1311 | Builder.CreateCondBr(Cond, ExecBB, MergeBB); | |||
1312 | CondBBTerminator->eraseFromParent(); | |||
1313 | ||||
1314 | Builder.SetInsertPoint(ExecBB); | |||
1315 | Builder.CreateBr(MergeBB); | |||
1316 | ||||
1317 | Builder.SetInsertPoint(ExecBB->getTerminator()); | |||
1318 | Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst); | |||
1319 | Builder.SetInsertPoint(MergeBB->getTerminator()); | |||
1320 | auto *MergePHI = Builder.CreatePHI( | |||
1321 | AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge"); | |||
1322 | PreloadVal = MergePHI; | |||
1323 | ||||
1324 | if (!PreAccInst) { | |||
1325 | PreloadVal = nullptr; | |||
1326 | PreAccInst = UndefValue::get(AccInstTy); | |||
1327 | } | |||
1328 | ||||
1329 | MergePHI->addIncoming(PreAccInst, ExecBB); | |||
1330 | MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB); | |||
1331 | ||||
1332 | isl_ast_build_free(Build); | |||
1333 | return PreloadVal; | |||
1334 | } | |||
1335 | ||||
1336 | bool IslNodeBuilder::preloadInvariantEquivClass( | |||
1337 | InvariantEquivClassTy &IAClass) { | |||
1338 | // For an equivalence class of invariant loads we pre-load the representing | |||
1339 | // element with the unified execution context. However, we have to map all | |||
1340 | // elements of the class to the one preloaded load as they are referenced | |||
1341 | // during the code generation and therefor need to be mapped. | |||
1342 | const MemoryAccessList &MAs = IAClass.InvariantAccesses; | |||
1343 | if (MAs.empty()) | |||
1344 | return true; | |||
1345 | ||||
1346 | MemoryAccess *MA = MAs.front(); | |||
1347 | assert(MA->isArrayKind() && MA->isRead())(static_cast<void> (0)); | |||
1348 | ||||
1349 | // If the access function was already mapped, the preload of this equivalence | |||
1350 | // class was triggered earlier already and doesn't need to be done again. | |||
1351 | if (ValueMap.count(MA->getAccessInstruction())) | |||
1352 | return true; | |||
1353 | ||||
1354 | // Check for recursion which can be caused by additional constraints, e.g., | |||
1355 | // non-finite loop constraints. In such a case we have to bail out and insert | |||
1356 | // a "false" runtime check that will cause the original code to be executed. | |||
1357 | auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType); | |||
1358 | if (!PreloadedPtrs.insert(PtrId).second) | |||
1359 | return false; | |||
1360 | ||||
1361 | // The execution context of the IAClass. | |||
1362 | isl::set &ExecutionCtx = IAClass.ExecutionContext; | |||
1363 | ||||
1364 | // If the base pointer of this class is dependent on another one we have to | |||
1365 | // make sure it was preloaded already. | |||
1366 | auto *SAI = MA->getScopArrayInfo(); | |||
1367 | if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) { | |||
1368 | if (!preloadInvariantEquivClass(*BaseIAClass)) | |||
1369 | return false; | |||
1370 | ||||
1371 | // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and | |||
1372 | // we need to refine the ExecutionCtx. | |||
1373 | isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext; | |||
1374 | ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx); | |||
1375 | } | |||
1376 | ||||
1377 | // If the size of a dimension is dependent on another class, make sure it is | |||
1378 | // preloaded. | |||
1379 | for (unsigned i = 1, e = SAI->getNumberOfDimensions(); i < e; ++i) { | |||
1380 | const SCEV *Dim = SAI->getDimensionSize(i); | |||
1381 | SetVector<Value *> Values; | |||
1382 | findValues(Dim, SE, Values); | |||
1383 | for (auto *Val : Values) { | |||
1384 | if (auto *BaseIAClass = S.lookupInvariantEquivClass(Val)) { | |||
1385 | if (!preloadInvariantEquivClass(*BaseIAClass)) | |||
1386 | return false; | |||
1387 | ||||
1388 | // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx | |||
1389 | // and we need to refine the ExecutionCtx. | |||
1390 | isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext; | |||
1391 | ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx); | |||
1392 | } | |||
1393 | } | |||
1394 | } | |||
1395 | ||||
1396 | Instruction *AccInst = MA->getAccessInstruction(); | |||
1397 | Type *AccInstTy = AccInst->getType(); | |||
1398 | ||||
1399 | Value *PreloadVal = preloadInvariantLoad(*MA, ExecutionCtx.copy()); | |||
1400 | if (!PreloadVal) | |||
1401 | return false; | |||
1402 | ||||
1403 | for (const MemoryAccess *MA : MAs) { | |||
1404 | Instruction *MAAccInst = MA->getAccessInstruction(); | |||
1405 | assert(PreloadVal->getType() == MAAccInst->getType())(static_cast<void> (0)); | |||
1406 | ValueMap[MAAccInst] = PreloadVal; | |||
1407 | } | |||
1408 | ||||
1409 | if (SE.isSCEVable(AccInstTy)) { | |||
1410 | isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)).release(); | |||
1411 | if (ParamId) | |||
1412 | IDToValue[ParamId] = PreloadVal; | |||
1413 | isl_id_free(ParamId); | |||
1414 | } | |||
1415 | ||||
1416 | BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); | |||
1417 | auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(), | |||
1418 | AccInst->getName() + ".preload.s2a", | |||
1419 | &*EntryBB->getFirstInsertionPt()); | |||
1420 | Builder.CreateStore(PreloadVal, Alloca); | |||
1421 | ValueMapT PreloadedPointer; | |||
1422 | PreloadedPointer[PreloadVal] = AccInst; | |||
1423 | Annotator.addAlternativeAliasBases(PreloadedPointer); | |||
1424 | ||||
1425 | for (auto *DerivedSAI : SAI->getDerivedSAIs()) { | |||
1426 | Value *BasePtr = DerivedSAI->getBasePtr(); | |||
1427 | ||||
1428 | for (const MemoryAccess *MA : MAs) { | |||
1429 | // As the derived SAI information is quite coarse, any load from the | |||
1430 | // current SAI could be the base pointer of the derived SAI, however we | |||
1431 | // should only change the base pointer of the derived SAI if we actually | |||
1432 | // preloaded it. | |||
1433 | if (BasePtr == MA->getOriginalBaseAddr()) { | |||
1434 | assert(BasePtr->getType() == PreloadVal->getType())(static_cast<void> (0)); | |||
1435 | DerivedSAI->setBasePtr(PreloadVal); | |||
1436 | } | |||
1437 | ||||
1438 | // For scalar derived SAIs we remap the alloca used for the derived value. | |||
1439 | if (BasePtr == MA->getAccessInstruction()) | |||
1440 | ScalarMap[DerivedSAI] = Alloca; | |||
1441 | } | |||
1442 | } | |||
1443 | ||||
1444 | for (const MemoryAccess *MA : MAs) { | |||
1445 | Instruction *MAAccInst = MA->getAccessInstruction(); | |||
1446 | // Use the escape system to get the correct value to users outside the SCoP. | |||
1447 | BlockGenerator::EscapeUserVectorTy EscapeUsers; | |||
1448 | for (auto *U : MAAccInst->users()) | |||
1449 | if (Instruction *UI = dyn_cast<Instruction>(U)) | |||
1450 | if (!S.contains(UI)) | |||
1451 | EscapeUsers.push_back(UI); | |||
1452 | ||||
1453 | if (EscapeUsers.empty()) | |||
1454 | continue; | |||
1455 | ||||
1456 | EscapeMap[MA->getAccessInstruction()] = | |||
1457 | std::make_pair(Alloca, std::move(EscapeUsers)); | |||
1458 | } | |||
1459 | ||||
1460 | return true; | |||
1461 | } | |||
1462 | ||||
1463 | void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) { | |||
1464 | for (auto &SAI : S.arrays()) { | |||
1465 | if (SAI->getBasePtr()) | |||
| ||||
1466 | continue; | |||
1467 | ||||
1468 | assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) &&(static_cast<void> (0)) | |||
1469 | "The size of the outermost dimension is used to declare newly "(static_cast<void> (0)) | |||
1470 | "created arrays that require memory allocation.")(static_cast<void> (0)); | |||
1471 | ||||
1472 | Type *NewArrayType = nullptr; | |||
1473 | ||||
1474 | // Get the size of the array = size(dim_1)*...*size(dim_n) | |||
1475 | uint64_t ArraySizeInt = 1; | |||
1476 | for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) { | |||
1477 | auto *DimSize = SAI->getDimensionSize(i); | |||
1478 | unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize) | |||
1479 | ->getAPInt() | |||
1480 | .getLimitedValue(); | |||
1481 | ||||
1482 | if (!NewArrayType) | |||
1483 | NewArrayType = SAI->getElementType(); | |||
1484 | ||||
1485 | NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize); | |||
1486 | ArraySizeInt *= UnsignedDimSize; | |||
1487 | } | |||
1488 | ||||
1489 | if (SAI->isOnHeap()) { | |||
1490 | LLVMContext &Ctx = NewArrayType->getContext(); | |||
| ||||
1491 | ||||
1492 | // Get the IntPtrTy from the Datalayout | |||
1493 | auto IntPtrTy = DL.getIntPtrType(Ctx); | |||
1494 | ||||
1495 | // Get the size of the element type in bits | |||
1496 | unsigned Size = SAI->getElemSizeInBytes(); | |||
1497 | ||||
1498 | // Insert the malloc call at polly.start | |||
1499 | auto InstIt = std::get<0>(StartExitBlocks)->getTerminator(); | |||
1500 | auto *CreatedArray = CallInst::CreateMalloc( | |||
1501 | &*InstIt, IntPtrTy, SAI->getElementType(), | |||
1502 | ConstantInt::get(Type::getInt64Ty(Ctx), Size), | |||
1503 | ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr, | |||
1504 | SAI->getName()); | |||
1505 | ||||
1506 | SAI->setBasePtr(CreatedArray); | |||
1507 | ||||
1508 | // Insert the free call at polly.exiting | |||
1509 | CallInst::CreateFree(CreatedArray, | |||
1510 | std::get<1>(StartExitBlocks)->getTerminator()); | |||
1511 | } else { | |||
1512 | auto InstIt = Builder.GetInsertBlock() | |||
1513 | ->getParent() | |||
1514 | ->getEntryBlock() | |||
1515 | .getTerminator(); | |||
1516 | ||||
1517 | auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(), | |||
1518 | SAI->getName(), &*InstIt); | |||
1519 | if (PollyTargetFirstLevelCacheLineSize) | |||
1520 | CreatedArray->setAlignment(Align(PollyTargetFirstLevelCacheLineSize)); | |||
1521 | SAI->setBasePtr(CreatedArray); | |||
1522 | } | |||
1523 | } | |||
1524 | } | |||
1525 | ||||
1526 | bool IslNodeBuilder::preloadInvariantLoads() { | |||
1527 | auto &InvariantEquivClasses = S.getInvariantAccesses(); | |||
1528 | if (InvariantEquivClasses.empty()) | |||
1529 | return true; | |||
1530 | ||||
1531 | BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(), | |||
1532 | &*Builder.GetInsertPoint(), &DT, &LI); | |||
1533 | PreLoadBB->setName("polly.preload.begin"); | |||
1534 | Builder.SetInsertPoint(&PreLoadBB->front()); | |||
1535 | ||||
1536 | for (auto &IAClass : InvariantEquivClasses) | |||
1537 | if (!preloadInvariantEquivClass(IAClass)) | |||
1538 | return false; | |||
1539 | ||||
1540 | return true; | |||
1541 | } | |||
1542 | ||||
1543 | void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { | |||
1544 | // Materialize values for the parameters of the SCoP. | |||
1545 | materializeParameters(); | |||
1546 | ||||
1547 | // materialize the outermost dimension parameters for a Fortran array. | |||
1548 | // NOTE: materializeParameters() does not work since it looks through | |||
1549 | // the SCEVs. We don't have a corresponding SCEV for the array size | |||
1550 | // parameter | |||
1551 | materializeFortranArrayOutermostDimension(); | |||
1552 | ||||
1553 | // Generate values for the current loop iteration for all surrounding loops. | |||
1554 | // | |||
1555 | // We may also reference loops outside of the scop which do not contain the | |||
1556 | // scop itself, but as the number of such scops may be arbitrarily large we do | |||
1557 | // not generate code for them here, but only at the point of code generation | |||
1558 | // where these values are needed. | |||
1559 | Loop *L = LI.getLoopFor(S.getEntry()); | |||
1560 | ||||
1561 | while (L != nullptr && S.contains(L)) | |||
1562 | L = L->getParentLoop(); | |||
1563 | ||||
1564 | while (L != nullptr) { | |||
1565 | materializeNonScopLoopInductionVariable(L); | |||
1566 | L = L->getParentLoop(); | |||
1567 | } | |||
1568 | ||||
1569 | isl_set_free(Context); | |||
1570 | } | |||
1571 | ||||
1572 | Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) { | |||
1573 | /// We pass the insert location of our Builder, as Polly ensures during IR | |||
1574 | /// generation that there is always a valid CFG into which instructions are | |||
1575 | /// inserted. As a result, the insertpoint is known to be always followed by a | |||
1576 | /// terminator instruction. This means the insert point may be specified by a | |||
1577 | /// terminator instruction, but it can never point to an ->end() iterator | |||
1578 | /// which does not have a corresponding instruction. Hence, dereferencing | |||
1579 | /// the insertpoint to obtain an instruction is known to be save. | |||
1580 | /// | |||
1581 | /// We also do not need to update the Builder here, as new instructions are | |||
1582 | /// always inserted _before_ the given InsertLocation. As a result, the | |||
1583 | /// insert location remains valid. | |||
1584 | assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() &&(static_cast<void> (0)) | |||
1585 | "Insert location points after last valid instruction")(static_cast<void> (0)); | |||
1586 | Instruction *InsertLocation = &*Builder.GetInsertPoint(); | |||
1587 | return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(), | |||
1588 | InsertLocation, &ValueMap, | |||
1589 | StartBlock->getSinglePredecessor()); | |||
1590 | } | |||
1591 | ||||
1592 | /// The AST expression we generate to perform the run-time check assumes | |||
1593 | /// computations on integer types of infinite size. As we only use 64-bit | |||
1594 | /// arithmetic we check for overflows, in case of which we set the result | |||
1595 | /// of this run-time check to false to be conservatively correct, | |||
1596 | Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) { | |||
1597 | auto ExprBuilder = getExprBuilder(); | |||
1598 | ||||
1599 | // In case the AST expression has integers larger than 64 bit, bail out. The | |||
1600 | // resulting LLVM-IR will contain operations on types that use more than 64 | |||
1601 | // bits. These are -- in case wrapping intrinsics are used -- translated to | |||
1602 | // runtime library calls that are not available on all systems (e.g., Android) | |||
1603 | // and consequently will result in linker errors. | |||
1604 | if (ExprBuilder.hasLargeInts(isl::manage_copy(Condition))) { | |||
1605 | isl_ast_expr_free(Condition); | |||
1606 | return Builder.getFalse(); | |||
1607 | } | |||
1608 | ||||
1609 | ExprBuilder.setTrackOverflow(true); | |||
1610 | Value *RTC = ExprBuilder.create(Condition); | |||
1611 | if (!RTC->getType()->isIntegerTy(1)) | |||
1612 | RTC = Builder.CreateIsNotNull(RTC); | |||
1613 | Value *OverflowHappened = | |||
1614 | Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown"); | |||
1615 | ||||
1616 | if (PollyGenerateRTCPrint) { | |||
1617 | auto *F = Builder.GetInsertBlock()->getParent(); | |||
1618 | RuntimeDebugBuilder::createCPUPrinter( | |||
1619 | Builder, | |||
1620 | "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() + | |||
1621 | "RTC: ", | |||
1622 | RTC, " Overflow: ", OverflowHappened, | |||
1623 | "\n" | |||
1624 | " (0 failed, -1 succeeded)\n" | |||
1625 | " (if one or both are 0 falling back to original code, if both are -1 " | |||
1626 | "executing Polly code)\n"); | |||
1627 | } | |||
1628 | ||||
1629 | RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result"); | |||
1630 | ExprBuilder.setTrackOverflow(false); | |||
1631 | ||||
1632 | if (!isa<ConstantInt>(RTC)) | |||
1633 | VersionedScops++; | |||
1634 | ||||
1635 | return RTC; | |||
1636 | } |