File: | build/source/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp |
Warning: | line 790, column 24 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This pass identifies loops where we can generate the Hexagon hardware | |||
10 | // loop instruction. The hardware loop can perform loop branches with a | |||
11 | // zero-cycle overhead. | |||
12 | // | |||
13 | // The pattern that defines the induction variable can changed depending on | |||
14 | // prior optimizations. For example, the IndVarSimplify phase run by 'opt' | |||
15 | // normalizes induction variables, and the Loop Strength Reduction pass | |||
16 | // run by 'llc' may also make changes to the induction variable. | |||
17 | // The pattern detected by this phase is due to running Strength Reduction. | |||
18 | // | |||
19 | // Criteria for hardware loops: | |||
20 | // - Countable loops (w/ ind. var for a trip count) | |||
21 | // - Assumes loops are normalized by IndVarSimplify | |||
22 | // - Try inner-most loops first | |||
23 | // - No function calls in loops. | |||
24 | // | |||
25 | //===----------------------------------------------------------------------===// | |||
26 | ||||
27 | #include "HexagonInstrInfo.h" | |||
28 | #include "HexagonSubtarget.h" | |||
29 | #include "llvm/ADT/ArrayRef.h" | |||
30 | #include "llvm/ADT/STLExtras.h" | |||
31 | #include "llvm/ADT/SmallSet.h" | |||
32 | #include "llvm/ADT/SmallVector.h" | |||
33 | #include "llvm/ADT/Statistic.h" | |||
34 | #include "llvm/ADT/StringRef.h" | |||
35 | #include "llvm/CodeGen/MachineBasicBlock.h" | |||
36 | #include "llvm/CodeGen/MachineDominators.h" | |||
37 | #include "llvm/CodeGen/MachineFunction.h" | |||
38 | #include "llvm/CodeGen/MachineFunctionPass.h" | |||
39 | #include "llvm/CodeGen/MachineInstr.h" | |||
40 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
41 | #include "llvm/CodeGen/MachineLoopInfo.h" | |||
42 | #include "llvm/CodeGen/MachineOperand.h" | |||
43 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
44 | #include "llvm/CodeGen/TargetRegisterInfo.h" | |||
45 | #include "llvm/IR/Constants.h" | |||
46 | #include "llvm/IR/DebugLoc.h" | |||
47 | #include "llvm/InitializePasses.h" | |||
48 | #include "llvm/Pass.h" | |||
49 | #include "llvm/Support/CommandLine.h" | |||
50 | #include "llvm/Support/Debug.h" | |||
51 | #include "llvm/Support/ErrorHandling.h" | |||
52 | #include "llvm/Support/MathExtras.h" | |||
53 | #include "llvm/Support/raw_ostream.h" | |||
54 | #include <cassert> | |||
55 | #include <cstdint> | |||
56 | #include <cstdlib> | |||
57 | #include <iterator> | |||
58 | #include <map> | |||
59 | #include <set> | |||
60 | #include <string> | |||
61 | #include <utility> | |||
62 | #include <vector> | |||
63 | ||||
64 | using namespace llvm; | |||
65 | ||||
66 | #define DEBUG_TYPE"hwloops" "hwloops" | |||
67 | ||||
68 | #ifndef NDEBUG | |||
69 | static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1)); | |||
70 | ||||
71 | // Option to create preheader only for a specific function. | |||
72 | static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden, | |||
73 | cl::init("")); | |||
74 | #endif | |||
75 | ||||
76 | // Option to create a preheader if one doesn't exist. | |||
77 | static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader", | |||
78 | cl::Hidden, cl::init(true), | |||
79 | cl::desc("Add a preheader to a hardware loop if one doesn't exist")); | |||
80 | ||||
81 | // Turn it off by default. If a preheader block is not created here, the | |||
82 | // software pipeliner may be unable to find a block suitable to serve as | |||
83 | // a preheader. In that case SWP will not run. | |||
84 | static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::Hidden, | |||
85 | cl::desc("Allow speculation of preheader " | |||
86 | "instructions")); | |||
87 | ||||
88 | STATISTIC(NumHWLoops, "Number of loops converted to hardware loops")static llvm::Statistic NumHWLoops = {"hwloops", "NumHWLoops", "Number of loops converted to hardware loops"}; | |||
89 | ||||
90 | namespace llvm { | |||
91 | ||||
92 | FunctionPass *createHexagonHardwareLoops(); | |||
93 | void initializeHexagonHardwareLoopsPass(PassRegistry&); | |||
94 | ||||
95 | } // end namespace llvm | |||
96 | ||||
97 | namespace { | |||
98 | ||||
99 | class CountValue; | |||
100 | ||||
101 | struct HexagonHardwareLoops : public MachineFunctionPass { | |||
102 | MachineLoopInfo *MLI; | |||
103 | MachineRegisterInfo *MRI; | |||
104 | MachineDominatorTree *MDT; | |||
105 | const HexagonInstrInfo *TII; | |||
106 | const HexagonRegisterInfo *TRI; | |||
107 | #ifndef NDEBUG | |||
108 | static int Counter; | |||
109 | #endif | |||
110 | ||||
111 | public: | |||
112 | static char ID; | |||
113 | ||||
114 | HexagonHardwareLoops() : MachineFunctionPass(ID) {} | |||
115 | ||||
116 | bool runOnMachineFunction(MachineFunction &MF) override; | |||
117 | ||||
118 | StringRef getPassName() const override { return "Hexagon Hardware Loops"; } | |||
119 | ||||
120 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
121 | AU.addRequired<MachineDominatorTree>(); | |||
122 | AU.addRequired<MachineLoopInfo>(); | |||
123 | MachineFunctionPass::getAnalysisUsage(AU); | |||
124 | } | |||
125 | ||||
126 | private: | |||
127 | using LoopFeederMap = std::map<Register, MachineInstr *>; | |||
128 | ||||
129 | /// Kinds of comparisons in the compare instructions. | |||
130 | struct Comparison { | |||
131 | enum Kind { | |||
132 | EQ = 0x01, | |||
133 | NE = 0x02, | |||
134 | L = 0x04, | |||
135 | G = 0x08, | |||
136 | U = 0x40, | |||
137 | LTs = L, | |||
138 | LEs = L | EQ, | |||
139 | GTs = G, | |||
140 | GEs = G | EQ, | |||
141 | LTu = L | U, | |||
142 | LEu = L | EQ | U, | |||
143 | GTu = G | U, | |||
144 | GEu = G | EQ | U | |||
145 | }; | |||
146 | ||||
147 | static Kind getSwappedComparison(Kind Cmp) { | |||
148 | assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator")(static_cast <bool> ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator") ? void (0) : __assert_fail ("(!((Cmp & L) && (Cmp & G))) && \"Malformed comparison operator\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 148, __extension__ __PRETTY_FUNCTION__)); | |||
149 | if ((Cmp & L) || (Cmp & G)) | |||
150 | return (Kind)(Cmp ^ (L|G)); | |||
151 | return Cmp; | |||
152 | } | |||
153 | ||||
154 | static Kind getNegatedComparison(Kind Cmp) { | |||
155 | if ((Cmp & L) || (Cmp & G)) | |||
156 | return (Kind)((Cmp ^ (L | G)) ^ EQ); | |||
157 | if ((Cmp & NE) || (Cmp & EQ)) | |||
158 | return (Kind)(Cmp ^ (EQ | NE)); | |||
159 | return (Kind)0; | |||
160 | } | |||
161 | ||||
162 | static bool isSigned(Kind Cmp) { | |||
163 | return (Cmp & (L | G) && !(Cmp & U)); | |||
164 | } | |||
165 | ||||
166 | static bool isUnsigned(Kind Cmp) { | |||
167 | return (Cmp & U); | |||
168 | } | |||
169 | }; | |||
170 | ||||
171 | /// Find the register that contains the loop controlling | |||
172 | /// induction variable. | |||
173 | /// If successful, it will return true and set the \p Reg, \p IVBump | |||
174 | /// and \p IVOp arguments. Otherwise it will return false. | |||
175 | /// The returned induction register is the register R that follows the | |||
176 | /// following induction pattern: | |||
177 | /// loop: | |||
178 | /// R = phi ..., [ R.next, LatchBlock ] | |||
179 | /// R.next = R + #bump | |||
180 | /// if (R.next < #N) goto loop | |||
181 | /// IVBump is the immediate value added to R, and IVOp is the instruction | |||
182 | /// "R.next = R + #bump". | |||
183 | bool findInductionRegister(MachineLoop *L, Register &Reg, | |||
184 | int64_t &IVBump, MachineInstr *&IVOp) const; | |||
185 | ||||
186 | /// Return the comparison kind for the specified opcode. | |||
187 | Comparison::Kind getComparisonKind(unsigned CondOpc, | |||
188 | MachineOperand *InitialValue, | |||
189 | const MachineOperand *Endvalue, | |||
190 | int64_t IVBump) const; | |||
191 | ||||
192 | /// Analyze the statements in a loop to determine if the loop | |||
193 | /// has a computable trip count and, if so, return a value that represents | |||
194 | /// the trip count expression. | |||
195 | CountValue *getLoopTripCount(MachineLoop *L, | |||
196 | SmallVectorImpl<MachineInstr *> &OldInsts); | |||
197 | ||||
198 | /// Return the expression that represents the number of times | |||
199 | /// a loop iterates. The function takes the operands that represent the | |||
200 | /// loop start value, loop end value, and induction value. Based upon | |||
201 | /// these operands, the function attempts to compute the trip count. | |||
202 | /// If the trip count is not directly available (as an immediate value, | |||
203 | /// or a register), the function will attempt to insert computation of it | |||
204 | /// to the loop's preheader. | |||
205 | CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start, | |||
206 | const MachineOperand *End, Register IVReg, | |||
207 | int64_t IVBump, Comparison::Kind Cmp) const; | |||
208 | ||||
209 | /// Return true if the instruction is not valid within a hardware | |||
210 | /// loop. | |||
211 | bool isInvalidLoopOperation(const MachineInstr *MI, | |||
212 | bool IsInnerHWLoop) const; | |||
213 | ||||
214 | /// Return true if the loop contains an instruction that inhibits | |||
215 | /// using the hardware loop. | |||
216 | bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const; | |||
217 | ||||
218 | /// Given a loop, check if we can convert it to a hardware loop. | |||
219 | /// If so, then perform the conversion and return true. | |||
220 | bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used); | |||
221 | ||||
222 | /// Return true if the instruction is now dead. | |||
223 | bool isDead(const MachineInstr *MI, | |||
224 | SmallVectorImpl<MachineInstr *> &DeadPhis) const; | |||
225 | ||||
226 | /// Remove the instruction if it is now dead. | |||
227 | void removeIfDead(MachineInstr *MI); | |||
228 | ||||
229 | /// Make sure that the "bump" instruction executes before the | |||
230 | /// compare. We need that for the IV fixup, so that the compare | |||
231 | /// instruction would not use a bumped value that has not yet been | |||
232 | /// defined. If the instructions are out of order, try to reorder them. | |||
233 | bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); | |||
234 | ||||
235 | /// Return true if MO and MI pair is visited only once. If visited | |||
236 | /// more than once, this indicates there is recursion. In such a case, | |||
237 | /// return false. | |||
238 | bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI, | |||
239 | const MachineOperand *MO, | |||
240 | LoopFeederMap &LoopFeederPhi) const; | |||
241 | ||||
242 | /// Return true if the Phi may generate a value that may underflow, | |||
243 | /// or may wrap. | |||
244 | bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal, | |||
245 | MachineBasicBlock *MBB, MachineLoop *L, | |||
246 | LoopFeederMap &LoopFeederPhi) const; | |||
247 | ||||
248 | /// Return true if the induction variable may underflow an unsigned | |||
249 | /// value in the first iteration. | |||
250 | bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal, | |||
251 | const MachineOperand *EndVal, | |||
252 | MachineBasicBlock *MBB, MachineLoop *L, | |||
253 | LoopFeederMap &LoopFeederPhi) const; | |||
254 | ||||
255 | /// Check if the given operand has a compile-time known constant | |||
256 | /// value. Return true if yes, and false otherwise. When returning true, set | |||
257 | /// Val to the corresponding constant value. | |||
258 | bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const; | |||
259 | ||||
260 | /// Check if the operand has a compile-time known constant value. | |||
261 | bool isImmediate(const MachineOperand &MO) const { | |||
262 | int64_t V; | |||
263 | return checkForImmediate(MO, V); | |||
264 | } | |||
265 | ||||
266 | /// Return the immediate for the specified operand. | |||
267 | int64_t getImmediate(const MachineOperand &MO) const { | |||
268 | int64_t V; | |||
269 | if (!checkForImmediate(MO, V)) | |||
270 | llvm_unreachable("Invalid operand")::llvm::llvm_unreachable_internal("Invalid operand", "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp" , 270); | |||
271 | return V; | |||
272 | } | |||
273 | ||||
274 | /// Reset the given machine operand to now refer to a new immediate | |||
275 | /// value. Assumes that the operand was already referencing an immediate | |||
276 | /// value, either directly, or via a register. | |||
277 | void setImmediate(MachineOperand &MO, int64_t Val); | |||
278 | ||||
279 | /// Fix the data flow of the induction variable. | |||
280 | /// The desired flow is: phi ---> bump -+-> comparison-in-latch. | |||
281 | /// | | |||
282 | /// +-> back to phi | |||
283 | /// where "bump" is the increment of the induction variable: | |||
284 | /// iv = iv + #const. | |||
285 | /// Due to some prior code transformations, the actual flow may look | |||
286 | /// like this: | |||
287 | /// phi -+-> bump ---> back to phi | |||
288 | /// | | |||
289 | /// +-> comparison-in-latch (against upper_bound-bump), | |||
290 | /// i.e. the comparison that controls the loop execution may be using | |||
291 | /// the value of the induction variable from before the increment. | |||
292 | /// | |||
293 | /// Return true if the loop's flow is the desired one (i.e. it's | |||
294 | /// either been fixed, or no fixing was necessary). | |||
295 | /// Otherwise, return false. This can happen if the induction variable | |||
296 | /// couldn't be identified, or if the value in the latch's comparison | |||
297 | /// cannot be adjusted to reflect the post-bump value. | |||
298 | bool fixupInductionVariable(MachineLoop *L); | |||
299 | ||||
300 | /// Given a loop, if it does not have a preheader, create one. | |||
301 | /// Return the block that is the preheader. | |||
302 | MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); | |||
303 | }; | |||
304 | ||||
305 | char HexagonHardwareLoops::ID = 0; | |||
306 | #ifndef NDEBUG | |||
307 | int HexagonHardwareLoops::Counter = 0; | |||
308 | #endif | |||
309 | ||||
310 | /// Abstraction for a trip count of a loop. A smaller version | |||
311 | /// of the MachineOperand class without the concerns of changing the | |||
312 | /// operand representation. | |||
313 | class CountValue { | |||
314 | public: | |||
315 | enum CountValueType { | |||
316 | CV_Register, | |||
317 | CV_Immediate | |||
318 | }; | |||
319 | ||||
320 | private: | |||
321 | CountValueType Kind; | |||
322 | union Values { | |||
323 | Values() : R{Register(), 0} {} | |||
324 | Values(const Values&) = default; | |||
325 | struct { | |||
326 | Register Reg; | |||
327 | unsigned Sub; | |||
328 | } R; | |||
329 | unsigned ImmVal; | |||
330 | } Contents; | |||
331 | ||||
332 | public: | |||
333 | explicit CountValue(CountValueType t, Register v, unsigned u = 0) { | |||
334 | Kind = t; | |||
335 | if (Kind == CV_Register) { | |||
336 | Contents.R.Reg = v; | |||
337 | Contents.R.Sub = u; | |||
338 | } else { | |||
339 | Contents.ImmVal = v; | |||
340 | } | |||
341 | } | |||
342 | ||||
343 | bool isReg() const { return Kind == CV_Register; } | |||
344 | bool isImm() const { return Kind == CV_Immediate; } | |||
345 | ||||
346 | Register getReg() const { | |||
347 | assert(isReg() && "Wrong CountValue accessor")(static_cast <bool> (isReg() && "Wrong CountValue accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong CountValue accessor\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 347, __extension__ __PRETTY_FUNCTION__)); | |||
348 | return Contents.R.Reg; | |||
349 | } | |||
350 | ||||
351 | unsigned getSubReg() const { | |||
352 | assert(isReg() && "Wrong CountValue accessor")(static_cast <bool> (isReg() && "Wrong CountValue accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong CountValue accessor\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 352, __extension__ __PRETTY_FUNCTION__)); | |||
353 | return Contents.R.Sub; | |||
354 | } | |||
355 | ||||
356 | unsigned getImm() const { | |||
357 | assert(isImm() && "Wrong CountValue accessor")(static_cast <bool> (isImm() && "Wrong CountValue accessor" ) ? void (0) : __assert_fail ("isImm() && \"Wrong CountValue accessor\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 357, __extension__ __PRETTY_FUNCTION__)); | |||
358 | return Contents.ImmVal; | |||
359 | } | |||
360 | ||||
361 | void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const { | |||
362 | if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); } | |||
363 | if (isImm()) { OS << Contents.ImmVal; } | |||
364 | } | |||
365 | }; | |||
366 | ||||
367 | } // end anonymous namespace | |||
368 | ||||
369 | INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",static void *initializeHexagonHardwareLoopsPassOnce(PassRegistry &Registry) { | |||
370 | "Hexagon Hardware Loops", false, false)static void *initializeHexagonHardwareLoopsPassOnce(PassRegistry &Registry) { | |||
371 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)initializeMachineDominatorTreePass(Registry); | |||
372 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)initializeMachineLoopInfoPass(Registry); | |||
373 | INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",PassInfo *PI = new PassInfo( "Hexagon Hardware Loops", "hwloops" , &HexagonHardwareLoops::ID, PassInfo::NormalCtor_t(callDefaultCtor <HexagonHardwareLoops>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeHexagonHardwareLoopsPassFlag ; void llvm::initializeHexagonHardwareLoopsPass(PassRegistry & Registry) { llvm::call_once(InitializeHexagonHardwareLoopsPassFlag , initializeHexagonHardwareLoopsPassOnce, std::ref(Registry)) ; } | |||
374 | "Hexagon Hardware Loops", false, false)PassInfo *PI = new PassInfo( "Hexagon Hardware Loops", "hwloops" , &HexagonHardwareLoops::ID, PassInfo::NormalCtor_t(callDefaultCtor <HexagonHardwareLoops>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeHexagonHardwareLoopsPassFlag ; void llvm::initializeHexagonHardwareLoopsPass(PassRegistry & Registry) { llvm::call_once(InitializeHexagonHardwareLoopsPassFlag , initializeHexagonHardwareLoopsPassOnce, std::ref(Registry)) ; } | |||
375 | ||||
376 | FunctionPass *llvm::createHexagonHardwareLoops() { | |||
377 | return new HexagonHardwareLoops(); | |||
378 | } | |||
379 | ||||
380 | bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { | |||
381 | LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "********* Hexagon Hardware Loops *********\n" ; } } while (false); | |||
382 | if (skipFunction(MF.getFunction())) | |||
383 | return false; | |||
384 | ||||
385 | bool Changed = false; | |||
386 | ||||
387 | MLI = &getAnalysis<MachineLoopInfo>(); | |||
388 | MRI = &MF.getRegInfo(); | |||
389 | MDT = &getAnalysis<MachineDominatorTree>(); | |||
390 | const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>(); | |||
391 | TII = HST.getInstrInfo(); | |||
392 | TRI = HST.getRegisterInfo(); | |||
393 | ||||
394 | for (auto &L : *MLI) | |||
395 | if (L->isOutermost()) { | |||
396 | bool L0Used = false; | |||
397 | bool L1Used = false; | |||
398 | Changed |= convertToHardwareLoop(L, L0Used, L1Used); | |||
399 | } | |||
400 | ||||
401 | return Changed; | |||
402 | } | |||
403 | ||||
404 | bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, | |||
405 | Register &Reg, | |||
406 | int64_t &IVBump, | |||
407 | MachineInstr *&IVOp | |||
408 | ) const { | |||
409 | MachineBasicBlock *Header = L->getHeader(); | |||
410 | MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); | |||
411 | MachineBasicBlock *Latch = L->getLoopLatch(); | |||
412 | MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); | |||
413 | if (!Header || !Preheader || !Latch || !ExitingBlock) | |||
414 | return false; | |||
415 | ||||
416 | // This pair represents an induction register together with an immediate | |||
417 | // value that will be added to it in each loop iteration. | |||
418 | using RegisterBump = std::pair<Register, int64_t>; | |||
419 | ||||
420 | // Mapping: R.next -> (R, bump), where R, R.next and bump are derived | |||
421 | // from an induction operation | |||
422 | // R.next = R + bump | |||
423 | // where bump is an immediate value. | |||
424 | using InductionMap = std::map<Register, RegisterBump>; | |||
425 | ||||
426 | InductionMap IndMap; | |||
427 | ||||
428 | using instr_iterator = MachineBasicBlock::instr_iterator; | |||
429 | ||||
430 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); | |||
431 | I != E && I->isPHI(); ++I) { | |||
432 | MachineInstr *Phi = &*I; | |||
433 | ||||
434 | // Have a PHI instruction. Get the operand that corresponds to the | |||
435 | // latch block, and see if is a result of an addition of form "reg+imm", | |||
436 | // where the "reg" is defined by the PHI node we are looking at. | |||
437 | for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { | |||
438 | if (Phi->getOperand(i+1).getMBB() != Latch) | |||
439 | continue; | |||
440 | ||||
441 | Register PhiOpReg = Phi->getOperand(i).getReg(); | |||
442 | MachineInstr *DI = MRI->getVRegDef(PhiOpReg); | |||
443 | ||||
444 | if (DI->getDesc().isAdd()) { | |||
445 | // If the register operand to the add is the PHI we're looking at, this | |||
446 | // meets the induction pattern. | |||
447 | Register IndReg = DI->getOperand(1).getReg(); | |||
448 | MachineOperand &Opnd2 = DI->getOperand(2); | |||
449 | int64_t V; | |||
450 | if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { | |||
451 | Register UpdReg = DI->getOperand(0).getReg(); | |||
452 | IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); | |||
453 | } | |||
454 | } | |||
455 | } // for (i) | |||
456 | } // for (instr) | |||
457 | ||||
458 | SmallVector<MachineOperand,2> Cond; | |||
459 | MachineBasicBlock *TB = nullptr, *FB = nullptr; | |||
460 | bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); | |||
461 | if (NotAnalyzed) | |||
462 | return false; | |||
463 | ||||
464 | Register PredR; | |||
465 | unsigned PredPos, PredRegFlags; | |||
466 | if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags)) | |||
467 | return false; | |||
468 | ||||
469 | MachineInstr *PredI = MRI->getVRegDef(PredR); | |||
470 | if (!PredI->isCompare()) | |||
471 | return false; | |||
472 | ||||
473 | Register CmpReg1, CmpReg2; | |||
474 | int64_t CmpImm = 0, CmpMask = 0; | |||
475 | bool CmpAnalyzed = | |||
476 | TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm); | |||
477 | // Fail if the compare was not analyzed, or it's not comparing a register | |||
478 | // with an immediate value. Not checking the mask here, since we handle | |||
479 | // the individual compare opcodes (including A4_cmpb*) later on. | |||
480 | if (!CmpAnalyzed) | |||
481 | return false; | |||
482 | ||||
483 | // Exactly one of the input registers to the comparison should be among | |||
484 | // the induction registers. | |||
485 | InductionMap::iterator IndMapEnd = IndMap.end(); | |||
486 | InductionMap::iterator F = IndMapEnd; | |||
487 | if (CmpReg1 != 0) { | |||
488 | InductionMap::iterator F1 = IndMap.find(CmpReg1); | |||
489 | if (F1 != IndMapEnd) | |||
490 | F = F1; | |||
491 | } | |||
492 | if (CmpReg2 != 0) { | |||
493 | InductionMap::iterator F2 = IndMap.find(CmpReg2); | |||
494 | if (F2 != IndMapEnd) { | |||
495 | if (F != IndMapEnd) | |||
496 | return false; | |||
497 | F = F2; | |||
498 | } | |||
499 | } | |||
500 | if (F == IndMapEnd) | |||
501 | return false; | |||
502 | ||||
503 | Reg = F->second.first; | |||
504 | IVBump = F->second.second; | |||
505 | IVOp = MRI->getVRegDef(F->first); | |||
506 | return true; | |||
507 | } | |||
508 | ||||
509 | // Return the comparison kind for the specified opcode. | |||
510 | HexagonHardwareLoops::Comparison::Kind | |||
511 | HexagonHardwareLoops::getComparisonKind(unsigned CondOpc, | |||
512 | MachineOperand *InitialValue, | |||
513 | const MachineOperand *EndValue, | |||
514 | int64_t IVBump) const { | |||
515 | Comparison::Kind Cmp = (Comparison::Kind)0; | |||
516 | switch (CondOpc) { | |||
517 | case Hexagon::C2_cmpeq: | |||
518 | case Hexagon::C2_cmpeqi: | |||
519 | case Hexagon::C2_cmpeqp: | |||
520 | Cmp = Comparison::EQ; | |||
521 | break; | |||
522 | case Hexagon::C4_cmpneq: | |||
523 | case Hexagon::C4_cmpneqi: | |||
524 | Cmp = Comparison::NE; | |||
525 | break; | |||
526 | case Hexagon::C2_cmplt: | |||
527 | Cmp = Comparison::LTs; | |||
528 | break; | |||
529 | case Hexagon::C2_cmpltu: | |||
530 | Cmp = Comparison::LTu; | |||
531 | break; | |||
532 | case Hexagon::C4_cmplte: | |||
533 | case Hexagon::C4_cmpltei: | |||
534 | Cmp = Comparison::LEs; | |||
535 | break; | |||
536 | case Hexagon::C4_cmplteu: | |||
537 | case Hexagon::C4_cmplteui: | |||
538 | Cmp = Comparison::LEu; | |||
539 | break; | |||
540 | case Hexagon::C2_cmpgt: | |||
541 | case Hexagon::C2_cmpgti: | |||
542 | case Hexagon::C2_cmpgtp: | |||
543 | Cmp = Comparison::GTs; | |||
544 | break; | |||
545 | case Hexagon::C2_cmpgtu: | |||
546 | case Hexagon::C2_cmpgtui: | |||
547 | case Hexagon::C2_cmpgtup: | |||
548 | Cmp = Comparison::GTu; | |||
549 | break; | |||
550 | case Hexagon::C2_cmpgei: | |||
551 | Cmp = Comparison::GEs; | |||
552 | break; | |||
553 | case Hexagon::C2_cmpgeui: | |||
554 | Cmp = Comparison::GEs; | |||
555 | break; | |||
556 | default: | |||
557 | return (Comparison::Kind)0; | |||
558 | } | |||
559 | return Cmp; | |||
560 | } | |||
561 | ||||
562 | /// Analyze the statements in a loop to determine if the loop has | |||
563 | /// a computable trip count and, if so, return a value that represents | |||
564 | /// the trip count expression. | |||
565 | /// | |||
566 | /// This function iterates over the phi nodes in the loop to check for | |||
567 | /// induction variable patterns that are used in the calculation for | |||
568 | /// the number of time the loop is executed. | |||
569 | CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, | |||
570 | SmallVectorImpl<MachineInstr *> &OldInsts) { | |||
571 | MachineBasicBlock *TopMBB = L->getTopBlock(); | |||
572 | MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); | |||
573 | assert(PI != TopMBB->pred_end() &&(static_cast <bool> (PI != TopMBB->pred_end() && "Loop must have more than one incoming edge!") ? void (0) : __assert_fail ("PI != TopMBB->pred_end() && \"Loop must have more than one incoming edge!\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 574, __extension__ __PRETTY_FUNCTION__)) | |||
574 | "Loop must have more than one incoming edge!")(static_cast <bool> (PI != TopMBB->pred_end() && "Loop must have more than one incoming edge!") ? void (0) : __assert_fail ("PI != TopMBB->pred_end() && \"Loop must have more than one incoming edge!\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 574, __extension__ __PRETTY_FUNCTION__)); | |||
575 | MachineBasicBlock *Backedge = *PI++; | |||
576 | if (PI == TopMBB->pred_end()) // dead loop? | |||
577 | return nullptr; | |||
578 | MachineBasicBlock *Incoming = *PI++; | |||
579 | if (PI != TopMBB->pred_end()) // multiple backedges? | |||
580 | return nullptr; | |||
581 | ||||
582 | // Make sure there is one incoming and one backedge and determine which | |||
583 | // is which. | |||
584 | if (L->contains(Incoming)) { | |||
585 | if (L->contains(Backedge)) | |||
586 | return nullptr; | |||
587 | std::swap(Incoming, Backedge); | |||
588 | } else if (!L->contains(Backedge)) | |||
589 | return nullptr; | |||
590 | ||||
591 | // Look for the cmp instruction to determine if we can get a useful trip | |||
592 | // count. The trip count can be either a register or an immediate. The | |||
593 | // location of the value depends upon the type (reg or imm). | |||
594 | MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); | |||
595 | if (!ExitingBlock) | |||
596 | return nullptr; | |||
597 | ||||
598 | Register IVReg = 0; | |||
599 | int64_t IVBump = 0; | |||
600 | MachineInstr *IVOp; | |||
601 | bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); | |||
602 | if (!FoundIV) | |||
603 | return nullptr; | |||
604 | ||||
605 | MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); | |||
606 | ||||
607 | MachineOperand *InitialValue = nullptr; | |||
608 | MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); | |||
609 | MachineBasicBlock *Latch = L->getLoopLatch(); | |||
610 | for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { | |||
611 | MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); | |||
612 | if (MBB == Preheader) | |||
613 | InitialValue = &IV_Phi->getOperand(i); | |||
614 | else if (MBB == Latch) | |||
615 | IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. | |||
616 | } | |||
617 | if (!InitialValue) | |||
618 | return nullptr; | |||
619 | ||||
620 | SmallVector<MachineOperand,2> Cond; | |||
621 | MachineBasicBlock *TB = nullptr, *FB = nullptr; | |||
622 | bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); | |||
623 | if (NotAnalyzed) | |||
624 | return nullptr; | |||
625 | ||||
626 | MachineBasicBlock *Header = L->getHeader(); | |||
627 | // TB must be non-null. If FB is also non-null, one of them must be | |||
628 | // the header. Otherwise, branch to TB could be exiting the loop, and | |||
629 | // the fall through can go to the header. | |||
630 | assert (TB && "Exit block without a branch?")(static_cast <bool> (TB && "Exit block without a branch?" ) ? void (0) : __assert_fail ("TB && \"Exit block without a branch?\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 630, __extension__ __PRETTY_FUNCTION__)); | |||
631 | if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { | |||
632 | MachineBasicBlock *LTB = nullptr, *LFB = nullptr; | |||
633 | SmallVector<MachineOperand,2> LCond; | |||
634 | bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); | |||
635 | if (NotAnalyzed) | |||
636 | return nullptr; | |||
637 | if (TB == Latch) | |||
638 | TB = (LTB == Header) ? LTB : LFB; | |||
639 | else | |||
640 | FB = (LTB == Header) ? LTB: LFB; | |||
641 | } | |||
642 | assert ((!FB || TB == Header || FB == Header) && "Branches not to header?")(static_cast <bool> ((!FB || TB == Header || FB == Header ) && "Branches not to header?") ? void (0) : __assert_fail ("(!FB || TB == Header || FB == Header) && \"Branches not to header?\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 642, __extension__ __PRETTY_FUNCTION__)); | |||
643 | if (!TB || (FB && TB != Header && FB != Header)) | |||
644 | return nullptr; | |||
645 | ||||
646 | // Branches of form "if (!P) ..." cause HexagonInstrInfo::analyzeBranch | |||
647 | // to put imm(0), followed by P in the vector Cond. | |||
648 | // If TB is not the header, it means that the "not-taken" path must lead | |||
649 | // to the header. | |||
650 | bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header); | |||
651 | Register PredReg; | |||
652 | unsigned PredPos, PredRegFlags; | |||
653 | if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags)) | |||
654 | return nullptr; | |||
655 | MachineInstr *CondI = MRI->getVRegDef(PredReg); | |||
656 | unsigned CondOpc = CondI->getOpcode(); | |||
657 | ||||
658 | Register CmpReg1, CmpReg2; | |||
659 | int64_t Mask = 0, ImmValue = 0; | |||
660 | bool AnalyzedCmp = | |||
661 | TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue); | |||
662 | if (!AnalyzedCmp) | |||
663 | return nullptr; | |||
664 | ||||
665 | // The comparison operator type determines how we compute the loop | |||
666 | // trip count. | |||
667 | OldInsts.push_back(CondI); | |||
668 | OldInsts.push_back(IVOp); | |||
669 | ||||
670 | // Sadly, the following code gets information based on the position | |||
671 | // of the operands in the compare instruction. This has to be done | |||
672 | // this way, because the comparisons check for a specific relationship | |||
673 | // between the operands (e.g. is-less-than), rather than to find out | |||
674 | // what relationship the operands are in (as on PPC). | |||
675 | Comparison::Kind Cmp; | |||
676 | bool isSwapped = false; | |||
677 | const MachineOperand &Op1 = CondI->getOperand(1); | |||
678 | const MachineOperand &Op2 = CondI->getOperand(2); | |||
679 | const MachineOperand *EndValue = nullptr; | |||
680 | ||||
681 | if (Op1.isReg()) { | |||
682 | if (Op2.isImm() || Op1.getReg() == IVReg) | |||
683 | EndValue = &Op2; | |||
684 | else { | |||
685 | EndValue = &Op1; | |||
686 | isSwapped = true; | |||
687 | } | |||
688 | } | |||
689 | ||||
690 | if (!EndValue) | |||
691 | return nullptr; | |||
692 | ||||
693 | Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump); | |||
694 | if (!Cmp) | |||
695 | return nullptr; | |||
696 | if (Negated) | |||
697 | Cmp = Comparison::getNegatedComparison(Cmp); | |||
698 | if (isSwapped) | |||
699 | Cmp = Comparison::getSwappedComparison(Cmp); | |||
700 | ||||
701 | if (InitialValue->isReg()) { | |||
702 | Register R = InitialValue->getReg(); | |||
703 | MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); | |||
704 | if (!MDT->properlyDominates(DefBB, Header)) { | |||
705 | int64_t V; | |||
706 | if (!checkForImmediate(*InitialValue, V)) | |||
707 | return nullptr; | |||
708 | } | |||
709 | OldInsts.push_back(MRI->getVRegDef(R)); | |||
710 | } | |||
711 | if (EndValue->isReg()) { | |||
712 | Register R = EndValue->getReg(); | |||
713 | MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); | |||
714 | if (!MDT->properlyDominates(DefBB, Header)) { | |||
715 | int64_t V; | |||
716 | if (!checkForImmediate(*EndValue, V)) | |||
717 | return nullptr; | |||
718 | } | |||
719 | OldInsts.push_back(MRI->getVRegDef(R)); | |||
720 | } | |||
721 | ||||
722 | return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); | |||
723 | } | |||
724 | ||||
725 | /// Helper function that returns the expression that represents the | |||
726 | /// number of times a loop iterates. The function takes the operands that | |||
727 | /// represent the loop start value, loop end value, and induction value. | |||
728 | /// Based upon these operands, the function attempts to compute the trip count. | |||
729 | CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, | |||
730 | const MachineOperand *Start, | |||
731 | const MachineOperand *End, | |||
732 | Register IVReg, | |||
733 | int64_t IVBump, | |||
734 | Comparison::Kind Cmp) const { | |||
735 | // Cannot handle comparison EQ, i.e. while (A == B). | |||
736 | if (Cmp == Comparison::EQ) | |||
| ||||
737 | return nullptr; | |||
738 | ||||
739 | // Check if either the start or end values are an assignment of an immediate. | |||
740 | // If so, use the immediate value rather than the register. | |||
741 | if (Start->isReg()) { | |||
742 | const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); | |||
743 | if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi || | |||
744 | StartValInstr->getOpcode() == Hexagon::A2_tfrpi)) | |||
745 | Start = &StartValInstr->getOperand(1); | |||
746 | } | |||
747 | if (End->isReg()) { | |||
748 | const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); | |||
749 | if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi || | |||
750 | EndValInstr->getOpcode() == Hexagon::A2_tfrpi)) | |||
751 | End = &EndValInstr->getOperand(1); | |||
752 | } | |||
753 | ||||
754 | if (!Start->isReg() && !Start->isImm()) | |||
755 | return nullptr; | |||
756 | if (!End->isReg() && !End->isImm()) | |||
757 | return nullptr; | |||
758 | ||||
759 | bool CmpLess = Cmp & Comparison::L; | |||
760 | bool CmpGreater = Cmp & Comparison::G; | |||
761 | bool CmpHasEqual = Cmp & Comparison::EQ; | |||
762 | ||||
763 | // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds. | |||
764 | if (CmpLess && IVBump < 0) | |||
765 | // Loop going while iv is "less" with the iv value going down. Must wrap. | |||
766 | return nullptr; | |||
767 | ||||
768 | if (CmpGreater && IVBump > 0) | |||
769 | // Loop going while iv is "greater" with the iv value going up. Must wrap. | |||
770 | return nullptr; | |||
771 | ||||
772 | // Phis that may feed into the loop. | |||
773 | LoopFeederMap LoopFeederPhi; | |||
774 | ||||
775 | // Check if the initial value may be zero and can be decremented in the first | |||
776 | // iteration. If the value is zero, the endloop instruction will not decrement | |||
777 | // the loop counter, so we shouldn't generate a hardware loop in this case. | |||
778 | if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop, | |||
779 | LoopFeederPhi)) | |||
780 | return nullptr; | |||
781 | ||||
782 | if (Start->isImm() && End->isImm()) { | |||
783 | // Both, start and end are immediates. | |||
784 | int64_t StartV = Start->getImm(); | |||
785 | int64_t EndV = End->getImm(); | |||
786 | int64_t Dist = EndV - StartV; | |||
787 | if (Dist == 0) | |||
788 | return nullptr; | |||
789 | ||||
790 | bool Exact = (Dist % IVBump) == 0; | |||
| ||||
791 | ||||
792 | if (Cmp == Comparison::NE) { | |||
793 | if (!Exact) | |||
794 | return nullptr; | |||
795 | if ((Dist < 0) ^ (IVBump < 0)) | |||
796 | return nullptr; | |||
797 | } | |||
798 | ||||
799 | // For comparisons that include the final value (i.e. include equality | |||
800 | // with the final value), we need to increase the distance by 1. | |||
801 | if (CmpHasEqual) | |||
802 | Dist = Dist > 0 ? Dist+1 : Dist-1; | |||
803 | ||||
804 | // For the loop to iterate, CmpLess should imply Dist > 0. Similarly, | |||
805 | // CmpGreater should imply Dist < 0. These conditions could actually | |||
806 | // fail, for example, in unreachable code (which may still appear to be | |||
807 | // reachable in the CFG). | |||
808 | if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0)) | |||
809 | return nullptr; | |||
810 | ||||
811 | // "Normalized" distance, i.e. with the bump set to +-1. | |||
812 | int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump | |||
813 | : (-Dist + (-IVBump - 1)) / (-IVBump); | |||
814 | assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.")(static_cast <bool> (Dist1 > 0 && "Fishy thing. Both operands have the same sign." ) ? void (0) : __assert_fail ("Dist1 > 0 && \"Fishy thing. Both operands have the same sign.\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 814, __extension__ __PRETTY_FUNCTION__)); | |||
815 | ||||
816 | uint64_t Count = Dist1; | |||
817 | ||||
818 | if (Count > 0xFFFFFFFFULL) | |||
819 | return nullptr; | |||
820 | ||||
821 | return new CountValue(CountValue::CV_Immediate, Count); | |||
822 | } | |||
823 | ||||
824 | // A general case: Start and End are some values, but the actual | |||
825 | // iteration count may not be available. If it is not, insert | |||
826 | // a computation of it into the preheader. | |||
827 | ||||
828 | // If the induction variable bump is not a power of 2, quit. | |||
829 | // Othwerise we'd need a general integer division. | |||
830 | if (!isPowerOf2_64(std::abs(IVBump))) | |||
831 | return nullptr; | |||
832 | ||||
833 | MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader); | |||
834 | assert (PH && "Should have a preheader by now")(static_cast <bool> (PH && "Should have a preheader by now" ) ? void (0) : __assert_fail ("PH && \"Should have a preheader by now\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 834, __extension__ __PRETTY_FUNCTION__)); | |||
835 | MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); | |||
836 | DebugLoc DL; | |||
837 | if (InsertPos != PH->end()) | |||
838 | DL = InsertPos->getDebugLoc(); | |||
839 | ||||
840 | // If Start is an immediate and End is a register, the trip count | |||
841 | // will be "reg - imm". Hexagon's "subtract immediate" instruction | |||
842 | // is actually "reg + -imm". | |||
843 | ||||
844 | // If the loop IV is going downwards, i.e. if the bump is negative, | |||
845 | // then the iteration count (computed as End-Start) will need to be | |||
846 | // negated. To avoid the negation, just swap Start and End. | |||
847 | if (IVBump < 0) { | |||
848 | std::swap(Start, End); | |||
849 | IVBump = -IVBump; | |||
850 | } | |||
851 | // Cmp may now have a wrong direction, e.g. LEs may now be GEs. | |||
852 | // Signedness, and "including equality" are preserved. | |||
853 | ||||
854 | bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) | |||
855 | bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) | |||
856 | ||||
857 | int64_t StartV = 0, EndV = 0; | |||
858 | if (Start->isImm()) | |||
859 | StartV = Start->getImm(); | |||
860 | if (End->isImm()) | |||
861 | EndV = End->getImm(); | |||
862 | ||||
863 | int64_t AdjV = 0; | |||
864 | // To compute the iteration count, we would need this computation: | |||
865 | // Count = (End - Start + (IVBump-1)) / IVBump | |||
866 | // or, when CmpHasEqual: | |||
867 | // Count = (End - Start + (IVBump-1)+1) / IVBump | |||
868 | // The "IVBump-1" part is the adjustment (AdjV). We can avoid | |||
869 | // generating an instruction specifically to add it if we can adjust | |||
870 | // the immediate values for Start or End. | |||
871 | ||||
872 | if (CmpHasEqual) { | |||
873 | // Need to add 1 to the total iteration count. | |||
874 | if (Start->isImm()) | |||
875 | StartV--; | |||
876 | else if (End->isImm()) | |||
877 | EndV++; | |||
878 | else | |||
879 | AdjV += 1; | |||
880 | } | |||
881 | ||||
882 | if (Cmp != Comparison::NE) { | |||
883 | if (Start->isImm()) | |||
884 | StartV -= (IVBump-1); | |||
885 | else if (End->isImm()) | |||
886 | EndV += (IVBump-1); | |||
887 | else | |||
888 | AdjV += (IVBump-1); | |||
889 | } | |||
890 | ||||
891 | Register R = 0; | |||
892 | unsigned SR = 0; | |||
893 | if (Start->isReg()) { | |||
894 | R = Start->getReg(); | |||
895 | SR = Start->getSubReg(); | |||
896 | } else { | |||
897 | R = End->getReg(); | |||
898 | SR = End->getSubReg(); | |||
899 | } | |||
900 | const TargetRegisterClass *RC = MRI->getRegClass(R); | |||
901 | // Hardware loops cannot handle 64-bit registers. If it's a double | |||
902 | // register, it has to have a subregister. | |||
903 | if (!SR && RC == &Hexagon::DoubleRegsRegClass) | |||
904 | return nullptr; | |||
905 | const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; | |||
906 | ||||
907 | // Compute DistR (register with the distance between Start and End). | |||
908 | Register DistR; | |||
909 | unsigned DistSR; | |||
910 | ||||
911 | // Avoid special case, where the start value is an imm(0). | |||
912 | if (Start->isImm() && StartV == 0) { | |||
913 | DistR = End->getReg(); | |||
914 | DistSR = End->getSubReg(); | |||
915 | } else { | |||
916 | const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) : | |||
917 | (RegToImm ? TII->get(Hexagon::A2_subri) : | |||
918 | TII->get(Hexagon::A2_addi)); | |||
919 | if (RegToReg || RegToImm) { | |||
920 | Register SubR = MRI->createVirtualRegister(IntRC); | |||
921 | MachineInstrBuilder SubIB = | |||
922 | BuildMI(*PH, InsertPos, DL, SubD, SubR); | |||
923 | ||||
924 | if (RegToReg) | |||
925 | SubIB.addReg(End->getReg(), 0, End->getSubReg()) | |||
926 | .addReg(Start->getReg(), 0, Start->getSubReg()); | |||
927 | else | |||
928 | SubIB.addImm(EndV) | |||
929 | .addReg(Start->getReg(), 0, Start->getSubReg()); | |||
930 | DistR = SubR; | |||
931 | } else { | |||
932 | // If the loop has been unrolled, we should use the original loop count | |||
933 | // instead of recalculating the value. This will avoid additional | |||
934 | // 'Add' instruction. | |||
935 | const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); | |||
936 | if (EndValInstr->getOpcode() == Hexagon::A2_addi && | |||
937 | EndValInstr->getOperand(1).getSubReg() == 0 && | |||
938 | EndValInstr->getOperand(2).getImm() == StartV) { | |||
939 | DistR = EndValInstr->getOperand(1).getReg(); | |||
940 | } else { | |||
941 | Register SubR = MRI->createVirtualRegister(IntRC); | |||
942 | MachineInstrBuilder SubIB = | |||
943 | BuildMI(*PH, InsertPos, DL, SubD, SubR); | |||
944 | SubIB.addReg(End->getReg(), 0, End->getSubReg()) | |||
945 | .addImm(-StartV); | |||
946 | DistR = SubR; | |||
947 | } | |||
948 | } | |||
949 | DistSR = 0; | |||
950 | } | |||
951 | ||||
952 | // From DistR, compute AdjR (register with the adjusted distance). | |||
953 | Register AdjR; | |||
954 | unsigned AdjSR; | |||
955 | ||||
956 | if (AdjV == 0) { | |||
957 | AdjR = DistR; | |||
958 | AdjSR = DistSR; | |||
959 | } else { | |||
960 | // Generate CountR = ADD DistR, AdjVal | |||
961 | Register AddR = MRI->createVirtualRegister(IntRC); | |||
962 | MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi); | |||
963 | BuildMI(*PH, InsertPos, DL, AddD, AddR) | |||
964 | .addReg(DistR, 0, DistSR) | |||
965 | .addImm(AdjV); | |||
966 | ||||
967 | AdjR = AddR; | |||
968 | AdjSR = 0; | |||
969 | } | |||
970 | ||||
971 | // From AdjR, compute CountR (register with the final count). | |||
972 | Register CountR; | |||
973 | unsigned CountSR; | |||
974 | ||||
975 | if (IVBump == 1) { | |||
976 | CountR = AdjR; | |||
977 | CountSR = AdjSR; | |||
978 | } else { | |||
979 | // The IV bump is a power of two. Log_2(IV bump) is the shift amount. | |||
980 | unsigned Shift = Log2_32(IVBump); | |||
981 | ||||
982 | // Generate NormR = LSR DistR, Shift. | |||
983 | Register LsrR = MRI->createVirtualRegister(IntRC); | |||
984 | const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r); | |||
985 | BuildMI(*PH, InsertPos, DL, LsrD, LsrR) | |||
986 | .addReg(AdjR, 0, AdjSR) | |||
987 | .addImm(Shift); | |||
988 | ||||
989 | CountR = LsrR; | |||
990 | CountSR = 0; | |||
991 | } | |||
992 | ||||
993 | return new CountValue(CountValue::CV_Register, CountR, CountSR); | |||
994 | } | |||
995 | ||||
996 | /// Return true if the operation is invalid within hardware loop. | |||
997 | bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, | |||
998 | bool IsInnerHWLoop) const { | |||
999 | // Call is not allowed because the callee may use a hardware loop except for | |||
1000 | // the case when the call never returns. | |||
1001 | if (MI->getDesc().isCall()) | |||
1002 | return !TII->doesNotReturn(*MI); | |||
1003 | ||||
1004 | // Check if the instruction defines a hardware loop register. | |||
1005 | using namespace Hexagon; | |||
1006 | ||||
1007 | static const Register Regs01[] = { LC0, SA0, LC1, SA1 }; | |||
1008 | static const Register Regs1[] = { LC1, SA1 }; | |||
1009 | auto CheckRegs = IsInnerHWLoop ? ArrayRef(Regs01, std::size(Regs01)) | |||
1010 | : ArrayRef(Regs1, std::size(Regs1)); | |||
1011 | for (Register R : CheckRegs) | |||
1012 | if (MI->modifiesRegister(R, TRI)) | |||
1013 | return true; | |||
1014 | ||||
1015 | return false; | |||
1016 | } | |||
1017 | ||||
1018 | /// Return true if the loop contains an instruction that inhibits | |||
1019 | /// the use of the hardware loop instruction. | |||
1020 | bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, | |||
1021 | bool IsInnerHWLoop) const { | |||
1022 | LLVM_DEBUG(dbgs() << "\nhw_loop head, "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "\nhw_loop head, " << printMBBReference (**L->block_begin()); } } while (false) | |||
1023 | << printMBBReference(**L->block_begin()))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "\nhw_loop head, " << printMBBReference (**L->block_begin()); } } while (false); | |||
1024 | for (MachineBasicBlock *MBB : L->getBlocks()) { | |||
1025 | for (const MachineInstr &MI : *MBB) { | |||
1026 | if (isInvalidLoopOperation(&MI, IsInnerHWLoop)) { | |||
1027 | LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "\nCannot convert to hw_loop due to:" ; MI.dump();; } } while (false) | |||
1028 | MI.dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "\nCannot convert to hw_loop due to:" ; MI.dump();; } } while (false); | |||
1029 | return true; | |||
1030 | } | |||
1031 | } | |||
1032 | } | |||
1033 | return false; | |||
1034 | } | |||
1035 | ||||
1036 | /// Returns true if the instruction is dead. This was essentially | |||
1037 | /// copied from DeadMachineInstructionElim::isDead, but with special cases | |||
1038 | /// for inline asm, physical registers and instructions with side effects | |||
1039 | /// removed. | |||
1040 | bool HexagonHardwareLoops::isDead(const MachineInstr *MI, | |||
1041 | SmallVectorImpl<MachineInstr *> &DeadPhis) const { | |||
1042 | // Examine each operand. | |||
1043 | for (const MachineOperand &MO : MI->operands()) { | |||
1044 | if (!MO.isReg() || !MO.isDef()) | |||
1045 | continue; | |||
1046 | ||||
1047 | Register Reg = MO.getReg(); | |||
1048 | if (MRI->use_nodbg_empty(Reg)) | |||
1049 | continue; | |||
1050 | ||||
1051 | using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator; | |||
1052 | ||||
1053 | // This instruction has users, but if the only user is the phi node for the | |||
1054 | // parent block, and the only use of that phi node is this instruction, then | |||
1055 | // this instruction is dead: both it (and the phi node) can be removed. | |||
1056 | use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); | |||
1057 | use_nodbg_iterator End = MRI->use_nodbg_end(); | |||
1058 | if (std::next(I) != End || !I->getParent()->isPHI()) | |||
1059 | return false; | |||
1060 | ||||
1061 | MachineInstr *OnePhi = I->getParent(); | |||
1062 | for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { | |||
1063 | const MachineOperand &OPO = OnePhi->getOperand(j); | |||
1064 | if (!OPO.isReg() || !OPO.isDef()) | |||
1065 | continue; | |||
1066 | ||||
1067 | Register OPReg = OPO.getReg(); | |||
1068 | use_nodbg_iterator nextJ; | |||
1069 | for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); | |||
1070 | J != End; J = nextJ) { | |||
1071 | nextJ = std::next(J); | |||
1072 | MachineOperand &Use = *J; | |||
1073 | MachineInstr *UseMI = Use.getParent(); | |||
1074 | ||||
1075 | // If the phi node has a user that is not MI, bail. | |||
1076 | if (MI != UseMI) | |||
1077 | return false; | |||
1078 | } | |||
1079 | } | |||
1080 | DeadPhis.push_back(OnePhi); | |||
1081 | } | |||
1082 | ||||
1083 | // If there are no defs with uses, the instruction is dead. | |||
1084 | return true; | |||
1085 | } | |||
1086 | ||||
1087 | void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { | |||
1088 | // This procedure was essentially copied from DeadMachineInstructionElim. | |||
1089 | ||||
1090 | SmallVector<MachineInstr*, 1> DeadPhis; | |||
1091 | if (isDead(MI, DeadPhis)) { | |||
1092 | LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "HW looping will remove: " << *MI; } } while (false); | |||
1093 | ||||
1094 | // It is possible that some DBG_VALUE instructions refer to this | |||
1095 | // instruction. Examine each def operand for such references; | |||
1096 | // if found, mark the DBG_VALUE as undef (but don't delete it). | |||
1097 | for (const MachineOperand &MO : MI->operands()) { | |||
1098 | if (!MO.isReg() || !MO.isDef()) | |||
1099 | continue; | |||
1100 | Register Reg = MO.getReg(); | |||
1101 | // We use make_early_inc_range here because setReg below invalidates the | |||
1102 | // iterator. | |||
1103 | for (MachineOperand &MO : | |||
1104 | llvm::make_early_inc_range(MRI->use_operands(Reg))) { | |||
1105 | MachineInstr *UseMI = MO.getParent(); | |||
1106 | if (UseMI == MI) | |||
1107 | continue; | |||
1108 | if (MO.isDebug()) | |||
1109 | MO.setReg(0U); | |||
1110 | } | |||
1111 | } | |||
1112 | ||||
1113 | MI->eraseFromParent(); | |||
1114 | for (unsigned i = 0; i < DeadPhis.size(); ++i) | |||
1115 | DeadPhis[i]->eraseFromParent(); | |||
1116 | } | |||
1117 | } | |||
1118 | ||||
1119 | /// Check if the loop is a candidate for converting to a hardware | |||
1120 | /// loop. If so, then perform the transformation. | |||
1121 | /// | |||
1122 | /// This function works on innermost loops first. A loop can be converted | |||
1123 | /// if it is a counting loop; either a register value or an immediate. | |||
1124 | /// | |||
1125 | /// The code makes several assumptions about the representation of the loop | |||
1126 | /// in llvm. | |||
1127 | bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, | |||
1128 | bool &RecL0used, | |||
1129 | bool &RecL1used) { | |||
1130 | // This is just to confirm basic correctness. | |||
1131 | assert(L->getHeader() && "Loop without a header?")(static_cast <bool> (L->getHeader() && "Loop without a header?" ) ? void (0) : __assert_fail ("L->getHeader() && \"Loop without a header?\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 1131, __extension__ __PRETTY_FUNCTION__)); | |||
1132 | ||||
1133 | bool Changed = false; | |||
1134 | bool L0Used = false; | |||
1135 | bool L1Used = false; | |||
1136 | ||||
1137 | // Process nested loops first. | |||
1138 | for (MachineLoop *I : *L) { | |||
1139 | Changed |= convertToHardwareLoop(I, RecL0used, RecL1used); | |||
1140 | L0Used |= RecL0used; | |||
1141 | L1Used |= RecL1used; | |||
1142 | } | |||
1143 | ||||
1144 | // If a nested loop has been converted, then we can't convert this loop. | |||
1145 | if (Changed && L0Used && L1Used) | |||
1146 | return Changed; | |||
1147 | ||||
1148 | unsigned LOOP_i; | |||
1149 | unsigned LOOP_r; | |||
1150 | unsigned ENDLOOP; | |||
1151 | ||||
1152 | // Flag used to track loopN instruction: | |||
1153 | // 1 - Hardware loop is being generated for the inner most loop. | |||
1154 | // 0 - Hardware loop is being generated for the outer loop. | |||
1155 | unsigned IsInnerHWLoop = 1; | |||
1156 | ||||
1157 | if (L0Used) { | |||
1158 | LOOP_i = Hexagon::J2_loop1i; | |||
1159 | LOOP_r = Hexagon::J2_loop1r; | |||
1160 | ENDLOOP = Hexagon::ENDLOOP1; | |||
1161 | IsInnerHWLoop = 0; | |||
1162 | } else { | |||
1163 | LOOP_i = Hexagon::J2_loop0i; | |||
1164 | LOOP_r = Hexagon::J2_loop0r; | |||
1165 | ENDLOOP = Hexagon::ENDLOOP0; | |||
1166 | } | |||
1167 | ||||
1168 | #ifndef NDEBUG | |||
1169 | // Stop trying after reaching the limit (if any). | |||
1170 | int Limit = HWLoopLimit; | |||
1171 | if (Limit >= 0) { | |||
1172 | if (Counter >= HWLoopLimit) | |||
1173 | return false; | |||
1174 | Counter++; | |||
1175 | } | |||
1176 | #endif | |||
1177 | ||||
1178 | // Does the loop contain any invalid instructions? | |||
1179 | if (containsInvalidInstruction(L, IsInnerHWLoop)) | |||
1180 | return false; | |||
1181 | ||||
1182 | MachineBasicBlock *LastMBB = L->findLoopControlBlock(); | |||
1183 | // Don't generate hw loop if the loop has more than one exit. | |||
1184 | if (!LastMBB) | |||
1185 | return false; | |||
1186 | ||||
1187 | MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); | |||
1188 | if (LastI == LastMBB->end()) | |||
1189 | return false; | |||
1190 | ||||
1191 | // Is the induction variable bump feeding the latch condition? | |||
1192 | if (!fixupInductionVariable(L)) | |||
1193 | return false; | |||
1194 | ||||
1195 | // Ensure the loop has a preheader: the loop instruction will be | |||
1196 | // placed there. | |||
1197 | MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); | |||
1198 | if (!Preheader) { | |||
1199 | Preheader = createPreheaderForLoop(L); | |||
1200 | if (!Preheader) | |||
1201 | return false; | |||
1202 | } | |||
1203 | ||||
1204 | MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); | |||
1205 | ||||
1206 | SmallVector<MachineInstr*, 2> OldInsts; | |||
1207 | // Are we able to determine the trip count for the loop? | |||
1208 | CountValue *TripCount = getLoopTripCount(L, OldInsts); | |||
1209 | if (!TripCount) | |||
1210 | return false; | |||
1211 | ||||
1212 | // Is the trip count available in the preheader? | |||
1213 | if (TripCount->isReg()) { | |||
1214 | // There will be a use of the register inserted into the preheader, | |||
1215 | // so make sure that the register is actually defined at that point. | |||
1216 | MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); | |||
1217 | MachineBasicBlock *BBDef = TCDef->getParent(); | |||
1218 | if (!MDT->dominates(BBDef, Preheader)) | |||
1219 | return false; | |||
1220 | } | |||
1221 | ||||
1222 | // Determine the loop start. | |||
1223 | MachineBasicBlock *TopBlock = L->getTopBlock(); | |||
1224 | MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); | |||
1225 | MachineBasicBlock *LoopStart = nullptr; | |||
1226 | if (ExitingBlock != L->getLoopLatch()) { | |||
1227 | MachineBasicBlock *TB = nullptr, *FB = nullptr; | |||
1228 | SmallVector<MachineOperand, 2> Cond; | |||
1229 | ||||
1230 | if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false)) | |||
1231 | return false; | |||
1232 | ||||
1233 | if (L->contains(TB)) | |||
1234 | LoopStart = TB; | |||
1235 | else if (L->contains(FB)) | |||
1236 | LoopStart = FB; | |||
1237 | else | |||
1238 | return false; | |||
1239 | } | |||
1240 | else | |||
1241 | LoopStart = TopBlock; | |||
1242 | ||||
1243 | // Convert the loop to a hardware loop. | |||
1244 | LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "Change to hardware loop at "; L->dump(); } } while (false); | |||
1245 | DebugLoc DL; | |||
1246 | if (InsertPos != Preheader->end()) | |||
1247 | DL = InsertPos->getDebugLoc(); | |||
1248 | ||||
1249 | if (TripCount->isReg()) { | |||
1250 | // Create a copy of the loop count register. | |||
1251 | Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); | |||
1252 | BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) | |||
1253 | .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); | |||
1254 | // Add the Loop instruction to the beginning of the loop. | |||
1255 | BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart) | |||
1256 | .addReg(CountReg); | |||
1257 | } else { | |||
1258 | assert(TripCount->isImm() && "Expecting immediate value for trip count")(static_cast <bool> (TripCount->isImm() && "Expecting immediate value for trip count" ) ? void (0) : __assert_fail ("TripCount->isImm() && \"Expecting immediate value for trip count\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 1258, __extension__ __PRETTY_FUNCTION__)); | |||
1259 | // Add the Loop immediate instruction to the beginning of the loop, | |||
1260 | // if the immediate fits in the instructions. Otherwise, we need to | |||
1261 | // create a new virtual register. | |||
1262 | int64_t CountImm = TripCount->getImm(); | |||
1263 | if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) { | |||
1264 | Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); | |||
1265 | BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) | |||
1266 | .addImm(CountImm); | |||
1267 | BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)) | |||
1268 | .addMBB(LoopStart).addReg(CountReg); | |||
1269 | } else | |||
1270 | BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i)) | |||
1271 | .addMBB(LoopStart).addImm(CountImm); | |||
1272 | } | |||
1273 | ||||
1274 | // Make sure the loop start always has a reference in the CFG. | |||
1275 | LoopStart->setMachineBlockAddressTaken(); | |||
1276 | ||||
1277 | // Replace the loop branch with an endloop instruction. | |||
1278 | DebugLoc LastIDL = LastI->getDebugLoc(); | |||
1279 | BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart); | |||
1280 | ||||
1281 | // The loop ends with either: | |||
1282 | // - a conditional branch followed by an unconditional branch, or | |||
1283 | // - a conditional branch to the loop start. | |||
1284 | if (LastI->getOpcode() == Hexagon::J2_jumpt || | |||
1285 | LastI->getOpcode() == Hexagon::J2_jumpf) { | |||
1286 | // Delete one and change/add an uncond. branch to out of the loop. | |||
1287 | MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); | |||
1288 | LastI = LastMBB->erase(LastI); | |||
1289 | if (!L->contains(BranchTarget)) { | |||
1290 | if (LastI != LastMBB->end()) | |||
1291 | LastI = LastMBB->erase(LastI); | |||
1292 | SmallVector<MachineOperand, 0> Cond; | |||
1293 | TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL); | |||
1294 | } | |||
1295 | } else { | |||
1296 | // Conditional branch to loop start; just delete it. | |||
1297 | LastMBB->erase(LastI); | |||
1298 | } | |||
1299 | delete TripCount; | |||
1300 | ||||
1301 | // The induction operation and the comparison may now be | |||
1302 | // unneeded. If these are unneeded, then remove them. | |||
1303 | for (unsigned i = 0; i < OldInsts.size(); ++i) | |||
1304 | removeIfDead(OldInsts[i]); | |||
1305 | ||||
1306 | ++NumHWLoops; | |||
1307 | ||||
1308 | // Set RecL1used and RecL0used only after hardware loop has been | |||
1309 | // successfully generated. Doing it earlier can cause wrong loop instruction | |||
1310 | // to be used. | |||
1311 | if (L0Used) // Loop0 was already used. So, the correct loop must be loop1. | |||
1312 | RecL1used = true; | |||
1313 | else | |||
1314 | RecL0used = true; | |||
1315 | ||||
1316 | return true; | |||
1317 | } | |||
1318 | ||||
1319 | bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, | |||
1320 | MachineInstr *CmpI) { | |||
1321 | assert (BumpI != CmpI && "Bump and compare in the same instruction?")(static_cast <bool> (BumpI != CmpI && "Bump and compare in the same instruction?" ) ? void (0) : __assert_fail ("BumpI != CmpI && \"Bump and compare in the same instruction?\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 1321, __extension__ __PRETTY_FUNCTION__)); | |||
1322 | ||||
1323 | MachineBasicBlock *BB = BumpI->getParent(); | |||
1324 | if (CmpI->getParent() != BB) | |||
1325 | return false; | |||
1326 | ||||
1327 | using instr_iterator = MachineBasicBlock::instr_iterator; | |||
1328 | ||||
1329 | // Check if things are in order to begin with. | |||
1330 | for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I) | |||
1331 | if (&*I == CmpI) | |||
1332 | return true; | |||
1333 | ||||
1334 | // Out of order. | |||
1335 | Register PredR = CmpI->getOperand(0).getReg(); | |||
1336 | bool FoundBump = false; | |||
1337 | instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt); | |||
1338 | for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { | |||
1339 | MachineInstr *In = &*I; | |||
1340 | for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) { | |||
1341 | MachineOperand &MO = In->getOperand(i); | |||
1342 | if (MO.isReg() && MO.isUse()) { | |||
1343 | if (MO.getReg() == PredR) // Found an intervening use of PredR. | |||
1344 | return false; | |||
1345 | } | |||
1346 | } | |||
1347 | ||||
1348 | if (In == BumpI) { | |||
1349 | BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator()); | |||
1350 | FoundBump = true; | |||
1351 | break; | |||
1352 | } | |||
1353 | } | |||
1354 | assert (FoundBump && "Cannot determine instruction order")(static_cast <bool> (FoundBump && "Cannot determine instruction order" ) ? void (0) : __assert_fail ("FoundBump && \"Cannot determine instruction order\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 1354, __extension__ __PRETTY_FUNCTION__)); | |||
1355 | return FoundBump; | |||
1356 | } | |||
1357 | ||||
1358 | /// This function is required to break recursion. Visiting phis in a loop may | |||
1359 | /// result in recursion during compilation. We break the recursion by making | |||
1360 | /// sure that we visit a MachineOperand and its definition in a | |||
1361 | /// MachineInstruction only once. If we attempt to visit more than once, then | |||
1362 | /// there is recursion, and will return false. | |||
1363 | bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, | |||
1364 | MachineInstr *MI, | |||
1365 | const MachineOperand *MO, | |||
1366 | LoopFeederMap &LoopFeederPhi) const { | |||
1367 | if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) { | |||
1368 | LLVM_DEBUG(dbgs() << "\nhw_loop head, "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "\nhw_loop head, " << printMBBReference (**L->block_begin()); } } while (false) | |||
1369 | << printMBBReference(**L->block_begin()))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "\nhw_loop head, " << printMBBReference (**L->block_begin()); } } while (false); | |||
1370 | // Ignore all BBs that form Loop. | |||
1371 | if (llvm::is_contained(L->getBlocks(), A)) | |||
1372 | return false; | |||
1373 | MachineInstr *Def = MRI->getVRegDef(MO->getReg()); | |||
1374 | LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def)); | |||
1375 | return true; | |||
1376 | } else | |||
1377 | // Already visited node. | |||
1378 | return false; | |||
1379 | } | |||
1380 | ||||
1381 | /// Return true if a Phi may generate a value that can underflow. | |||
1382 | /// This function calls loopCountMayWrapOrUnderFlow for each Phi operand. | |||
1383 | bool HexagonHardwareLoops::phiMayWrapOrUnderflow( | |||
1384 | MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB, | |||
1385 | MachineLoop *L, LoopFeederMap &LoopFeederPhi) const { | |||
1386 | assert(Phi->isPHI() && "Expecting a Phi.")(static_cast <bool> (Phi->isPHI() && "Expecting a Phi." ) ? void (0) : __assert_fail ("Phi->isPHI() && \"Expecting a Phi.\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 1386, __extension__ __PRETTY_FUNCTION__)); | |||
1387 | // Walk through each Phi, and its used operands. Make sure that | |||
1388 | // if there is recursion in Phi, we won't generate hardware loops. | |||
1389 | for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2) | |||
1390 | if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi)) | |||
1391 | if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal, | |||
1392 | Phi->getParent(), L, LoopFeederPhi)) | |||
1393 | return true; | |||
1394 | return false; | |||
1395 | } | |||
1396 | ||||
1397 | /// Return true if the induction variable can underflow in the first iteration. | |||
1398 | /// An example, is an initial unsigned value that is 0 and is decrement in the | |||
1399 | /// first itertion of a do-while loop. In this case, we cannot generate a | |||
1400 | /// hardware loop because the endloop instruction does not decrement the loop | |||
1401 | /// counter if it is <= 1. We only need to perform this analysis if the | |||
1402 | /// initial value is a register. | |||
1403 | /// | |||
1404 | /// This function assumes the initial value may underfow unless proven | |||
1405 | /// otherwise. If the type is signed, then we don't care because signed | |||
1406 | /// underflow is undefined. We attempt to prove the initial value is not | |||
1407 | /// zero by perfoming a crude analysis of the loop counter. This function | |||
1408 | /// checks if the initial value is used in any comparison prior to the loop | |||
1409 | /// and, if so, assumes the comparison is a range check. This is inexact, | |||
1410 | /// but will catch the simple cases. | |||
1411 | bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow( | |||
1412 | const MachineOperand *InitVal, const MachineOperand *EndVal, | |||
1413 | MachineBasicBlock *MBB, MachineLoop *L, | |||
1414 | LoopFeederMap &LoopFeederPhi) const { | |||
1415 | // Only check register values since they are unknown. | |||
1416 | if (!InitVal->isReg()) | |||
1417 | return false; | |||
1418 | ||||
1419 | if (!EndVal->isImm()) | |||
1420 | return false; | |||
1421 | ||||
1422 | // A register value that is assigned an immediate is a known value, and it | |||
1423 | // won't underflow in the first iteration. | |||
1424 | int64_t Imm; | |||
1425 | if (checkForImmediate(*InitVal, Imm)) | |||
1426 | return (EndVal->getImm() == Imm); | |||
1427 | ||||
1428 | Register Reg = InitVal->getReg(); | |||
1429 | ||||
1430 | // We don't know the value of a physical register. | |||
1431 | if (!Reg.isVirtual()) | |||
1432 | return true; | |||
1433 | ||||
1434 | MachineInstr *Def = MRI->getVRegDef(Reg); | |||
1435 | if (!Def) | |||
1436 | return true; | |||
1437 | ||||
1438 | // If the initial value is a Phi or copy and the operands may not underflow, | |||
1439 | // then the definition cannot be underflow either. | |||
1440 | if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(), | |||
1441 | L, LoopFeederPhi)) | |||
1442 | return false; | |||
1443 | if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)), | |||
1444 | EndVal, Def->getParent(), | |||
1445 | L, LoopFeederPhi)) | |||
1446 | return false; | |||
1447 | ||||
1448 | // Iterate over the uses of the initial value. If the initial value is used | |||
1449 | // in a compare, then we assume this is a range check that ensures the loop | |||
1450 | // doesn't underflow. This is not an exact test and should be improved. | |||
1451 | for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg), | |||
1452 | E = MRI->use_instr_nodbg_end(); I != E; ++I) { | |||
1453 | MachineInstr *MI = &*I; | |||
1454 | Register CmpReg1, CmpReg2; | |||
1455 | int64_t CmpMask = 0, CmpValue = 0; | |||
1456 | ||||
1457 | if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue)) | |||
1458 | continue; | |||
1459 | ||||
1460 | MachineBasicBlock *TBB = nullptr, *FBB = nullptr; | |||
1461 | SmallVector<MachineOperand, 2> Cond; | |||
1462 | if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false)) | |||
1463 | continue; | |||
1464 | ||||
1465 | Comparison::Kind Cmp = | |||
1466 | getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0); | |||
1467 | if (Cmp == 0) | |||
1468 | continue; | |||
1469 | if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB)) | |||
1470 | Cmp = Comparison::getNegatedComparison(Cmp); | |||
1471 | if (CmpReg2 != 0 && CmpReg2 == Reg) | |||
1472 | Cmp = Comparison::getSwappedComparison(Cmp); | |||
1473 | ||||
1474 | // Signed underflow is undefined. | |||
1475 | if (Comparison::isSigned(Cmp)) | |||
1476 | return false; | |||
1477 | ||||
1478 | // Check if there is a comparison of the initial value. If the initial value | |||
1479 | // is greater than or not equal to another value, then assume this is a | |||
1480 | // range check. | |||
1481 | if ((Cmp & Comparison::G) || Cmp == Comparison::NE) | |||
1482 | return false; | |||
1483 | } | |||
1484 | ||||
1485 | // OK - this is a hack that needs to be improved. We really need to analyze | |||
1486 | // the instructions performed on the initial value. This works on the simplest | |||
1487 | // cases only. | |||
1488 | if (!Def->isCopy() && !Def->isPHI()) | |||
1489 | return false; | |||
1490 | ||||
1491 | return true; | |||
1492 | } | |||
1493 | ||||
1494 | bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, | |||
1495 | int64_t &Val) const { | |||
1496 | if (MO.isImm()) { | |||
1497 | Val = MO.getImm(); | |||
1498 | return true; | |||
1499 | } | |||
1500 | if (!MO.isReg()) | |||
1501 | return false; | |||
1502 | ||||
1503 | // MO is a register. Check whether it is defined as an immediate value, | |||
1504 | // and if so, get the value of it in TV. That value will then need to be | |||
1505 | // processed to handle potential subregisters in MO. | |||
1506 | int64_t TV; | |||
1507 | ||||
1508 | Register R = MO.getReg(); | |||
1509 | if (!R.isVirtual()) | |||
1510 | return false; | |||
1511 | MachineInstr *DI = MRI->getVRegDef(R); | |||
1512 | unsigned DOpc = DI->getOpcode(); | |||
1513 | switch (DOpc) { | |||
1514 | case TargetOpcode::COPY: | |||
1515 | case Hexagon::A2_tfrsi: | |||
1516 | case Hexagon::A2_tfrpi: | |||
1517 | case Hexagon::CONST32: | |||
1518 | case Hexagon::CONST64: | |||
1519 | // Call recursively to avoid an extra check whether operand(1) is | |||
1520 | // indeed an immediate (it could be a global address, for example), | |||
1521 | // plus we can handle COPY at the same time. | |||
1522 | if (!checkForImmediate(DI->getOperand(1), TV)) | |||
1523 | return false; | |||
1524 | break; | |||
1525 | case Hexagon::A2_combineii: | |||
1526 | case Hexagon::A4_combineir: | |||
1527 | case Hexagon::A4_combineii: | |||
1528 | case Hexagon::A4_combineri: | |||
1529 | case Hexagon::A2_combinew: { | |||
1530 | const MachineOperand &S1 = DI->getOperand(1); | |||
1531 | const MachineOperand &S2 = DI->getOperand(2); | |||
1532 | int64_t V1, V2; | |||
1533 | if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2)) | |||
1534 | return false; | |||
1535 | TV = V2 | (static_cast<uint64_t>(V1) << 32); | |||
1536 | break; | |||
1537 | } | |||
1538 | case TargetOpcode::REG_SEQUENCE: { | |||
1539 | const MachineOperand &S1 = DI->getOperand(1); | |||
1540 | const MachineOperand &S3 = DI->getOperand(3); | |||
1541 | int64_t V1, V3; | |||
1542 | if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3)) | |||
1543 | return false; | |||
1544 | unsigned Sub2 = DI->getOperand(2).getImm(); | |||
1545 | unsigned Sub4 = DI->getOperand(4).getImm(); | |||
1546 | if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi) | |||
1547 | TV = V1 | (V3 << 32); | |||
1548 | else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo) | |||
1549 | TV = V3 | (V1 << 32); | |||
1550 | else | |||
1551 | llvm_unreachable("Unexpected form of REG_SEQUENCE")::llvm::llvm_unreachable_internal("Unexpected form of REG_SEQUENCE" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 1551); | |||
1552 | break; | |||
1553 | } | |||
1554 | ||||
1555 | default: | |||
1556 | return false; | |||
1557 | } | |||
1558 | ||||
1559 | // By now, we should have successfully obtained the immediate value defining | |||
1560 | // the register referenced in MO. Handle a potential use of a subregister. | |||
1561 | switch (MO.getSubReg()) { | |||
1562 | case Hexagon::isub_lo: | |||
1563 | Val = TV & 0xFFFFFFFFULL; | |||
1564 | break; | |||
1565 | case Hexagon::isub_hi: | |||
1566 | Val = (TV >> 32) & 0xFFFFFFFFULL; | |||
1567 | break; | |||
1568 | default: | |||
1569 | Val = TV; | |||
1570 | break; | |||
1571 | } | |||
1572 | return true; | |||
1573 | } | |||
1574 | ||||
1575 | void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { | |||
1576 | if (MO.isImm()) { | |||
1577 | MO.setImm(Val); | |||
1578 | return; | |||
1579 | } | |||
1580 | ||||
1581 | assert(MO.isReg())(static_cast <bool> (MO.isReg()) ? void (0) : __assert_fail ("MO.isReg()", "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp" , 1581, __extension__ __PRETTY_FUNCTION__)); | |||
1582 | Register R = MO.getReg(); | |||
1583 | MachineInstr *DI = MRI->getVRegDef(R); | |||
1584 | ||||
1585 | const TargetRegisterClass *RC = MRI->getRegClass(R); | |||
1586 | Register NewR = MRI->createVirtualRegister(RC); | |||
1587 | MachineBasicBlock &B = *DI->getParent(); | |||
1588 | DebugLoc DL = DI->getDebugLoc(); | |||
1589 | BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val); | |||
1590 | MO.setReg(NewR); | |||
1591 | } | |||
1592 | ||||
1593 | bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { | |||
1594 | MachineBasicBlock *Header = L->getHeader(); | |||
1595 | MachineBasicBlock *Latch = L->getLoopLatch(); | |||
1596 | MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); | |||
1597 | ||||
1598 | if (!(Header && Latch && ExitingBlock)) | |||
1599 | return false; | |||
1600 | ||||
1601 | // These data structures follow the same concept as the corresponding | |||
1602 | // ones in findInductionRegister (where some comments are). | |||
1603 | using RegisterBump = std::pair<Register, int64_t>; | |||
1604 | using RegisterInduction = std::pair<Register, RegisterBump>; | |||
1605 | using RegisterInductionSet = std::set<RegisterInduction>; | |||
1606 | ||||
1607 | // Register candidates for induction variables, with their associated bumps. | |||
1608 | RegisterInductionSet IndRegs; | |||
1609 | ||||
1610 | // Look for induction patterns: | |||
1611 | // %1 = PHI ..., [ latch, %2 ] | |||
1612 | // %2 = ADD %1, imm | |||
1613 | using instr_iterator = MachineBasicBlock::instr_iterator; | |||
1614 | ||||
1615 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); | |||
1616 | I != E && I->isPHI(); ++I) { | |||
1617 | MachineInstr *Phi = &*I; | |||
1618 | ||||
1619 | // Have a PHI instruction. | |||
1620 | for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { | |||
1621 | if (Phi->getOperand(i+1).getMBB() != Latch) | |||
1622 | continue; | |||
1623 | ||||
1624 | Register PhiReg = Phi->getOperand(i).getReg(); | |||
1625 | MachineInstr *DI = MRI->getVRegDef(PhiReg); | |||
1626 | ||||
1627 | if (DI->getDesc().isAdd()) { | |||
1628 | // If the register operand to the add/sub is the PHI we are looking | |||
1629 | // at, this meets the induction pattern. | |||
1630 | Register IndReg = DI->getOperand(1).getReg(); | |||
1631 | MachineOperand &Opnd2 = DI->getOperand(2); | |||
1632 | int64_t V; | |||
1633 | if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { | |||
1634 | Register UpdReg = DI->getOperand(0).getReg(); | |||
1635 | IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); | |||
1636 | } | |||
1637 | } | |||
1638 | } // for (i) | |||
1639 | } // for (instr) | |||
1640 | ||||
1641 | if (IndRegs.empty()) | |||
1642 | return false; | |||
1643 | ||||
1644 | MachineBasicBlock *TB = nullptr, *FB = nullptr; | |||
1645 | SmallVector<MachineOperand,2> Cond; | |||
1646 | // analyzeBranch returns true if it fails to analyze branch. | |||
1647 | bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); | |||
1648 | if (NotAnalyzed || Cond.empty()) | |||
1649 | return false; | |||
1650 | ||||
1651 | if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { | |||
1652 | MachineBasicBlock *LTB = nullptr, *LFB = nullptr; | |||
1653 | SmallVector<MachineOperand,2> LCond; | |||
1654 | bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); | |||
1655 | if (NotAnalyzed) | |||
1656 | return false; | |||
1657 | ||||
1658 | // Since latch is not the exiting block, the latch branch should be an | |||
1659 | // unconditional branch to the loop header. | |||
1660 | if (TB == Latch) | |||
1661 | TB = (LTB == Header) ? LTB : LFB; | |||
1662 | else | |||
1663 | FB = (LTB == Header) ? LTB : LFB; | |||
1664 | } | |||
1665 | if (TB != Header) { | |||
1666 | if (FB != Header) { | |||
1667 | // The latch/exit block does not go back to the header. | |||
1668 | return false; | |||
1669 | } | |||
1670 | // FB is the header (i.e., uncond. jump to branch header) | |||
1671 | // In this case, the LoopBody -> TB should not be a back edge otherwise | |||
1672 | // it could result in an infinite loop after conversion to hw_loop. | |||
1673 | // This case can happen when the Latch has two jumps like this: | |||
1674 | // Jmp_c OuterLoopHeader <-- TB | |||
1675 | // Jmp InnerLoopHeader <-- FB | |||
1676 | if (MDT->dominates(TB, FB)) | |||
1677 | return false; | |||
1678 | } | |||
1679 | ||||
1680 | // Expecting a predicate register as a condition. It won't be a hardware | |||
1681 | // predicate register at this point yet, just a vreg. | |||
1682 | // HexagonInstrInfo::analyzeBranch for negated branches inserts imm(0) | |||
1683 | // into Cond, followed by the predicate register. For non-negated branches | |||
1684 | // it's just the register. | |||
1685 | unsigned CSz = Cond.size(); | |||
1686 | if (CSz != 1 && CSz != 2) | |||
1687 | return false; | |||
1688 | ||||
1689 | if (!Cond[CSz-1].isReg()) | |||
1690 | return false; | |||
1691 | ||||
1692 | Register P = Cond[CSz - 1].getReg(); | |||
1693 | MachineInstr *PredDef = MRI->getVRegDef(P); | |||
1694 | ||||
1695 | if (!PredDef->isCompare()) | |||
1696 | return false; | |||
1697 | ||||
1698 | SmallSet<Register,2> CmpRegs; | |||
1699 | MachineOperand *CmpImmOp = nullptr; | |||
1700 | ||||
1701 | // Go over all operands to the compare and look for immediate and register | |||
1702 | // operands. Assume that if the compare has a single register use and a | |||
1703 | // single immediate operand, then the register is being compared with the | |||
1704 | // immediate value. | |||
1705 | for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { | |||
1706 | MachineOperand &MO = PredDef->getOperand(i); | |||
1707 | if (MO.isReg()) { | |||
1708 | // Skip all implicit references. In one case there was: | |||
1709 | // %140 = FCMPUGT32_rr %138, %139, implicit %usr | |||
1710 | if (MO.isImplicit()) | |||
1711 | continue; | |||
1712 | if (MO.isUse()) { | |||
1713 | if (!isImmediate(MO)) { | |||
1714 | CmpRegs.insert(MO.getReg()); | |||
1715 | continue; | |||
1716 | } | |||
1717 | // Consider the register to be the "immediate" operand. | |||
1718 | if (CmpImmOp) | |||
1719 | return false; | |||
1720 | CmpImmOp = &MO; | |||
1721 | } | |||
1722 | } else if (MO.isImm()) { | |||
1723 | if (CmpImmOp) // A second immediate argument? Confusing. Bail out. | |||
1724 | return false; | |||
1725 | CmpImmOp = &MO; | |||
1726 | } | |||
1727 | } | |||
1728 | ||||
1729 | if (CmpRegs.empty()) | |||
1730 | return false; | |||
1731 | ||||
1732 | // Check if the compared register follows the order we want. Fix if needed. | |||
1733 | for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); | |||
1734 | I != E; ++I) { | |||
1735 | // This is a success. If the register used in the comparison is one that | |||
1736 | // we have identified as a bumped (updated) induction register, there is | |||
1737 | // nothing to do. | |||
1738 | if (CmpRegs.count(I->first)) | |||
1739 | return true; | |||
1740 | ||||
1741 | // Otherwise, if the register being compared comes out of a PHI node, | |||
1742 | // and has been recognized as following the induction pattern, and is | |||
1743 | // compared against an immediate, we can fix it. | |||
1744 | const RegisterBump &RB = I->second; | |||
1745 | if (CmpRegs.count(RB.first)) { | |||
1746 | if (!CmpImmOp) { | |||
1747 | // If both operands to the compare instruction are registers, see if | |||
1748 | // it can be changed to use induction register as one of the operands. | |||
1749 | MachineInstr *IndI = nullptr; | |||
1750 | MachineInstr *nonIndI = nullptr; | |||
1751 | MachineOperand *IndMO = nullptr; | |||
1752 | MachineOperand *nonIndMO = nullptr; | |||
1753 | ||||
1754 | for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) { | |||
1755 | MachineOperand &MO = PredDef->getOperand(i); | |||
1756 | if (MO.isReg() && MO.getReg() == RB.first) { | |||
1757 | LLVM_DEBUG(dbgs() << "\n DefMI(" << ido { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "\n DefMI(" << i << ") = " << *(MRI->getVRegDef(I->first)); } } while (false) | |||
1758 | << ") = " << *(MRI->getVRegDef(I->first)))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "\n DefMI(" << i << ") = " << *(MRI->getVRegDef(I->first)); } } while (false); | |||
1759 | if (IndI) | |||
1760 | return false; | |||
1761 | ||||
1762 | IndI = MRI->getVRegDef(I->first); | |||
1763 | IndMO = &MO; | |||
1764 | } else if (MO.isReg()) { | |||
1765 | LLVM_DEBUG(dbgs() << "\n DefMI(" << ido { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "\n DefMI(" << i << ") = " << *(MRI->getVRegDef(MO.getReg())); } } while (false) | |||
1766 | << ") = " << *(MRI->getVRegDef(MO.getReg())))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hwloops")) { dbgs() << "\n DefMI(" << i << ") = " << *(MRI->getVRegDef(MO.getReg())); } } while (false); | |||
1767 | if (nonIndI) | |||
1768 | return false; | |||
1769 | ||||
1770 | nonIndI = MRI->getVRegDef(MO.getReg()); | |||
1771 | nonIndMO = &MO; | |||
1772 | } | |||
1773 | } | |||
1774 | if (IndI && nonIndI && | |||
1775 | nonIndI->getOpcode() == Hexagon::A2_addi && | |||
1776 | nonIndI->getOperand(2).isImm() && | |||
1777 | nonIndI->getOperand(2).getImm() == - RB.second) { | |||
1778 | bool Order = orderBumpCompare(IndI, PredDef); | |||
1779 | if (Order) { | |||
1780 | IndMO->setReg(I->first); | |||
1781 | nonIndMO->setReg(nonIndI->getOperand(1).getReg()); | |||
1782 | return true; | |||
1783 | } | |||
1784 | } | |||
1785 | return false; | |||
1786 | } | |||
1787 | ||||
1788 | // It is not valid to do this transformation on an unsigned comparison | |||
1789 | // because it may underflow. | |||
1790 | Comparison::Kind Cmp = | |||
1791 | getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0); | |||
1792 | if (!Cmp || Comparison::isUnsigned(Cmp)) | |||
1793 | return false; | |||
1794 | ||||
1795 | // If the register is being compared against an immediate, try changing | |||
1796 | // the compare instruction to use induction register and adjust the | |||
1797 | // immediate operand. | |||
1798 | int64_t CmpImm = getImmediate(*CmpImmOp); | |||
1799 | int64_t V = RB.second; | |||
1800 | // Handle Overflow (64-bit). | |||
1801 | if (((V > 0) && (CmpImm > INT64_MAX(9223372036854775807L) - V)) || | |||
1802 | ((V < 0) && (CmpImm < INT64_MIN(-9223372036854775807L -1) - V))) | |||
1803 | return false; | |||
1804 | CmpImm += V; | |||
1805 | // Most comparisons of register against an immediate value allow | |||
1806 | // the immediate to be constant-extended. There are some exceptions | |||
1807 | // though. Make sure the new combination will work. | |||
1808 | if (CmpImmOp->isImm() && !TII->isExtendable(*PredDef) && | |||
1809 | !TII->isValidOffset(PredDef->getOpcode(), CmpImm, TRI, false)) | |||
1810 | return false; | |||
1811 | ||||
1812 | // Make sure that the compare happens after the bump. Otherwise, | |||
1813 | // after the fixup, the compare would use a yet-undefined register. | |||
1814 | MachineInstr *BumpI = MRI->getVRegDef(I->first); | |||
1815 | bool Order = orderBumpCompare(BumpI, PredDef); | |||
1816 | if (!Order) | |||
1817 | return false; | |||
1818 | ||||
1819 | // Finally, fix the compare instruction. | |||
1820 | setImmediate(*CmpImmOp, CmpImm); | |||
1821 | for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { | |||
1822 | MachineOperand &MO = PredDef->getOperand(i); | |||
1823 | if (MO.isReg() && MO.getReg() == RB.first) { | |||
1824 | MO.setReg(I->first); | |||
1825 | return true; | |||
1826 | } | |||
1827 | } | |||
1828 | } | |||
1829 | } | |||
1830 | ||||
1831 | return false; | |||
1832 | } | |||
1833 | ||||
1834 | /// createPreheaderForLoop - Create a preheader for a given loop. | |||
1835 | MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( | |||
1836 | MachineLoop *L) { | |||
1837 | if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader)) | |||
1838 | return TmpPH; | |||
1839 | if (!HWCreatePreheader) | |||
1840 | return nullptr; | |||
1841 | ||||
1842 | MachineBasicBlock *Header = L->getHeader(); | |||
1843 | MachineBasicBlock *Latch = L->getLoopLatch(); | |||
1844 | MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); | |||
1845 | MachineFunction *MF = Header->getParent(); | |||
1846 | DebugLoc DL; | |||
1847 | ||||
1848 | #ifndef NDEBUG | |||
1849 | if ((!PHFn.empty()) && (PHFn != MF->getName())) | |||
1850 | return nullptr; | |||
1851 | #endif | |||
1852 | ||||
1853 | if (!Latch || !ExitingBlock || Header->hasAddressTaken()) | |||
1854 | return nullptr; | |||
1855 | ||||
1856 | using instr_iterator = MachineBasicBlock::instr_iterator; | |||
1857 | ||||
1858 | // Verify that all existing predecessors have analyzable branches | |||
1859 | // (or no branches at all). | |||
1860 | using MBBVector = std::vector<MachineBasicBlock *>; | |||
1861 | ||||
1862 | MBBVector Preds(Header->pred_begin(), Header->pred_end()); | |||
1863 | SmallVector<MachineOperand,2> Tmp1; | |||
1864 | MachineBasicBlock *TB = nullptr, *FB = nullptr; | |||
1865 | ||||
1866 | if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false)) | |||
1867 | return nullptr; | |||
1868 | ||||
1869 | for (MachineBasicBlock *PB : Preds) { | |||
1870 | bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false); | |||
1871 | if (NotAnalyzed) | |||
1872 | return nullptr; | |||
1873 | } | |||
1874 | ||||
1875 | MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); | |||
1876 | MF->insert(Header->getIterator(), NewPH); | |||
1877 | ||||
1878 | if (Header->pred_size() > 2) { | |||
1879 | // Ensure that the header has only two predecessors: the preheader and | |||
1880 | // the loop latch. Any additional predecessors of the header should | |||
1881 | // join at the newly created preheader. Inspect all PHI nodes from the | |||
1882 | // header and create appropriate corresponding PHI nodes in the preheader. | |||
1883 | ||||
1884 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); | |||
1885 | I != E && I->isPHI(); ++I) { | |||
1886 | MachineInstr *PN = &*I; | |||
1887 | ||||
1888 | const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); | |||
1889 | MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); | |||
1890 | NewPH->insert(NewPH->end(), NewPN); | |||
1891 | ||||
1892 | Register PR = PN->getOperand(0).getReg(); | |||
1893 | const TargetRegisterClass *RC = MRI->getRegClass(PR); | |||
1894 | Register NewPR = MRI->createVirtualRegister(RC); | |||
1895 | NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); | |||
1896 | ||||
1897 | // Copy all non-latch operands of a header's PHI node to the newly | |||
1898 | // created PHI node in the preheader. | |||
1899 | for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { | |||
1900 | Register PredR = PN->getOperand(i).getReg(); | |||
1901 | unsigned PredRSub = PN->getOperand(i).getSubReg(); | |||
1902 | MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); | |||
1903 | if (PredB == Latch) | |||
1904 | continue; | |||
1905 | ||||
1906 | MachineOperand MO = MachineOperand::CreateReg(PredR, false); | |||
1907 | MO.setSubReg(PredRSub); | |||
1908 | NewPN->addOperand(MO); | |||
1909 | NewPN->addOperand(MachineOperand::CreateMBB(PredB)); | |||
1910 | } | |||
1911 | ||||
1912 | // Remove copied operands from the old PHI node and add the value | |||
1913 | // coming from the preheader's PHI. | |||
1914 | for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { | |||
1915 | MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); | |||
1916 | if (PredB != Latch) { | |||
1917 | PN->removeOperand(i+1); | |||
1918 | PN->removeOperand(i); | |||
1919 | } | |||
1920 | } | |||
1921 | PN->addOperand(MachineOperand::CreateReg(NewPR, false)); | |||
1922 | PN->addOperand(MachineOperand::CreateMBB(NewPH)); | |||
1923 | } | |||
1924 | } else { | |||
1925 | assert(Header->pred_size() == 2)(static_cast <bool> (Header->pred_size() == 2) ? void (0) : __assert_fail ("Header->pred_size() == 2", "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp" , 1925, __extension__ __PRETTY_FUNCTION__)); | |||
1926 | ||||
1927 | // The header has only two predecessors, but the non-latch predecessor | |||
1928 | // is not a preheader (e.g. it has other successors, etc.) | |||
1929 | // In such a case we don't need any extra PHI nodes in the new preheader, | |||
1930 | // all we need is to adjust existing PHIs in the header to now refer to | |||
1931 | // the new preheader. | |||
1932 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); | |||
1933 | I != E && I->isPHI(); ++I) { | |||
1934 | MachineInstr *PN = &*I; | |||
1935 | for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { | |||
1936 | MachineOperand &MO = PN->getOperand(i+1); | |||
1937 | if (MO.getMBB() != Latch) | |||
1938 | MO.setMBB(NewPH); | |||
1939 | } | |||
1940 | } | |||
1941 | } | |||
1942 | ||||
1943 | // "Reroute" the CFG edges to link in the new preheader. | |||
1944 | // If any of the predecessors falls through to the header, insert a branch | |||
1945 | // to the new preheader in that place. | |||
1946 | SmallVector<MachineOperand,1> Tmp2; | |||
1947 | SmallVector<MachineOperand,1> EmptyCond; | |||
1948 | ||||
1949 | TB = FB = nullptr; | |||
1950 | ||||
1951 | for (MachineBasicBlock *PB : Preds) { | |||
1952 | if (PB != Latch) { | |||
1953 | Tmp2.clear(); | |||
1954 | bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false); | |||
1955 | (void)NotAnalyzed; // suppress compiler warning | |||
1956 | assert (!NotAnalyzed && "Should be analyzable!")(static_cast <bool> (!NotAnalyzed && "Should be analyzable!" ) ? void (0) : __assert_fail ("!NotAnalyzed && \"Should be analyzable!\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 1956, __extension__ __PRETTY_FUNCTION__)); | |||
1957 | if (TB != Header && (Tmp2.empty() || FB != Header)) | |||
1958 | TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL); | |||
1959 | PB->ReplaceUsesOfBlockWith(Header, NewPH); | |||
1960 | } | |||
1961 | } | |||
1962 | ||||
1963 | // It can happen that the latch block will fall through into the header. | |||
1964 | // Insert an unconditional branch to the header. | |||
1965 | TB = FB = nullptr; | |||
1966 | bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false); | |||
1967 | (void)LatchNotAnalyzed; // suppress compiler warning | |||
1968 | assert (!LatchNotAnalyzed && "Should be analyzable!")(static_cast <bool> (!LatchNotAnalyzed && "Should be analyzable!" ) ? void (0) : __assert_fail ("!LatchNotAnalyzed && \"Should be analyzable!\"" , "llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp", 1968, __extension__ __PRETTY_FUNCTION__)); | |||
1969 | if (!TB && !FB) | |||
1970 | TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL); | |||
1971 | ||||
1972 | // Finally, the branch from the preheader to the header. | |||
1973 | TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL); | |||
1974 | NewPH->addSuccessor(Header); | |||
1975 | ||||
1976 | MachineLoop *ParentLoop = L->getParentLoop(); | |||
1977 | if (ParentLoop) | |||
1978 | ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase()); | |||
1979 | ||||
1980 | // Update the dominator information with the new preheader. | |||
1981 | if (MDT) { | |||
1982 | if (MachineDomTreeNode *HN = MDT->getNode(Header)) { | |||
1983 | if (MachineDomTreeNode *DHN = HN->getIDom()) { | |||
1984 | MDT->addNewBlock(NewPH, DHN->getBlock()); | |||
1985 | MDT->changeImmediateDominator(Header, NewPH); | |||
1986 | } | |||
1987 | } | |||
1988 | } | |||
1989 | ||||
1990 | return NewPH; | |||
1991 | } |