LLVM 20.0.0git
HexagonHardwareLoops.cpp
Go to the documentation of this file.
1//===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass identifies loops where we can generate the Hexagon hardware
10// loop instruction. The hardware loop can perform loop branches with a
11// zero-cycle overhead.
12//
13// The pattern that defines the induction variable can changed depending on
14// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
15// normalizes induction variables, and the Loop Strength Reduction pass
16// run by 'llc' may also make changes to the induction variable.
17// The pattern detected by this phase is due to running Strength Reduction.
18//
19// Criteria for hardware loops:
20// - Countable loops (w/ ind. var for a trip count)
21// - Assumes loops are normalized by IndVarSimplify
22// - Try inner-most loops first
23// - No function calls in loops.
24//
25//===----------------------------------------------------------------------===//
26
27#include "HexagonInstrInfo.h"
28#include "HexagonSubtarget.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/ADT/StringRef.h"
45#include "llvm/IR/DebugLoc.h"
47#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
53#include <cassert>
54#include <cstdint>
55#include <cstdlib>
56#include <iterator>
57#include <map>
58#include <set>
59#include <string>
60#include <utility>
61#include <vector>
62
63using namespace llvm;
64
65#define DEBUG_TYPE "hwloops"
66
67#ifndef NDEBUG
68static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
69
70// Option to create preheader only for a specific function.
71static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
72 cl::init(""));
73#endif
74
75// Option to create a preheader if one doesn't exist.
76static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
77 cl::Hidden, cl::init(true),
78 cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
79
80// Turn it off by default. If a preheader block is not created here, the
81// software pipeliner may be unable to find a block suitable to serve as
82// a preheader. In that case SWP will not run.
83static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::Hidden,
84 cl::desc("Allow speculation of preheader "
85 "instructions"));
86
87STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
88
89namespace llvm {
90
93
94} // end namespace llvm
95
96namespace {
97
98 class CountValue;
99
100 struct HexagonHardwareLoops : public MachineFunctionPass {
101 MachineLoopInfo *MLI;
104 const HexagonInstrInfo *TII;
106#ifndef NDEBUG
107 static int Counter;
108#endif
109
110 public:
111 static char ID;
112
113 HexagonHardwareLoops() : MachineFunctionPass(ID) {}
114
115 bool runOnMachineFunction(MachineFunction &MF) override;
116
117 StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
118
119 void getAnalysisUsage(AnalysisUsage &AU) const override {
123 }
124
125 private:
126 using LoopFeederMap = std::map<Register, MachineInstr *>;
127
128 /// Kinds of comparisons in the compare instructions.
129 struct Comparison {
130 enum Kind {
131 EQ = 0x01,
132 NE = 0x02,
133 L = 0x04,
134 G = 0x08,
135 U = 0x40,
136 LTs = L,
137 LEs = L | EQ,
138 GTs = G,
139 GEs = G | EQ,
140 LTu = L | U,
141 LEu = L | EQ | U,
142 GTu = G | U,
143 GEu = G | EQ | U
144 };
145
146 static Kind getSwappedComparison(Kind Cmp) {
147 assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
148 if ((Cmp & L) || (Cmp & G))
149 return (Kind)(Cmp ^ (L|G));
150 return Cmp;
151 }
152
153 static Kind getNegatedComparison(Kind Cmp) {
154 if ((Cmp & L) || (Cmp & G))
155 return (Kind)((Cmp ^ (L | G)) ^ EQ);
156 if ((Cmp & NE) || (Cmp & EQ))
157 return (Kind)(Cmp ^ (EQ | NE));
158 return (Kind)0;
159 }
160
161 static bool isSigned(Kind Cmp) {
162 return (Cmp & (L | G) && !(Cmp & U));
163 }
164
165 static bool isUnsigned(Kind Cmp) {
166 return (Cmp & U);
167 }
168 };
169
170 /// Find the register that contains the loop controlling
171 /// induction variable.
172 /// If successful, it will return true and set the \p Reg, \p IVBump
173 /// and \p IVOp arguments. Otherwise it will return false.
174 /// The returned induction register is the register R that follows the
175 /// following induction pattern:
176 /// loop:
177 /// R = phi ..., [ R.next, LatchBlock ]
178 /// R.next = R + #bump
179 /// if (R.next < #N) goto loop
180 /// IVBump is the immediate value added to R, and IVOp is the instruction
181 /// "R.next = R + #bump".
182 bool findInductionRegister(MachineLoop *L, Register &Reg,
183 int64_t &IVBump, MachineInstr *&IVOp) const;
184
185 /// Return the comparison kind for the specified opcode.
186 Comparison::Kind getComparisonKind(unsigned CondOpc,
187 MachineOperand *InitialValue,
188 const MachineOperand *Endvalue,
189 int64_t IVBump) const;
190
191 /// Analyze the statements in a loop to determine if the loop
192 /// has a computable trip count and, if so, return a value that represents
193 /// the trip count expression.
194 CountValue *getLoopTripCount(MachineLoop *L,
196
197 /// Return the expression that represents the number of times
198 /// a loop iterates. The function takes the operands that represent the
199 /// loop start value, loop end value, and induction value. Based upon
200 /// these operands, the function attempts to compute the trip count.
201 /// If the trip count is not directly available (as an immediate value,
202 /// or a register), the function will attempt to insert computation of it
203 /// to the loop's preheader.
204 CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
205 const MachineOperand *End, Register IVReg,
206 int64_t IVBump, Comparison::Kind Cmp) const;
207
208 /// Return true if the instruction is not valid within a hardware
209 /// loop.
210 bool isInvalidLoopOperation(const MachineInstr *MI,
211 bool IsInnerHWLoop) const;
212
213 /// Return true if the loop contains an instruction that inhibits
214 /// using the hardware loop.
215 bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
216
217 /// Given a loop, check if we can convert it to a hardware loop.
218 /// If so, then perform the conversion and return true.
219 bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
220
221 /// Return true if the instruction is now dead.
222 bool isDead(const MachineInstr *MI,
223 SmallVectorImpl<MachineInstr *> &DeadPhis) const;
224
225 /// Remove the instruction if it is now dead.
226 void removeIfDead(MachineInstr *MI);
227
228 /// Make sure that the "bump" instruction executes before the
229 /// compare. We need that for the IV fixup, so that the compare
230 /// instruction would not use a bumped value that has not yet been
231 /// defined. If the instructions are out of order, try to reorder them.
232 bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
233
234 /// Return true if MO and MI pair is visited only once. If visited
235 /// more than once, this indicates there is recursion. In such a case,
236 /// return false.
237 bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
238 const MachineOperand *MO,
239 LoopFeederMap &LoopFeederPhi) const;
240
241 /// Return true if the Phi may generate a value that may underflow,
242 /// or may wrap.
243 bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
245 LoopFeederMap &LoopFeederPhi) const;
246
247 /// Return true if the induction variable may underflow an unsigned
248 /// value in the first iteration.
249 bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
250 const MachineOperand *EndVal,
252 LoopFeederMap &LoopFeederPhi) const;
253
254 /// Check if the given operand has a compile-time known constant
255 /// value. Return true if yes, and false otherwise. When returning true, set
256 /// Val to the corresponding constant value.
257 bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
258
259 /// Check if the operand has a compile-time known constant value.
260 bool isImmediate(const MachineOperand &MO) const {
261 int64_t V;
262 return checkForImmediate(MO, V);
263 }
264
265 /// Return the immediate for the specified operand.
266 int64_t getImmediate(const MachineOperand &MO) const {
267 int64_t V;
268 if (!checkForImmediate(MO, V))
269 llvm_unreachable("Invalid operand");
270 return V;
271 }
272
273 /// Reset the given machine operand to now refer to a new immediate
274 /// value. Assumes that the operand was already referencing an immediate
275 /// value, either directly, or via a register.
276 void setImmediate(MachineOperand &MO, int64_t Val);
277
278 /// Fix the data flow of the induction variable.
279 /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
280 /// |
281 /// +-> back to phi
282 /// where "bump" is the increment of the induction variable:
283 /// iv = iv + #const.
284 /// Due to some prior code transformations, the actual flow may look
285 /// like this:
286 /// phi -+-> bump ---> back to phi
287 /// |
288 /// +-> comparison-in-latch (against upper_bound-bump),
289 /// i.e. the comparison that controls the loop execution may be using
290 /// the value of the induction variable from before the increment.
291 ///
292 /// Return true if the loop's flow is the desired one (i.e. it's
293 /// either been fixed, or no fixing was necessary).
294 /// Otherwise, return false. This can happen if the induction variable
295 /// couldn't be identified, or if the value in the latch's comparison
296 /// cannot be adjusted to reflect the post-bump value.
297 bool fixupInductionVariable(MachineLoop *L);
298
299 /// Given a loop, if it does not have a preheader, create one.
300 /// Return the block that is the preheader.
301 MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
302 };
303
304 char HexagonHardwareLoops::ID = 0;
305#ifndef NDEBUG
306 int HexagonHardwareLoops::Counter = 0;
307#endif
308
309 /// Abstraction for a trip count of a loop. A smaller version
310 /// of the MachineOperand class without the concerns of changing the
311 /// operand representation.
312 class CountValue {
313 public:
314 enum CountValueType {
315 CV_Register,
316 CV_Immediate
317 };
318
319 private:
320 CountValueType Kind;
321 union Values {
322 Values() : R{Register(), 0} {}
323 Values(const Values&) = default;
324 struct {
326 unsigned Sub;
327 } R;
328 unsigned ImmVal;
329 } Contents;
330
331 public:
332 explicit CountValue(CountValueType t, Register v, unsigned u = 0) {
333 Kind = t;
334 if (Kind == CV_Register) {
335 Contents.R.Reg = v;
336 Contents.R.Sub = u;
337 } else {
338 Contents.ImmVal = v;
339 }
340 }
341
342 bool isReg() const { return Kind == CV_Register; }
343 bool isImm() const { return Kind == CV_Immediate; }
344
345 Register getReg() const {
346 assert(isReg() && "Wrong CountValue accessor");
347 return Contents.R.Reg;
348 }
349
350 unsigned getSubReg() const {
351 assert(isReg() && "Wrong CountValue accessor");
352 return Contents.R.Sub;
353 }
354
355 unsigned getImm() const {
356 assert(isImm() && "Wrong CountValue accessor");
357 return Contents.ImmVal;
358 }
359
360 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
361 if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); }
362 if (isImm()) { OS << Contents.ImmVal; }
363 }
364 };
365
366} // end anonymous namespace
367
368INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
369 "Hexagon Hardware Loops", false, false)
372INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
373 "Hexagon Hardware Loops", false, false)
374
376 return new HexagonHardwareLoops();
377}
378
379bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
380 LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
381 if (skipFunction(MF.getFunction()))
382 return false;
383
384 bool Changed = false;
385
386 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
387 MRI = &MF.getRegInfo();
388 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
390 TII = HST.getInstrInfo();
391 TRI = HST.getRegisterInfo();
392
393 for (auto &L : *MLI)
394 if (L->isOutermost()) {
395 bool L0Used = false;
396 bool L1Used = false;
397 Changed |= convertToHardwareLoop(L, L0Used, L1Used);
398 }
399
400 return Changed;
401}
402
403bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
404 Register &Reg,
405 int64_t &IVBump,
406 MachineInstr *&IVOp
407 ) const {
408 MachineBasicBlock *Header = L->getHeader();
409 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
410 MachineBasicBlock *Latch = L->getLoopLatch();
411 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
412 if (!Header || !Preheader || !Latch || !ExitingBlock)
413 return false;
414
415 // This pair represents an induction register together with an immediate
416 // value that will be added to it in each loop iteration.
417 using RegisterBump = std::pair<Register, int64_t>;
418
419 // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
420 // from an induction operation
421 // R.next = R + bump
422 // where bump is an immediate value.
423 using InductionMap = std::map<Register, RegisterBump>;
424
425 InductionMap IndMap;
426
427 using instr_iterator = MachineBasicBlock::instr_iterator;
428
429 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
430 I != E && I->isPHI(); ++I) {
431 MachineInstr *Phi = &*I;
432
433 // Have a PHI instruction. Get the operand that corresponds to the
434 // latch block, and see if is a result of an addition of form "reg+imm",
435 // where the "reg" is defined by the PHI node we are looking at.
436 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
437 if (Phi->getOperand(i+1).getMBB() != Latch)
438 continue;
439
440 Register PhiOpReg = Phi->getOperand(i).getReg();
441 MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
442
443 if (DI->getDesc().isAdd()) {
444 // If the register operand to the add is the PHI we're looking at, this
445 // meets the induction pattern.
446 Register IndReg = DI->getOperand(1).getReg();
447 MachineOperand &Opnd2 = DI->getOperand(2);
448 int64_t V;
449 if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
450 Register UpdReg = DI->getOperand(0).getReg();
451 IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
452 }
453 }
454 } // for (i)
455 } // for (instr)
456
458 MachineBasicBlock *TB = nullptr, *FB = nullptr;
459 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
460 if (NotAnalyzed)
461 return false;
462
463 Register PredR;
464 unsigned PredPos, PredRegFlags;
465 if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
466 return false;
467
468 MachineInstr *PredI = MRI->getVRegDef(PredR);
469 if (!PredI->isCompare())
470 return false;
471
472 Register CmpReg1, CmpReg2;
473 int64_t CmpImm = 0, CmpMask = 0;
474 bool CmpAnalyzed =
475 TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
476 // Fail if the compare was not analyzed, or it's not comparing a register
477 // with an immediate value. Not checking the mask here, since we handle
478 // the individual compare opcodes (including A4_cmpb*) later on.
479 if (!CmpAnalyzed)
480 return false;
481
482 // Exactly one of the input registers to the comparison should be among
483 // the induction registers.
484 InductionMap::iterator IndMapEnd = IndMap.end();
485 InductionMap::iterator F = IndMapEnd;
486 if (CmpReg1 != 0) {
487 InductionMap::iterator F1 = IndMap.find(CmpReg1);
488 if (F1 != IndMapEnd)
489 F = F1;
490 }
491 if (CmpReg2 != 0) {
492 InductionMap::iterator F2 = IndMap.find(CmpReg2);
493 if (F2 != IndMapEnd) {
494 if (F != IndMapEnd)
495 return false;
496 F = F2;
497 }
498 }
499 if (F == IndMapEnd)
500 return false;
501
502 Reg = F->second.first;
503 IVBump = F->second.second;
504 IVOp = MRI->getVRegDef(F->first);
505 return true;
506}
507
508// Return the comparison kind for the specified opcode.
509HexagonHardwareLoops::Comparison::Kind
510HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
511 MachineOperand *InitialValue,
512 const MachineOperand *EndValue,
513 int64_t IVBump) const {
514 Comparison::Kind Cmp = (Comparison::Kind)0;
515 switch (CondOpc) {
516 case Hexagon::C2_cmpeq:
517 case Hexagon::C2_cmpeqi:
518 case Hexagon::C2_cmpeqp:
519 Cmp = Comparison::EQ;
520 break;
521 case Hexagon::C4_cmpneq:
522 case Hexagon::C4_cmpneqi:
523 Cmp = Comparison::NE;
524 break;
525 case Hexagon::C2_cmplt:
526 Cmp = Comparison::LTs;
527 break;
528 case Hexagon::C2_cmpltu:
529 Cmp = Comparison::LTu;
530 break;
531 case Hexagon::C4_cmplte:
532 case Hexagon::C4_cmpltei:
533 Cmp = Comparison::LEs;
534 break;
535 case Hexagon::C4_cmplteu:
536 case Hexagon::C4_cmplteui:
537 Cmp = Comparison::LEu;
538 break;
539 case Hexagon::C2_cmpgt:
540 case Hexagon::C2_cmpgti:
541 case Hexagon::C2_cmpgtp:
542 Cmp = Comparison::GTs;
543 break;
544 case Hexagon::C2_cmpgtu:
545 case Hexagon::C2_cmpgtui:
546 case Hexagon::C2_cmpgtup:
547 Cmp = Comparison::GTu;
548 break;
549 case Hexagon::C2_cmpgei:
550 Cmp = Comparison::GEs;
551 break;
552 case Hexagon::C2_cmpgeui:
553 Cmp = Comparison::GEs;
554 break;
555 default:
556 return (Comparison::Kind)0;
557 }
558 return Cmp;
559}
560
561/// Analyze the statements in a loop to determine if the loop has
562/// a computable trip count and, if so, return a value that represents
563/// the trip count expression.
564///
565/// This function iterates over the phi nodes in the loop to check for
566/// induction variable patterns that are used in the calculation for
567/// the number of time the loop is executed.
568CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
570 MachineBasicBlock *TopMBB = L->getTopBlock();
572 assert(PI != TopMBB->pred_end() &&
573 "Loop must have more than one incoming edge!");
574 MachineBasicBlock *Backedge = *PI++;
575 if (PI == TopMBB->pred_end()) // dead loop?
576 return nullptr;
578 if (PI != TopMBB->pred_end()) // multiple backedges?
579 return nullptr;
580
581 // Make sure there is one incoming and one backedge and determine which
582 // is which.
583 if (L->contains(Incoming)) {
584 if (L->contains(Backedge))
585 return nullptr;
586 std::swap(Incoming, Backedge);
587 } else if (!L->contains(Backedge))
588 return nullptr;
589
590 // Look for the cmp instruction to determine if we can get a useful trip
591 // count. The trip count can be either a register or an immediate. The
592 // location of the value depends upon the type (reg or imm).
593 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
594 if (!ExitingBlock)
595 return nullptr;
596
597 Register IVReg = 0;
598 int64_t IVBump = 0;
599 MachineInstr *IVOp;
600 bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
601 if (!FoundIV)
602 return nullptr;
603
604 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
605
606 MachineOperand *InitialValue = nullptr;
607 MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
608 MachineBasicBlock *Latch = L->getLoopLatch();
609 for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
610 MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
611 if (MBB == Preheader)
612 InitialValue = &IV_Phi->getOperand(i);
613 else if (MBB == Latch)
614 IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
615 }
616 if (!InitialValue)
617 return nullptr;
618
620 MachineBasicBlock *TB = nullptr, *FB = nullptr;
621 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
622 if (NotAnalyzed)
623 return nullptr;
624
625 MachineBasicBlock *Header = L->getHeader();
626 // TB must be non-null. If FB is also non-null, one of them must be
627 // the header. Otherwise, branch to TB could be exiting the loop, and
628 // the fall through can go to the header.
629 assert (TB && "Exit block without a branch?");
630 if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
631 MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
633 bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
634 if (NotAnalyzed)
635 return nullptr;
636 if (TB == Latch)
637 TB = (LTB == Header) ? LTB : LFB;
638 else
639 FB = (LTB == Header) ? LTB: LFB;
640 }
641 assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
642 if (!TB || (FB && TB != Header && FB != Header))
643 return nullptr;
644
645 // Branches of form "if (!P) ..." cause HexagonInstrInfo::analyzeBranch
646 // to put imm(0), followed by P in the vector Cond.
647 // If TB is not the header, it means that the "not-taken" path must lead
648 // to the header.
649 bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
650 Register PredReg;
651 unsigned PredPos, PredRegFlags;
652 if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
653 return nullptr;
654 MachineInstr *CondI = MRI->getVRegDef(PredReg);
655 unsigned CondOpc = CondI->getOpcode();
656
657 Register CmpReg1, CmpReg2;
658 int64_t Mask = 0, ImmValue = 0;
659 bool AnalyzedCmp =
660 TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
661 if (!AnalyzedCmp)
662 return nullptr;
663
664 // The comparison operator type determines how we compute the loop
665 // trip count.
666 OldInsts.push_back(CondI);
667 OldInsts.push_back(IVOp);
668
669 // Sadly, the following code gets information based on the position
670 // of the operands in the compare instruction. This has to be done
671 // this way, because the comparisons check for a specific relationship
672 // between the operands (e.g. is-less-than), rather than to find out
673 // what relationship the operands are in (as on PPC).
674 Comparison::Kind Cmp;
675 bool isSwapped = false;
676 const MachineOperand &Op1 = CondI->getOperand(1);
677 const MachineOperand &Op2 = CondI->getOperand(2);
678 const MachineOperand *EndValue = nullptr;
679
680 if (Op1.isReg()) {
681 if (Op2.isImm() || Op1.getReg() == IVReg)
682 EndValue = &Op2;
683 else {
684 EndValue = &Op1;
685 isSwapped = true;
686 }
687 }
688
689 if (!EndValue)
690 return nullptr;
691
692 Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
693 if (!Cmp)
694 return nullptr;
695 if (Negated)
696 Cmp = Comparison::getNegatedComparison(Cmp);
697 if (isSwapped)
698 Cmp = Comparison::getSwappedComparison(Cmp);
699
700 if (InitialValue->isReg()) {
701 Register R = InitialValue->getReg();
702 MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
703 if (!MDT->properlyDominates(DefBB, Header)) {
704 int64_t V;
705 if (!checkForImmediate(*InitialValue, V))
706 return nullptr;
707 }
708 OldInsts.push_back(MRI->getVRegDef(R));
709 }
710 if (EndValue->isReg()) {
711 Register R = EndValue->getReg();
712 MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
713 if (!MDT->properlyDominates(DefBB, Header)) {
714 int64_t V;
715 if (!checkForImmediate(*EndValue, V))
716 return nullptr;
717 }
718 OldInsts.push_back(MRI->getVRegDef(R));
719 }
720
721 return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
722}
723
724/// Helper function that returns the expression that represents the
725/// number of times a loop iterates. The function takes the operands that
726/// represent the loop start value, loop end value, and induction value.
727/// Based upon these operands, the function attempts to compute the trip count.
728CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
729 const MachineOperand *Start,
730 const MachineOperand *End,
731 Register IVReg,
732 int64_t IVBump,
733 Comparison::Kind Cmp) const {
734 // Cannot handle comparison EQ, i.e. while (A == B).
735 if (Cmp == Comparison::EQ)
736 return nullptr;
737
738 // Check if either the start or end values are an assignment of an immediate.
739 // If so, use the immediate value rather than the register.
740 if (Start->isReg()) {
741 const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
742 if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
743 StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
744 Start = &StartValInstr->getOperand(1);
745 }
746 if (End->isReg()) {
747 const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
748 if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
749 EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
750 End = &EndValInstr->getOperand(1);
751 }
752
753 if (!Start->isReg() && !Start->isImm())
754 return nullptr;
755 if (!End->isReg() && !End->isImm())
756 return nullptr;
757
758 bool CmpLess = Cmp & Comparison::L;
759 bool CmpGreater = Cmp & Comparison::G;
760 bool CmpHasEqual = Cmp & Comparison::EQ;
761
762 // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
763 if (CmpLess && IVBump < 0)
764 // Loop going while iv is "less" with the iv value going down. Must wrap.
765 return nullptr;
766
767 if (CmpGreater && IVBump > 0)
768 // Loop going while iv is "greater" with the iv value going up. Must wrap.
769 return nullptr;
770
771 // Phis that may feed into the loop.
772 LoopFeederMap LoopFeederPhi;
773
774 // Check if the initial value may be zero and can be decremented in the first
775 // iteration. If the value is zero, the endloop instruction will not decrement
776 // the loop counter, so we shouldn't generate a hardware loop in this case.
777 if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
778 LoopFeederPhi))
779 return nullptr;
780
781 if (Start->isImm() && End->isImm()) {
782 // Both, start and end are immediates.
783 int64_t StartV = Start->getImm();
784 int64_t EndV = End->getImm();
785 int64_t Dist = EndV - StartV;
786 if (Dist == 0)
787 return nullptr;
788
789 bool Exact = (Dist % IVBump) == 0;
790
791 if (Cmp == Comparison::NE) {
792 if (!Exact)
793 return nullptr;
794 if ((Dist < 0) ^ (IVBump < 0))
795 return nullptr;
796 }
797
798 // For comparisons that include the final value (i.e. include equality
799 // with the final value), we need to increase the distance by 1.
800 if (CmpHasEqual)
801 Dist = Dist > 0 ? Dist+1 : Dist-1;
802
803 // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
804 // CmpGreater should imply Dist < 0. These conditions could actually
805 // fail, for example, in unreachable code (which may still appear to be
806 // reachable in the CFG).
807 if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
808 return nullptr;
809
810 // "Normalized" distance, i.e. with the bump set to +-1.
811 int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
812 : (-Dist + (-IVBump - 1)) / (-IVBump);
813 assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
814
815 uint64_t Count = Dist1;
816
817 if (Count > 0xFFFFFFFFULL)
818 return nullptr;
819
820 return new CountValue(CountValue::CV_Immediate, Count);
821 }
822
823 // A general case: Start and End are some values, but the actual
824 // iteration count may not be available. If it is not, insert
825 // a computation of it into the preheader.
826
827 // If the induction variable bump is not a power of 2, quit.
828 // Othwerise we'd need a general integer division.
829 if (!isPowerOf2_64(std::abs(IVBump)))
830 return nullptr;
831
832 MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
833 assert (PH && "Should have a preheader by now");
835 DebugLoc DL;
836 if (InsertPos != PH->end())
837 DL = InsertPos->getDebugLoc();
838
839 // If Start is an immediate and End is a register, the trip count
840 // will be "reg - imm". Hexagon's "subtract immediate" instruction
841 // is actually "reg + -imm".
842
843 // If the loop IV is going downwards, i.e. if the bump is negative,
844 // then the iteration count (computed as End-Start) will need to be
845 // negated. To avoid the negation, just swap Start and End.
846 if (IVBump < 0) {
847 std::swap(Start, End);
848 IVBump = -IVBump;
849 }
850 // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
851 // Signedness, and "including equality" are preserved.
852
853 bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
854 bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
855
856 int64_t StartV = 0, EndV = 0;
857 if (Start->isImm())
858 StartV = Start->getImm();
859 if (End->isImm())
860 EndV = End->getImm();
861
862 int64_t AdjV = 0;
863 // To compute the iteration count, we would need this computation:
864 // Count = (End - Start + (IVBump-1)) / IVBump
865 // or, when CmpHasEqual:
866 // Count = (End - Start + (IVBump-1)+1) / IVBump
867 // The "IVBump-1" part is the adjustment (AdjV). We can avoid
868 // generating an instruction specifically to add it if we can adjust
869 // the immediate values for Start or End.
870
871 if (CmpHasEqual) {
872 // Need to add 1 to the total iteration count.
873 if (Start->isImm())
874 StartV--;
875 else if (End->isImm())
876 EndV++;
877 else
878 AdjV += 1;
879 }
880
881 if (Cmp != Comparison::NE) {
882 if (Start->isImm())
883 StartV -= (IVBump-1);
884 else if (End->isImm())
885 EndV += (IVBump-1);
886 else
887 AdjV += (IVBump-1);
888 }
889
890 Register R = 0;
891 unsigned SR = 0;
892 if (Start->isReg()) {
893 R = Start->getReg();
894 SR = Start->getSubReg();
895 } else {
896 R = End->getReg();
897 SR = End->getSubReg();
898 }
899 const TargetRegisterClass *RC = MRI->getRegClass(R);
900 // Hardware loops cannot handle 64-bit registers. If it's a double
901 // register, it has to have a subregister.
902 if (!SR && RC == &Hexagon::DoubleRegsRegClass)
903 return nullptr;
904 const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
905
906 // Compute DistR (register with the distance between Start and End).
907 Register DistR;
908 unsigned DistSR;
909
910 // Avoid special case, where the start value is an imm(0).
911 if (Start->isImm() && StartV == 0) {
912 DistR = End->getReg();
913 DistSR = End->getSubReg();
914 } else {
915 const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
916 (RegToImm ? TII->get(Hexagon::A2_subri) :
917 TII->get(Hexagon::A2_addi));
918 if (RegToReg || RegToImm) {
919 Register SubR = MRI->createVirtualRegister(IntRC);
920 MachineInstrBuilder SubIB =
921 BuildMI(*PH, InsertPos, DL, SubD, SubR);
922
923 if (RegToReg)
924 SubIB.addReg(End->getReg(), 0, End->getSubReg())
925 .addReg(Start->getReg(), 0, Start->getSubReg());
926 else
927 SubIB.addImm(EndV)
928 .addReg(Start->getReg(), 0, Start->getSubReg());
929 DistR = SubR;
930 } else {
931 // If the loop has been unrolled, we should use the original loop count
932 // instead of recalculating the value. This will avoid additional
933 // 'Add' instruction.
934 const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
935 if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
936 EndValInstr->getOperand(1).getSubReg() == 0 &&
937 EndValInstr->getOperand(2).getImm() == StartV) {
938 DistR = EndValInstr->getOperand(1).getReg();
939 } else {
940 Register SubR = MRI->createVirtualRegister(IntRC);
941 MachineInstrBuilder SubIB =
942 BuildMI(*PH, InsertPos, DL, SubD, SubR);
943 SubIB.addReg(End->getReg(), 0, End->getSubReg())
944 .addImm(-StartV);
945 DistR = SubR;
946 }
947 }
948 DistSR = 0;
949 }
950
951 // From DistR, compute AdjR (register with the adjusted distance).
952 Register AdjR;
953 unsigned AdjSR;
954
955 if (AdjV == 0) {
956 AdjR = DistR;
957 AdjSR = DistSR;
958 } else {
959 // Generate CountR = ADD DistR, AdjVal
960 Register AddR = MRI->createVirtualRegister(IntRC);
961 MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
962 BuildMI(*PH, InsertPos, DL, AddD, AddR)
963 .addReg(DistR, 0, DistSR)
964 .addImm(AdjV);
965
966 AdjR = AddR;
967 AdjSR = 0;
968 }
969
970 // From AdjR, compute CountR (register with the final count).
971 Register CountR;
972 unsigned CountSR;
973
974 if (IVBump == 1) {
975 CountR = AdjR;
976 CountSR = AdjSR;
977 } else {
978 // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
979 unsigned Shift = Log2_32(IVBump);
980
981 // Generate NormR = LSR DistR, Shift.
982 Register LsrR = MRI->createVirtualRegister(IntRC);
983 const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
984 BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
985 .addReg(AdjR, 0, AdjSR)
986 .addImm(Shift);
987
988 CountR = LsrR;
989 CountSR = 0;
990 }
991
992 return new CountValue(CountValue::CV_Register, CountR, CountSR);
993}
994
995/// Return true if the operation is invalid within hardware loop.
996bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
997 bool IsInnerHWLoop) const {
998 // Call is not allowed because the callee may use a hardware loop except for
999 // the case when the call never returns.
1000 if (MI->getDesc().isCall())
1001 return !TII->doesNotReturn(*MI);
1002
1003 // Check if the instruction defines a hardware loop register.
1004 using namespace Hexagon;
1005
1006 static const Register Regs01[] = { LC0, SA0, LC1, SA1 };
1007 static const Register Regs1[] = { LC1, SA1 };
1008 auto CheckRegs = IsInnerHWLoop ? ArrayRef(Regs01) : ArrayRef(Regs1);
1009 for (Register R : CheckRegs)
1010 if (MI->modifiesRegister(R, TRI))
1011 return true;
1012
1013 return false;
1014}
1015
1016/// Return true if the loop contains an instruction that inhibits
1017/// the use of the hardware loop instruction.
1018bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
1019 bool IsInnerHWLoop) const {
1020 LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1021 << printMBBReference(**L->block_begin()));
1022 for (MachineBasicBlock *MBB : L->getBlocks()) {
1023 for (const MachineInstr &MI : *MBB) {
1024 if (isInvalidLoopOperation(&MI, IsInnerHWLoop)) {
1025 LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
1026 MI.dump(););
1027 return true;
1028 }
1029 }
1030 }
1031 return false;
1032}
1033
1034/// Returns true if the instruction is dead. This was essentially
1035/// copied from DeadMachineInstructionElim::isDead, but with special cases
1036/// for inline asm, physical registers and instructions with side effects
1037/// removed.
1038bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1039 SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1040 // Examine each operand.
1041 for (const MachineOperand &MO : MI->operands()) {
1042 if (!MO.isReg() || !MO.isDef())
1043 continue;
1044
1045 Register Reg = MO.getReg();
1046 if (MRI->use_nodbg_empty(Reg))
1047 continue;
1048
1049 using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1050
1051 // This instruction has users, but if the only user is the phi node for the
1052 // parent block, and the only use of that phi node is this instruction, then
1053 // this instruction is dead: both it (and the phi node) can be removed.
1054 use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1055 use_nodbg_iterator End = MRI->use_nodbg_end();
1056 if (std::next(I) != End || !I->getParent()->isPHI())
1057 return false;
1058
1059 MachineInstr *OnePhi = I->getParent();
1060 for (const MachineOperand &OPO : OnePhi->operands()) {
1061 if (!OPO.isReg() || !OPO.isDef())
1062 continue;
1063
1064 Register OPReg = OPO.getReg();
1065 use_nodbg_iterator nextJ;
1066 for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1067 J != End; J = nextJ) {
1068 nextJ = std::next(J);
1069 MachineOperand &Use = *J;
1070 MachineInstr *UseMI = Use.getParent();
1071
1072 // If the phi node has a user that is not MI, bail.
1073 if (MI != UseMI)
1074 return false;
1075 }
1076 }
1077 DeadPhis.push_back(OnePhi);
1078 }
1079
1080 // If there are no defs with uses, the instruction is dead.
1081 return true;
1082}
1083
1084void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1085 // This procedure was essentially copied from DeadMachineInstructionElim.
1086
1088 if (isDead(MI, DeadPhis)) {
1089 LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI);
1090
1091 // It is possible that some DBG_VALUE instructions refer to this
1092 // instruction. Examine each def operand for such references;
1093 // if found, mark the DBG_VALUE as undef (but don't delete it).
1094 for (const MachineOperand &MO : MI->operands()) {
1095 if (!MO.isReg() || !MO.isDef())
1096 continue;
1097 Register Reg = MO.getReg();
1098 // We use make_early_inc_range here because setReg below invalidates the
1099 // iterator.
1100 for (MachineOperand &MO :
1101 llvm::make_early_inc_range(MRI->use_operands(Reg))) {
1103 if (UseMI == MI)
1104 continue;
1105 if (MO.isDebug())
1106 MO.setReg(0U);
1107 }
1108 }
1109
1110 MI->eraseFromParent();
1111 for (unsigned i = 0; i < DeadPhis.size(); ++i)
1112 DeadPhis[i]->eraseFromParent();
1113 }
1114}
1115
1116/// Check if the loop is a candidate for converting to a hardware
1117/// loop. If so, then perform the transformation.
1118///
1119/// This function works on innermost loops first. A loop can be converted
1120/// if it is a counting loop; either a register value or an immediate.
1121///
1122/// The code makes several assumptions about the representation of the loop
1123/// in llvm.
1124bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1125 bool &RecL0used,
1126 bool &RecL1used) {
1127 // This is just to confirm basic correctness.
1128 assert(L->getHeader() && "Loop without a header?");
1129
1130 bool Changed = false;
1131 bool L0Used = false;
1132 bool L1Used = false;
1133
1134 // Process nested loops first.
1135 for (MachineLoop *I : *L) {
1136 Changed |= convertToHardwareLoop(I, RecL0used, RecL1used);
1137 L0Used |= RecL0used;
1138 L1Used |= RecL1used;
1139 }
1140
1141 // If a nested loop has been converted, then we can't convert this loop.
1142 if (Changed && L0Used && L1Used)
1143 return Changed;
1144
1145 unsigned LOOP_i;
1146 unsigned LOOP_r;
1147 unsigned ENDLOOP;
1148
1149 // Flag used to track loopN instruction:
1150 // 1 - Hardware loop is being generated for the inner most loop.
1151 // 0 - Hardware loop is being generated for the outer loop.
1152 unsigned IsInnerHWLoop = 1;
1153
1154 if (L0Used) {
1155 LOOP_i = Hexagon::J2_loop1i;
1156 LOOP_r = Hexagon::J2_loop1r;
1157 ENDLOOP = Hexagon::ENDLOOP1;
1158 IsInnerHWLoop = 0;
1159 } else {
1160 LOOP_i = Hexagon::J2_loop0i;
1161 LOOP_r = Hexagon::J2_loop0r;
1162 ENDLOOP = Hexagon::ENDLOOP0;
1163 }
1164
1165#ifndef NDEBUG
1166 // Stop trying after reaching the limit (if any).
1167 int Limit = HWLoopLimit;
1168 if (Limit >= 0) {
1169 if (Counter >= HWLoopLimit)
1170 return false;
1171 Counter++;
1172 }
1173#endif
1174
1175 // Does the loop contain any invalid instructions?
1176 if (containsInvalidInstruction(L, IsInnerHWLoop))
1177 return false;
1178
1179 MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1180 // Don't generate hw loop if the loop has more than one exit.
1181 if (!LastMBB)
1182 return false;
1183
1185 if (LastI == LastMBB->end())
1186 return false;
1187
1188 // Is the induction variable bump feeding the latch condition?
1189 if (!fixupInductionVariable(L))
1190 return false;
1191
1192 // Ensure the loop has a preheader: the loop instruction will be
1193 // placed there.
1194 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1195 if (!Preheader) {
1196 Preheader = createPreheaderForLoop(L);
1197 if (!Preheader)
1198 return false;
1199 }
1200
1201 MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1202
1204 // Are we able to determine the trip count for the loop?
1205 CountValue *TripCount = getLoopTripCount(L, OldInsts);
1206 if (!TripCount)
1207 return false;
1208
1209 // Is the trip count available in the preheader?
1210 if (TripCount->isReg()) {
1211 // There will be a use of the register inserted into the preheader,
1212 // so make sure that the register is actually defined at that point.
1213 MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1214 MachineBasicBlock *BBDef = TCDef->getParent();
1215 if (!MDT->dominates(BBDef, Preheader))
1216 return false;
1217 }
1218
1219 // Determine the loop start.
1220 MachineBasicBlock *TopBlock = L->getTopBlock();
1221 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1222 MachineBasicBlock *LoopStart = nullptr;
1223 if (ExitingBlock != L->getLoopLatch()) {
1224 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1226
1227 if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1228 return false;
1229
1230 if (L->contains(TB))
1231 LoopStart = TB;
1232 else if (L->contains(FB))
1233 LoopStart = FB;
1234 else
1235 return false;
1236 }
1237 else
1238 LoopStart = TopBlock;
1239
1240 // Convert the loop to a hardware loop.
1241 LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1242 DebugLoc DL;
1243 if (InsertPos != Preheader->end())
1244 DL = InsertPos->getDebugLoc();
1245
1246 if (TripCount->isReg()) {
1247 // Create a copy of the loop count register.
1248 Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1249 BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1250 .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1251 // Add the Loop instruction to the beginning of the loop.
1252 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1253 .addReg(CountReg);
1254 } else {
1255 assert(TripCount->isImm() && "Expecting immediate value for trip count");
1256 // Add the Loop immediate instruction to the beginning of the loop,
1257 // if the immediate fits in the instructions. Otherwise, we need to
1258 // create a new virtual register.
1259 int64_t CountImm = TripCount->getImm();
1260 if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {
1261 Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1262 BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1263 .addImm(CountImm);
1264 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1265 .addMBB(LoopStart).addReg(CountReg);
1266 } else
1267 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1268 .addMBB(LoopStart).addImm(CountImm);
1269 }
1270
1271 // Make sure the loop start always has a reference in the CFG.
1272 LoopStart->setMachineBlockAddressTaken();
1273
1274 // Replace the loop branch with an endloop instruction.
1275 DebugLoc LastIDL = LastI->getDebugLoc();
1276 BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1277
1278 // The loop ends with either:
1279 // - a conditional branch followed by an unconditional branch, or
1280 // - a conditional branch to the loop start.
1281 if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1282 LastI->getOpcode() == Hexagon::J2_jumpf) {
1283 // Delete one and change/add an uncond. branch to out of the loop.
1284 MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1285 LastI = LastMBB->erase(LastI);
1286 if (!L->contains(BranchTarget)) {
1287 if (LastI != LastMBB->end())
1288 LastI = LastMBB->erase(LastI);
1290 TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1291 }
1292 } else {
1293 // Conditional branch to loop start; just delete it.
1294 LastMBB->erase(LastI);
1295 }
1296 delete TripCount;
1297
1298 // The induction operation and the comparison may now be
1299 // unneeded. If these are unneeded, then remove them.
1300 for (unsigned i = 0; i < OldInsts.size(); ++i)
1301 removeIfDead(OldInsts[i]);
1302
1303 ++NumHWLoops;
1304
1305 // Set RecL1used and RecL0used only after hardware loop has been
1306 // successfully generated. Doing it earlier can cause wrong loop instruction
1307 // to be used.
1308 if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1309 RecL1used = true;
1310 else
1311 RecL0used = true;
1312
1313 return true;
1314}
1315
1316bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1317 MachineInstr *CmpI) {
1318 assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1319
1320 MachineBasicBlock *BB = BumpI->getParent();
1321 if (CmpI->getParent() != BB)
1322 return false;
1323
1324 using instr_iterator = MachineBasicBlock::instr_iterator;
1325
1326 // Check if things are in order to begin with.
1327 for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1328 if (&*I == CmpI)
1329 return true;
1330
1331 // Out of order.
1332 Register PredR = CmpI->getOperand(0).getReg();
1333 bool FoundBump = false;
1334 instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1335 for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1336 MachineInstr *In = &*I;
1337 for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1338 MachineOperand &MO = In->getOperand(i);
1339 if (MO.isReg() && MO.isUse()) {
1340 if (MO.getReg() == PredR) // Found an intervening use of PredR.
1341 return false;
1342 }
1343 }
1344
1345 if (In == BumpI) {
1346 BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1347 FoundBump = true;
1348 break;
1349 }
1350 }
1351 assert (FoundBump && "Cannot determine instruction order");
1352 return FoundBump;
1353}
1354
1355/// This function is required to break recursion. Visiting phis in a loop may
1356/// result in recursion during compilation. We break the recursion by making
1357/// sure that we visit a MachineOperand and its definition in a
1358/// MachineInstruction only once. If we attempt to visit more than once, then
1359/// there is recursion, and will return false.
1360bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1362 const MachineOperand *MO,
1363 LoopFeederMap &LoopFeederPhi) const {
1364 if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1365 LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1366 << printMBBReference(**L->block_begin()));
1367 // Ignore all BBs that form Loop.
1368 if (llvm::is_contained(L->getBlocks(), A))
1369 return false;
1370 MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1371 LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1372 return true;
1373 } else
1374 // Already visited node.
1375 return false;
1376}
1377
1378/// Return true if a Phi may generate a value that can underflow.
1379/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1380bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1381 MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1382 MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1383 assert(Phi->isPHI() && "Expecting a Phi.");
1384 // Walk through each Phi, and its used operands. Make sure that
1385 // if there is recursion in Phi, we won't generate hardware loops.
1386 for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1387 if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1388 if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1389 Phi->getParent(), L, LoopFeederPhi))
1390 return true;
1391 return false;
1392}
1393
1394/// Return true if the induction variable can underflow in the first iteration.
1395/// An example, is an initial unsigned value that is 0 and is decrement in the
1396/// first itertion of a do-while loop. In this case, we cannot generate a
1397/// hardware loop because the endloop instruction does not decrement the loop
1398/// counter if it is <= 1. We only need to perform this analysis if the
1399/// initial value is a register.
1400///
1401/// This function assumes the initial value may underfow unless proven
1402/// otherwise. If the type is signed, then we don't care because signed
1403/// underflow is undefined. We attempt to prove the initial value is not
1404/// zero by perfoming a crude analysis of the loop counter. This function
1405/// checks if the initial value is used in any comparison prior to the loop
1406/// and, if so, assumes the comparison is a range check. This is inexact,
1407/// but will catch the simple cases.
1408bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1409 const MachineOperand *InitVal, const MachineOperand *EndVal,
1411 LoopFeederMap &LoopFeederPhi) const {
1412 // Only check register values since they are unknown.
1413 if (!InitVal->isReg())
1414 return false;
1415
1416 if (!EndVal->isImm())
1417 return false;
1418
1419 // A register value that is assigned an immediate is a known value, and it
1420 // won't underflow in the first iteration.
1421 int64_t Imm;
1422 if (checkForImmediate(*InitVal, Imm))
1423 return (EndVal->getImm() == Imm);
1424
1425 Register Reg = InitVal->getReg();
1426
1427 // We don't know the value of a physical register.
1428 if (!Reg.isVirtual())
1429 return true;
1430
1431 MachineInstr *Def = MRI->getVRegDef(Reg);
1432 if (!Def)
1433 return true;
1434
1435 // If the initial value is a Phi or copy and the operands may not underflow,
1436 // then the definition cannot be underflow either.
1437 if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1438 L, LoopFeederPhi))
1439 return false;
1440 if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1441 EndVal, Def->getParent(),
1442 L, LoopFeederPhi))
1443 return false;
1444
1445 // Iterate over the uses of the initial value. If the initial value is used
1446 // in a compare, then we assume this is a range check that ensures the loop
1447 // doesn't underflow. This is not an exact test and should be improved.
1448 for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
1449 E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1450 MachineInstr *MI = &*I;
1451 Register CmpReg1, CmpReg2;
1452 int64_t CmpMask = 0, CmpValue = 0;
1453
1454 if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1455 continue;
1456
1457 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1459 if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1460 continue;
1461
1462 Comparison::Kind Cmp =
1463 getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1464 if (Cmp == 0)
1465 continue;
1466 if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1467 Cmp = Comparison::getNegatedComparison(Cmp);
1468 if (CmpReg2 != 0 && CmpReg2 == Reg)
1469 Cmp = Comparison::getSwappedComparison(Cmp);
1470
1471 // Signed underflow is undefined.
1472 if (Comparison::isSigned(Cmp))
1473 return false;
1474
1475 // Check if there is a comparison of the initial value. If the initial value
1476 // is greater than or not equal to another value, then assume this is a
1477 // range check.
1478 if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1479 return false;
1480 }
1481
1482 // OK - this is a hack that needs to be improved. We really need to analyze
1483 // the instructions performed on the initial value. This works on the simplest
1484 // cases only.
1485 if (!Def->isCopy() && !Def->isPHI())
1486 return false;
1487
1488 return true;
1489}
1490
1491bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1492 int64_t &Val) const {
1493 if (MO.isImm()) {
1494 Val = MO.getImm();
1495 return true;
1496 }
1497 if (!MO.isReg())
1498 return false;
1499
1500 // MO is a register. Check whether it is defined as an immediate value,
1501 // and if so, get the value of it in TV. That value will then need to be
1502 // processed to handle potential subregisters in MO.
1503 int64_t TV;
1504
1505 Register R = MO.getReg();
1506 if (!R.isVirtual())
1507 return false;
1508 MachineInstr *DI = MRI->getVRegDef(R);
1509 unsigned DOpc = DI->getOpcode();
1510 switch (DOpc) {
1511 case TargetOpcode::COPY:
1512 case Hexagon::A2_tfrsi:
1513 case Hexagon::A2_tfrpi:
1514 case Hexagon::CONST32:
1515 case Hexagon::CONST64:
1516 // Call recursively to avoid an extra check whether operand(1) is
1517 // indeed an immediate (it could be a global address, for example),
1518 // plus we can handle COPY at the same time.
1519 if (!checkForImmediate(DI->getOperand(1), TV))
1520 return false;
1521 break;
1522 case Hexagon::A2_combineii:
1523 case Hexagon::A4_combineir:
1524 case Hexagon::A4_combineii:
1525 case Hexagon::A4_combineri:
1526 case Hexagon::A2_combinew: {
1527 const MachineOperand &S1 = DI->getOperand(1);
1528 const MachineOperand &S2 = DI->getOperand(2);
1529 int64_t V1, V2;
1530 if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1531 return false;
1532 TV = V2 | (static_cast<uint64_t>(V1) << 32);
1533 break;
1534 }
1535 case TargetOpcode::REG_SEQUENCE: {
1536 const MachineOperand &S1 = DI->getOperand(1);
1537 const MachineOperand &S3 = DI->getOperand(3);
1538 int64_t V1, V3;
1539 if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1540 return false;
1541 unsigned Sub2 = DI->getOperand(2).getImm();
1542 unsigned Sub4 = DI->getOperand(4).getImm();
1543 if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1544 TV = V1 | (V3 << 32);
1545 else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1546 TV = V3 | (V1 << 32);
1547 else
1548 llvm_unreachable("Unexpected form of REG_SEQUENCE");
1549 break;
1550 }
1551
1552 default:
1553 return false;
1554 }
1555
1556 // By now, we should have successfully obtained the immediate value defining
1557 // the register referenced in MO. Handle a potential use of a subregister.
1558 switch (MO.getSubReg()) {
1559 case Hexagon::isub_lo:
1560 Val = TV & 0xFFFFFFFFULL;
1561 break;
1562 case Hexagon::isub_hi:
1563 Val = (TV >> 32) & 0xFFFFFFFFULL;
1564 break;
1565 default:
1566 Val = TV;
1567 break;
1568 }
1569 return true;
1570}
1571
1572void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1573 if (MO.isImm()) {
1574 MO.setImm(Val);
1575 return;
1576 }
1577
1578 assert(MO.isReg());
1579 Register R = MO.getReg();
1580 MachineInstr *DI = MRI->getVRegDef(R);
1581
1582 const TargetRegisterClass *RC = MRI->getRegClass(R);
1583 Register NewR = MRI->createVirtualRegister(RC);
1584 MachineBasicBlock &B = *DI->getParent();
1585 DebugLoc DL = DI->getDebugLoc();
1586 BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1587 MO.setReg(NewR);
1588}
1589
1590bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1591 MachineBasicBlock *Header = L->getHeader();
1592 MachineBasicBlock *Latch = L->getLoopLatch();
1593 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1594
1595 if (!(Header && Latch && ExitingBlock))
1596 return false;
1597
1598 // These data structures follow the same concept as the corresponding
1599 // ones in findInductionRegister (where some comments are).
1600 using RegisterBump = std::pair<Register, int64_t>;
1601 using RegisterInduction = std::pair<Register, RegisterBump>;
1602 using RegisterInductionSet = std::set<RegisterInduction>;
1603
1604 // Register candidates for induction variables, with their associated bumps.
1605 RegisterInductionSet IndRegs;
1606
1607 // Look for induction patterns:
1608 // %1 = PHI ..., [ latch, %2 ]
1609 // %2 = ADD %1, imm
1610 using instr_iterator = MachineBasicBlock::instr_iterator;
1611
1612 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1613 I != E && I->isPHI(); ++I) {
1614 MachineInstr *Phi = &*I;
1615
1616 // Have a PHI instruction.
1617 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1618 if (Phi->getOperand(i+1).getMBB() != Latch)
1619 continue;
1620
1621 Register PhiReg = Phi->getOperand(i).getReg();
1622 MachineInstr *DI = MRI->getVRegDef(PhiReg);
1623
1624 if (DI->getDesc().isAdd()) {
1625 // If the register operand to the add/sub is the PHI we are looking
1626 // at, this meets the induction pattern.
1627 Register IndReg = DI->getOperand(1).getReg();
1628 MachineOperand &Opnd2 = DI->getOperand(2);
1629 int64_t V;
1630 if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1631 Register UpdReg = DI->getOperand(0).getReg();
1632 IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1633 }
1634 }
1635 } // for (i)
1636 } // for (instr)
1637
1638 if (IndRegs.empty())
1639 return false;
1640
1641 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1643 // analyzeBranch returns true if it fails to analyze branch.
1644 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1645 if (NotAnalyzed || Cond.empty())
1646 return false;
1647
1648 if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1649 MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1651 bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1652 if (NotAnalyzed)
1653 return false;
1654
1655 // Since latch is not the exiting block, the latch branch should be an
1656 // unconditional branch to the loop header.
1657 if (TB == Latch)
1658 TB = (LTB == Header) ? LTB : LFB;
1659 else
1660 FB = (LTB == Header) ? LTB : LFB;
1661 }
1662 if (TB != Header) {
1663 if (FB != Header) {
1664 // The latch/exit block does not go back to the header.
1665 return false;
1666 }
1667 // FB is the header (i.e., uncond. jump to branch header)
1668 // In this case, the LoopBody -> TB should not be a back edge otherwise
1669 // it could result in an infinite loop after conversion to hw_loop.
1670 // This case can happen when the Latch has two jumps like this:
1671 // Jmp_c OuterLoopHeader <-- TB
1672 // Jmp InnerLoopHeader <-- FB
1673 if (MDT->dominates(TB, FB))
1674 return false;
1675 }
1676
1677 // Expecting a predicate register as a condition. It won't be a hardware
1678 // predicate register at this point yet, just a vreg.
1679 // HexagonInstrInfo::analyzeBranch for negated branches inserts imm(0)
1680 // into Cond, followed by the predicate register. For non-negated branches
1681 // it's just the register.
1682 unsigned CSz = Cond.size();
1683 if (CSz != 1 && CSz != 2)
1684 return false;
1685
1686 if (!Cond[CSz-1].isReg())
1687 return false;
1688
1689 Register P = Cond[CSz - 1].getReg();
1690 MachineInstr *PredDef = MRI->getVRegDef(P);
1691
1692 if (!PredDef->isCompare())
1693 return false;
1694
1695 SmallSet<Register,2> CmpRegs;
1696 MachineOperand *CmpImmOp = nullptr;
1697
1698 // Go over all operands to the compare and look for immediate and register
1699 // operands. Assume that if the compare has a single register use and a
1700 // single immediate operand, then the register is being compared with the
1701 // immediate value.
1702 for (MachineOperand &MO : PredDef->operands()) {
1703 if (MO.isReg()) {
1704 // Skip all implicit references. In one case there was:
1705 // %140 = FCMPUGT32_rr %138, %139, implicit %usr
1706 if (MO.isImplicit())
1707 continue;
1708 if (MO.isUse()) {
1709 if (!isImmediate(MO)) {
1710 CmpRegs.insert(MO.getReg());
1711 continue;
1712 }
1713 // Consider the register to be the "immediate" operand.
1714 if (CmpImmOp)
1715 return false;
1716 CmpImmOp = &MO;
1717 }
1718 } else if (MO.isImm()) {
1719 if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1720 return false;
1721 CmpImmOp = &MO;
1722 }
1723 }
1724
1725 if (CmpRegs.empty())
1726 return false;
1727
1728 // Check if the compared register follows the order we want. Fix if needed.
1729 for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1730 I != E; ++I) {
1731 // This is a success. If the register used in the comparison is one that
1732 // we have identified as a bumped (updated) induction register, there is
1733 // nothing to do.
1734 if (CmpRegs.count(I->first))
1735 return true;
1736
1737 // Otherwise, if the register being compared comes out of a PHI node,
1738 // and has been recognized as following the induction pattern, and is
1739 // compared against an immediate, we can fix it.
1740 const RegisterBump &RB = I->second;
1741 if (CmpRegs.count(RB.first)) {
1742 if (!CmpImmOp) {
1743 // If both operands to the compare instruction are registers, see if
1744 // it can be changed to use induction register as one of the operands.
1745 MachineInstr *IndI = nullptr;
1746 MachineInstr *nonIndI = nullptr;
1747 MachineOperand *IndMO = nullptr;
1748 MachineOperand *nonIndMO = nullptr;
1749
1750 for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1751 MachineOperand &MO = PredDef->getOperand(i);
1752 if (MO.isReg() && MO.getReg() == RB.first) {
1753 LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1754 << ") = " << *(MRI->getVRegDef(I->first)));
1755 if (IndI)
1756 return false;
1757
1758 IndI = MRI->getVRegDef(I->first);
1759 IndMO = &MO;
1760 } else if (MO.isReg()) {
1761 LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1762 << ") = " << *(MRI->getVRegDef(MO.getReg())));
1763 if (nonIndI)
1764 return false;
1765
1766 nonIndI = MRI->getVRegDef(MO.getReg());
1767 nonIndMO = &MO;
1768 }
1769 }
1770 if (IndI && nonIndI &&
1771 nonIndI->getOpcode() == Hexagon::A2_addi &&
1772 nonIndI->getOperand(2).isImm() &&
1773 nonIndI->getOperand(2).getImm() == - RB.second) {
1774 bool Order = orderBumpCompare(IndI, PredDef);
1775 if (Order) {
1776 IndMO->setReg(I->first);
1777 nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1778 return true;
1779 }
1780 }
1781 return false;
1782 }
1783
1784 // It is not valid to do this transformation on an unsigned comparison
1785 // because it may underflow.
1786 Comparison::Kind Cmp =
1787 getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1788 if (!Cmp || Comparison::isUnsigned(Cmp))
1789 return false;
1790
1791 // If the register is being compared against an immediate, try changing
1792 // the compare instruction to use induction register and adjust the
1793 // immediate operand.
1794 int64_t CmpImm = getImmediate(*CmpImmOp);
1795 int64_t V = RB.second;
1796 // Handle Overflow (64-bit).
1797 if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1798 ((V < 0) && (CmpImm < INT64_MIN - V)))
1799 return false;
1800 CmpImm += V;
1801 // Most comparisons of register against an immediate value allow
1802 // the immediate to be constant-extended. There are some exceptions
1803 // though. Make sure the new combination will work.
1804 if (CmpImmOp->isImm() && !TII->isExtendable(*PredDef) &&
1805 !TII->isValidOffset(PredDef->getOpcode(), CmpImm, TRI, false))
1806 return false;
1807
1808 // Make sure that the compare happens after the bump. Otherwise,
1809 // after the fixup, the compare would use a yet-undefined register.
1810 MachineInstr *BumpI = MRI->getVRegDef(I->first);
1811 bool Order = orderBumpCompare(BumpI, PredDef);
1812 if (!Order)
1813 return false;
1814
1815 // Finally, fix the compare instruction.
1816 setImmediate(*CmpImmOp, CmpImm);
1817 for (MachineOperand &MO : PredDef->operands()) {
1818 if (MO.isReg() && MO.getReg() == RB.first) {
1819 MO.setReg(I->first);
1820 return true;
1821 }
1822 }
1823 }
1824 }
1825
1826 return false;
1827}
1828
1829/// createPreheaderForLoop - Create a preheader for a given loop.
1830MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1831 MachineLoop *L) {
1832 if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1833 return TmpPH;
1834 if (!HWCreatePreheader)
1835 return nullptr;
1836
1837 MachineBasicBlock *Header = L->getHeader();
1838 MachineBasicBlock *Latch = L->getLoopLatch();
1839 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1840 MachineFunction *MF = Header->getParent();
1841 DebugLoc DL;
1842
1843#ifndef NDEBUG
1844 if ((!PHFn.empty()) && (PHFn != MF->getName()))
1845 return nullptr;
1846#endif
1847
1848 if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1849 return nullptr;
1850
1851 using instr_iterator = MachineBasicBlock::instr_iterator;
1852
1853 // Verify that all existing predecessors have analyzable branches
1854 // (or no branches at all).
1855 using MBBVector = std::vector<MachineBasicBlock *>;
1856
1857 MBBVector Preds(Header->pred_begin(), Header->pred_end());
1859 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1860
1861 if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1862 return nullptr;
1863
1864 for (MachineBasicBlock *PB : Preds) {
1865 bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1866 if (NotAnalyzed)
1867 return nullptr;
1868 }
1869
1871 MF->insert(Header->getIterator(), NewPH);
1872
1873 if (Header->pred_size() > 2) {
1874 // Ensure that the header has only two predecessors: the preheader and
1875 // the loop latch. Any additional predecessors of the header should
1876 // join at the newly created preheader. Inspect all PHI nodes from the
1877 // header and create appropriate corresponding PHI nodes in the preheader.
1878
1879 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1880 I != E && I->isPHI(); ++I) {
1881 MachineInstr *PN = &*I;
1882
1883 const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1884 MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1885 NewPH->insert(NewPH->end(), NewPN);
1886
1887 Register PR = PN->getOperand(0).getReg();
1888 const TargetRegisterClass *RC = MRI->getRegClass(PR);
1889 Register NewPR = MRI->createVirtualRegister(RC);
1890 NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1891
1892 // Copy all non-latch operands of a header's PHI node to the newly
1893 // created PHI node in the preheader.
1894 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1895 Register PredR = PN->getOperand(i).getReg();
1896 unsigned PredRSub = PN->getOperand(i).getSubReg();
1897 MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1898 if (PredB == Latch)
1899 continue;
1900
1901 MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1902 MO.setSubReg(PredRSub);
1903 NewPN->addOperand(MO);
1905 }
1906
1907 // Remove copied operands from the old PHI node and add the value
1908 // coming from the preheader's PHI.
1909 for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1910 MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1911 if (PredB != Latch) {
1912 PN->removeOperand(i+1);
1913 PN->removeOperand(i);
1914 }
1915 }
1916 PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1918 }
1919 } else {
1920 assert(Header->pred_size() == 2);
1921
1922 // The header has only two predecessors, but the non-latch predecessor
1923 // is not a preheader (e.g. it has other successors, etc.)
1924 // In such a case we don't need any extra PHI nodes in the new preheader,
1925 // all we need is to adjust existing PHIs in the header to now refer to
1926 // the new preheader.
1927 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1928 I != E && I->isPHI(); ++I) {
1929 MachineInstr *PN = &*I;
1930 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1931 MachineOperand &MO = PN->getOperand(i+1);
1932 if (MO.getMBB() != Latch)
1933 MO.setMBB(NewPH);
1934 }
1935 }
1936 }
1937
1938 // "Reroute" the CFG edges to link in the new preheader.
1939 // If any of the predecessors falls through to the header, insert a branch
1940 // to the new preheader in that place.
1943
1944 TB = FB = nullptr;
1945
1946 for (MachineBasicBlock *PB : Preds) {
1947 if (PB != Latch) {
1948 Tmp2.clear();
1949 bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1950 (void)NotAnalyzed; // suppress compiler warning
1951 assert (!NotAnalyzed && "Should be analyzable!");
1952 if (TB != Header && (Tmp2.empty() || FB != Header))
1953 TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1954 PB->ReplaceUsesOfBlockWith(Header, NewPH);
1955 }
1956 }
1957
1958 // It can happen that the latch block will fall through into the header.
1959 // Insert an unconditional branch to the header.
1960 TB = FB = nullptr;
1961 bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1962 (void)LatchNotAnalyzed; // suppress compiler warning
1963 assert (!LatchNotAnalyzed && "Should be analyzable!");
1964 if (!TB && !FB)
1965 TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1966
1967 // Finally, the branch from the preheader to the header.
1968 TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1969 NewPH->addSuccessor(Header);
1970
1971 MachineLoop *ParentLoop = L->getParentLoop();
1972 if (ParentLoop)
1973 ParentLoop->addBasicBlockToLoop(NewPH, *MLI);
1974
1975 // Update the dominator information with the new preheader.
1976 if (MDT) {
1977 if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1978 if (MachineDomTreeNode *DHN = HN->getIDom()) {
1979 MDT->addNewBlock(NewPH, DHN->getBlock());
1980 MDT->changeImmediateDominator(Header, NewPH);
1981 }
1982 }
1983 }
1984
1985 return NewPH;
1986}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
static const LLT S1
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(...)
Definition: Debug.h:106
bool End
Definition: ELF_riscv.cpp:480
static bool isSigned(unsigned int Opcode)
const HexagonInstrInfo * TII
static cl::opt< bool > HWCreatePreheader("hexagon-hwloop-preheader", cl::Hidden, cl::init(true), cl::desc("Add a preheader to a hardware loop if one doesn't exist"))
static cl::opt< bool > SpecPreheader("hwloop-spec-preheader", cl::Hidden, cl::desc("Allow speculation of preheader " "instructions"))
static cl::opt< std::string > PHFn("hexagon-hwloop-phfn", cl::Hidden, cl::init(""))
Hexagon Hardware Loops
static cl::opt< int > HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1))
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
#define P(N)
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A debug info location.
Definition: DebugLoc.h:33
Base class for the actual dominator tree node.
DomTreeNodeBase * getIDom() const
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool doesNotReturn(const MachineInstr &CallMI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool getPredReg(ArrayRef< MachineOperand > Cond, Register &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const
bool isValidOffset(unsigned Opcode, int Offset, const TargetRegisterInfo *TRI, bool Extend=true) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool predOpcodeHasNot(ArrayRef< MachineOperand > Cond) const
bool isExtendable(const MachineInstr &MI) const
const HexagonInstrInfo * getInstrInfo() const override
const HexagonRegisterInfo * getRegisterInfo() const override
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
bool isAdd() const
Return true if the instruction is an add instruction.
Definition: MCInstrDesc.h:279
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator
Instructions::iterator instr_iterator
instr_iterator instr_end()
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImplicit=false)
CreateMachineInstr - Allocate a new MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:578
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:572
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:691
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_iterator< true, false, true, true, false, false > use_nodbg_iterator
use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the specified register,...
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
void dump() const
Definition: Pass.cpp:136
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
bool empty() const
Definition: SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define INT64_MIN
Definition: DataTypes.h:74
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
@ TB
TB - TwoByte - Set if this instruction has a two byte opcode, which starts with a 0x0F byte before th...
Definition: X86BaseInfo.h:735
@ PD
PD - Prefix code for packed double precision vector floating point operations performed in the SSE re...
Definition: X86BaseInfo.h:721
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
FunctionPass * createHexagonHardwareLoops()
void initializeHexagonHardwareLoopsPass(PassRegistry &)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define EQ(a, b)
Definition: regexec.c:112
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...