LLVM  3.7.0
HexagonHardwareLoops.cpp
Go to the documentation of this file.
1 //===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass identifies loops where we can generate the Hexagon hardware
11 // loop instruction. The hardware loop can perform loop branches with a
12 // zero-cycle overhead.
13 //
14 // The pattern that defines the induction variable can changed depending on
15 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
16 // normalizes induction variables, and the Loop Strength Reduction pass
17 // run by 'llc' may also make changes to the induction variable.
18 // The pattern detected by this phase is due to running Strength Reduction.
19 //
20 // Criteria for hardware loops:
21 // - Countable loops (w/ ind. var for a trip count)
22 // - Assumes loops are normalized by IndVarSimplify
23 // - Try inner-most loops first
24 // - No function calls in loops.
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "llvm/ADT/SmallSet.h"
29 #include "Hexagon.h"
30 #include "HexagonSubtarget.h"
31 #include "llvm/ADT/Statistic.h"
38 #include "llvm/PassSupport.h"
40 #include "llvm/Support/Debug.h"
43 #include <algorithm>
44 #include <vector>
45 
46 using namespace llvm;
47 
48 #define DEBUG_TYPE "hwloops"
49 
50 #ifndef NDEBUG
51 static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
52 
53 // Option to create preheader only for a specific function.
54 static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
55  cl::init(""));
56 #endif
57 
58 // Option to create a preheader if one doesn't exist.
59 static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
60  cl::Hidden, cl::init(true),
61  cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
62 
63 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
64 
65 namespace llvm {
68 }
69 
70 namespace {
71  class CountValue;
72  struct HexagonHardwareLoops : public MachineFunctionPass {
73  MachineLoopInfo *MLI;
76  const HexagonInstrInfo *TII;
77 #ifndef NDEBUG
78  static int Counter;
79 #endif
80 
81  public:
82  static char ID;
83 
84  HexagonHardwareLoops() : MachineFunctionPass(ID) {
86  }
87 
88  bool runOnMachineFunction(MachineFunction &MF) override;
89 
90  const char *getPassName() const override { return "Hexagon Hardware Loops"; }
91 
92  void getAnalysisUsage(AnalysisUsage &AU) const override {
96  }
97 
98  private:
99  typedef std::map<unsigned, MachineInstr *> LoopFeederMap;
100 
101  /// Kinds of comparisons in the compare instructions.
102  struct Comparison {
103  enum Kind {
104  EQ = 0x01,
105  NE = 0x02,
106  L = 0x04,
107  G = 0x08,
108  U = 0x40,
109  LTs = L,
110  LEs = L | EQ,
111  GTs = G,
112  GEs = G | EQ,
113  LTu = L | U,
114  LEu = L | EQ | U,
115  GTu = G | U,
116  GEu = G | EQ | U
117  };
118 
119  static Kind getSwappedComparison(Kind Cmp) {
120  assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
121  if ((Cmp & L) || (Cmp & G))
122  return (Kind)(Cmp ^ (L|G));
123  return Cmp;
124  }
125 
126  static Kind getNegatedComparison(Kind Cmp) {
127  if ((Cmp & L) || (Cmp & G))
128  return (Kind)((Cmp ^ (L | G)) ^ EQ);
129  if ((Cmp & NE) || (Cmp & EQ))
130  return (Kind)(Cmp ^ (EQ | NE));
131  return (Kind)0;
132  }
133 
134  static bool isSigned(Kind Cmp) {
135  return (Cmp & (L | G) && !(Cmp & U));
136  }
137 
138  static bool isUnsigned(Kind Cmp) {
139  return (Cmp & U);
140  }
141 
142  };
143 
144  /// \brief Find the register that contains the loop controlling
145  /// induction variable.
146  /// If successful, it will return true and set the \p Reg, \p IVBump
147  /// and \p IVOp arguments. Otherwise it will return false.
148  /// The returned induction register is the register R that follows the
149  /// following induction pattern:
150  /// loop:
151  /// R = phi ..., [ R.next, LatchBlock ]
152  /// R.next = R + #bump
153  /// if (R.next < #N) goto loop
154  /// IVBump is the immediate value added to R, and IVOp is the instruction
155  /// "R.next = R + #bump".
156  bool findInductionRegister(MachineLoop *L, unsigned &Reg,
157  int64_t &IVBump, MachineInstr *&IVOp) const;
158 
159  /// \brief Return the comparison kind for the specified opcode.
160  Comparison::Kind getComparisonKind(unsigned CondOpc,
161  MachineOperand *InitialValue,
162  const MachineOperand *Endvalue,
163  int64_t IVBump) const;
164 
165  /// \brief Analyze the statements in a loop to determine if the loop
166  /// has a computable trip count and, if so, return a value that represents
167  /// the trip count expression.
168  CountValue *getLoopTripCount(MachineLoop *L,
170 
171  /// \brief Return the expression that represents the number of times
172  /// a loop iterates. The function takes the operands that represent the
173  /// loop start value, loop end value, and induction value. Based upon
174  /// these operands, the function attempts to compute the trip count.
175  /// If the trip count is not directly available (as an immediate value,
176  /// or a register), the function will attempt to insert computation of it
177  /// to the loop's preheader.
178  CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
179  const MachineOperand *End, unsigned IVReg,
180  int64_t IVBump, Comparison::Kind Cmp) const;
181 
182  /// \brief Return true if the instruction is not valid within a hardware
183  /// loop.
184  bool isInvalidLoopOperation(const MachineInstr *MI,
185  bool IsInnerHWLoop) const;
186 
187  /// \brief Return true if the loop contains an instruction that inhibits
188  /// using the hardware loop.
189  bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
190 
191  /// \brief Given a loop, check if we can convert it to a hardware loop.
192  /// If so, then perform the conversion and return true.
193  bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
194 
195  /// \brief Return true if the instruction is now dead.
196  bool isDead(const MachineInstr *MI,
197  SmallVectorImpl<MachineInstr *> &DeadPhis) const;
198 
199  /// \brief Remove the instruction if it is now dead.
200  void removeIfDead(MachineInstr *MI);
201 
202  /// \brief Make sure that the "bump" instruction executes before the
203  /// compare. We need that for the IV fixup, so that the compare
204  /// instruction would not use a bumped value that has not yet been
205  /// defined. If the instructions are out of order, try to reorder them.
206  bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
207 
208  /// \brief Return true if MO and MI pair is visited only once. If visited
209  /// more than once, this indicates there is recursion. In such a case,
210  /// return false.
211  bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
212  const MachineOperand *MO,
213  LoopFeederMap &LoopFeederPhi) const;
214 
215  /// \brief Return true if the Phi may generate a value that may underflow,
216  /// or may wrap.
217  bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
219  LoopFeederMap &LoopFeederPhi) const;
220 
221  /// \brief Return true if the induction variable may underflow an unsigned
222  /// value in the first iteration.
223  bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
224  const MachineOperand *EndVal,
226  LoopFeederMap &LoopFeederPhi) const;
227 
228  /// \brief Check if the given operand has a compile-time known constant
229  /// value. Return true if yes, and false otherwise. When returning true, set
230  /// Val to the corresponding constant value.
231  bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
232 
233  /// \brief Check if the operand has a compile-time known constant value.
234  bool isImmediate(const MachineOperand &MO) const {
235  int64_t V;
236  return checkForImmediate(MO, V);
237  }
238 
239  /// \brief Return the immediate for the specified operand.
240  int64_t getImmediate(const MachineOperand &MO) const {
241  int64_t V;
242  if (!checkForImmediate(MO, V))
243  llvm_unreachable("Invalid operand");
244  return V;
245  }
246 
247  /// \brief Reset the given machine operand to now refer to a new immediate
248  /// value. Assumes that the operand was already referencing an immediate
249  /// value, either directly, or via a register.
250  void setImmediate(MachineOperand &MO, int64_t Val);
251 
252  /// \brief Fix the data flow of the induction varible.
253  /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
254  /// |
255  /// +-> back to phi
256  /// where "bump" is the increment of the induction variable:
257  /// iv = iv + #const.
258  /// Due to some prior code transformations, the actual flow may look
259  /// like this:
260  /// phi -+-> bump ---> back to phi
261  /// |
262  /// +-> comparison-in-latch (against upper_bound-bump),
263  /// i.e. the comparison that controls the loop execution may be using
264  /// the value of the induction variable from before the increment.
265  ///
266  /// Return true if the loop's flow is the desired one (i.e. it's
267  /// either been fixed, or no fixing was necessary).
268  /// Otherwise, return false. This can happen if the induction variable
269  /// couldn't be identified, or if the value in the latch's comparison
270  /// cannot be adjusted to reflect the post-bump value.
271  bool fixupInductionVariable(MachineLoop *L);
272 
273  /// \brief Given a loop, if it does not have a preheader, create one.
274  /// Return the block that is the preheader.
275  MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
276  };
277 
278  char HexagonHardwareLoops::ID = 0;
279 #ifndef NDEBUG
280  int HexagonHardwareLoops::Counter = 0;
281 #endif
282 
283  /// \brief Abstraction for a trip count of a loop. A smaller version
284  /// of the MachineOperand class without the concerns of changing the
285  /// operand representation.
286  class CountValue {
287  public:
288  enum CountValueType {
289  CV_Register,
290  CV_Immediate
291  };
292  private:
293  CountValueType Kind;
294  union Values {
295  struct {
296  unsigned Reg;
297  unsigned Sub;
298  } R;
299  unsigned ImmVal;
300  } Contents;
301 
302  public:
303  explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
304  Kind = t;
305  if (Kind == CV_Register) {
306  Contents.R.Reg = v;
307  Contents.R.Sub = u;
308  } else {
309  Contents.ImmVal = v;
310  }
311  }
312  bool isReg() const { return Kind == CV_Register; }
313  bool isImm() const { return Kind == CV_Immediate; }
314 
315  unsigned getReg() const {
316  assert(isReg() && "Wrong CountValue accessor");
317  return Contents.R.Reg;
318  }
319  unsigned getSubReg() const {
320  assert(isReg() && "Wrong CountValue accessor");
321  return Contents.R.Sub;
322  }
323  unsigned getImm() const {
324  assert(isImm() && "Wrong CountValue accessor");
325  return Contents.ImmVal;
326  }
327 
328  void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
329  if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
330  if (isImm()) { OS << Contents.ImmVal; }
331  }
332  };
333 } // end anonymous namespace
334 
335 
336 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
337  "Hexagon Hardware Loops", false, false)
340 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
341  "Hexagon Hardware Loops", false, false)
342 
344  return new HexagonHardwareLoops();
345 }
346 
347 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
348  DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
349 
350  bool Changed = false;
351 
352  MLI = &getAnalysis<MachineLoopInfo>();
353  MRI = &MF.getRegInfo();
354  MDT = &getAnalysis<MachineDominatorTree>();
355  TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
356 
357  for (auto &L : *MLI)
358  if (!L->getParentLoop()) {
359  bool L0Used = false;
360  bool L1Used = false;
361  Changed |= convertToHardwareLoop(L, L0Used, L1Used);
362  }
363 
364  return Changed;
365 }
366 
367 /// \brief Return the latch block if it's one of the exiting blocks. Otherwise,
368 /// return the exiting block. Return 'null' when multiple exiting blocks are
369 /// present.
371  if (MachineBasicBlock *Latch = L->getLoopLatch()) {
372  if (L->isLoopExiting(Latch))
373  return Latch;
374  else
375  return L->getExitingBlock();
376  }
377  return nullptr;
378 }
379 
380 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
381  unsigned &Reg,
382  int64_t &IVBump,
383  MachineInstr *&IVOp
384  ) const {
385  MachineBasicBlock *Header = L->getHeader();
386  MachineBasicBlock *Preheader = L->getLoopPreheader();
387  MachineBasicBlock *Latch = L->getLoopLatch();
388  MachineBasicBlock *ExitingBlock = getExitingBlock(L);
389  if (!Header || !Preheader || !Latch || !ExitingBlock)
390  return false;
391 
392  // This pair represents an induction register together with an immediate
393  // value that will be added to it in each loop iteration.
394  typedef std::pair<unsigned,int64_t> RegisterBump;
395 
396  // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
397  // from an induction operation
398  // R.next = R + bump
399  // where bump is an immediate value.
400  typedef std::map<unsigned,RegisterBump> InductionMap;
401 
402  InductionMap IndMap;
403 
404  typedef MachineBasicBlock::instr_iterator instr_iterator;
405  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
406  I != E && I->isPHI(); ++I) {
407  MachineInstr *Phi = &*I;
408 
409  // Have a PHI instruction. Get the operand that corresponds to the
410  // latch block, and see if is a result of an addition of form "reg+imm",
411  // where the "reg" is defined by the PHI node we are looking at.
412  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
413  if (Phi->getOperand(i+1).getMBB() != Latch)
414  continue;
415 
416  unsigned PhiOpReg = Phi->getOperand(i).getReg();
417  MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
418  unsigned UpdOpc = DI->getOpcode();
419  bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp);
420 
421  if (isAdd) {
422  // If the register operand to the add is the PHI we're looking at, this
423  // meets the induction pattern.
424  unsigned IndReg = DI->getOperand(1).getReg();
425  MachineOperand &Opnd2 = DI->getOperand(2);
426  int64_t V;
427  if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
428  unsigned UpdReg = DI->getOperand(0).getReg();
429  IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
430  }
431  }
432  } // for (i)
433  } // for (instr)
434 
436  MachineBasicBlock *TB = nullptr, *FB = nullptr;
437  bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
438  if (NotAnalyzed)
439  return false;
440 
441  unsigned PredR, PredPos, PredRegFlags;
442  if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
443  return false;
444 
445  MachineInstr *PredI = MRI->getVRegDef(PredR);
446  if (!PredI->isCompare())
447  return false;
448 
449  unsigned CmpReg1 = 0, CmpReg2 = 0;
450  int CmpImm = 0, CmpMask = 0;
451  bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2,
452  CmpMask, CmpImm);
453  // Fail if the compare was not analyzed, or it's not comparing a register
454  // with an immediate value. Not checking the mask here, since we handle
455  // the individual compare opcodes (including A4_cmpb*) later on.
456  if (!CmpAnalyzed)
457  return false;
458 
459  // Exactly one of the input registers to the comparison should be among
460  // the induction registers.
461  InductionMap::iterator IndMapEnd = IndMap.end();
462  InductionMap::iterator F = IndMapEnd;
463  if (CmpReg1 != 0) {
464  InductionMap::iterator F1 = IndMap.find(CmpReg1);
465  if (F1 != IndMapEnd)
466  F = F1;
467  }
468  if (CmpReg2 != 0) {
469  InductionMap::iterator F2 = IndMap.find(CmpReg2);
470  if (F2 != IndMapEnd) {
471  if (F != IndMapEnd)
472  return false;
473  F = F2;
474  }
475  }
476  if (F == IndMapEnd)
477  return false;
478 
479  Reg = F->second.first;
480  IVBump = F->second.second;
481  IVOp = MRI->getVRegDef(F->first);
482  return true;
483 }
484 
485 // Return the comparison kind for the specified opcode.
486 HexagonHardwareLoops::Comparison::Kind
487 HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
488  MachineOperand *InitialValue,
489  const MachineOperand *EndValue,
490  int64_t IVBump) const {
492  switch (CondOpc) {
493  case Hexagon::C2_cmpeqi:
494  case Hexagon::C2_cmpeq:
495  case Hexagon::C2_cmpeqp:
496  Cmp = Comparison::EQ;
497  break;
498  case Hexagon::C4_cmpneq:
499  case Hexagon::C4_cmpneqi:
500  Cmp = Comparison::NE;
501  break;
502  case Hexagon::C4_cmplte:
503  Cmp = Comparison::LEs;
504  break;
505  case Hexagon::C4_cmplteu:
506  Cmp = Comparison::LEu;
507  break;
508  case Hexagon::C2_cmpgtui:
509  case Hexagon::C2_cmpgtu:
510  case Hexagon::C2_cmpgtup:
511  Cmp = Comparison::GTu;
512  break;
513  case Hexagon::C2_cmpgti:
514  case Hexagon::C2_cmpgt:
515  case Hexagon::C2_cmpgtp:
516  Cmp = Comparison::GTs;
517  break;
518  default:
519  return (Comparison::Kind)0;
520  }
521  return Cmp;
522 }
523 
524 /// \brief Analyze the statements in a loop to determine if the loop has
525 /// a computable trip count and, if so, return a value that represents
526 /// the trip count expression.
527 ///
528 /// This function iterates over the phi nodes in the loop to check for
529 /// induction variable patterns that are used in the calculation for
530 /// the number of time the loop is executed.
531 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
533  MachineBasicBlock *TopMBB = L->getTopBlock();
535  assert(PI != TopMBB->pred_end() &&
536  "Loop must have more than one incoming edge!");
537  MachineBasicBlock *Backedge = *PI++;
538  if (PI == TopMBB->pred_end()) // dead loop?
539  return nullptr;
540  MachineBasicBlock *Incoming = *PI++;
541  if (PI != TopMBB->pred_end()) // multiple backedges?
542  return nullptr;
543 
544  // Make sure there is one incoming and one backedge and determine which
545  // is which.
546  if (L->contains(Incoming)) {
547  if (L->contains(Backedge))
548  return nullptr;
549  std::swap(Incoming, Backedge);
550  } else if (!L->contains(Backedge))
551  return nullptr;
552 
553  // Look for the cmp instruction to determine if we can get a useful trip
554  // count. The trip count can be either a register or an immediate. The
555  // location of the value depends upon the type (reg or imm).
556  MachineBasicBlock *ExitingBlock = getExitingBlock(L);
557  if (!ExitingBlock)
558  return nullptr;
559 
560  unsigned IVReg = 0;
561  int64_t IVBump = 0;
562  MachineInstr *IVOp;
563  bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
564  if (!FoundIV)
565  return nullptr;
566 
567  MachineBasicBlock *Preheader = L->getLoopPreheader();
568 
569  MachineOperand *InitialValue = nullptr;
570  MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
571  MachineBasicBlock *Latch = L->getLoopLatch();
572  for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
573  MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
574  if (MBB == Preheader)
575  InitialValue = &IV_Phi->getOperand(i);
576  else if (MBB == Latch)
577  IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
578  }
579  if (!InitialValue)
580  return nullptr;
581 
583  MachineBasicBlock *TB = nullptr, *FB = nullptr;
584  bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
585  if (NotAnalyzed)
586  return nullptr;
587 
588  MachineBasicBlock *Header = L->getHeader();
589  // TB must be non-null. If FB is also non-null, one of them must be
590  // the header. Otherwise, branch to TB could be exiting the loop, and
591  // the fall through can go to the header.
592  assert (TB && "Exit block without a branch?");
593  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
594  MachineBasicBlock *LTB = 0, *LFB = 0;
596  bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false);
597  if (NotAnalyzed)
598  return nullptr;
599  if (TB == Latch)
600  TB = (LTB == Header) ? LTB : LFB;
601  else
602  FB = (LTB == Header) ? LTB: LFB;
603  }
604  assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
605  if (!TB || (FB && TB != Header && FB != Header))
606  return nullptr;
607 
608  // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
609  // to put imm(0), followed by P in the vector Cond.
610  // If TB is not the header, it means that the "not-taken" path must lead
611  // to the header.
612  bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
613  unsigned PredReg, PredPos, PredRegFlags;
614  if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
615  return nullptr;
616  MachineInstr *CondI = MRI->getVRegDef(PredReg);
617  unsigned CondOpc = CondI->getOpcode();
618 
619  unsigned CmpReg1 = 0, CmpReg2 = 0;
620  int Mask = 0, ImmValue = 0;
621  bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2,
622  Mask, ImmValue);
623  if (!AnalyzedCmp)
624  return nullptr;
625 
626  // The comparison operator type determines how we compute the loop
627  // trip count.
628  OldInsts.push_back(CondI);
629  OldInsts.push_back(IVOp);
630 
631  // Sadly, the following code gets information based on the position
632  // of the operands in the compare instruction. This has to be done
633  // this way, because the comparisons check for a specific relationship
634  // between the operands (e.g. is-less-than), rather than to find out
635  // what relationship the operands are in (as on PPC).
636  Comparison::Kind Cmp;
637  bool isSwapped = false;
638  const MachineOperand &Op1 = CondI->getOperand(1);
639  const MachineOperand &Op2 = CondI->getOperand(2);
640  const MachineOperand *EndValue = nullptr;
641 
642  if (Op1.isReg()) {
643  if (Op2.isImm() || Op1.getReg() == IVReg)
644  EndValue = &Op2;
645  else {
646  EndValue = &Op1;
647  isSwapped = true;
648  }
649  }
650 
651  if (!EndValue)
652  return nullptr;
653 
654  Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
655  if (!Cmp)
656  return nullptr;
657  if (Negated)
658  Cmp = Comparison::getNegatedComparison(Cmp);
659  if (isSwapped)
660  Cmp = Comparison::getSwappedComparison(Cmp);
661 
662  if (InitialValue->isReg()) {
663  unsigned R = InitialValue->getReg();
664  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
665  if (!MDT->properlyDominates(DefBB, Header))
666  return nullptr;
667  OldInsts.push_back(MRI->getVRegDef(R));
668  }
669  if (EndValue->isReg()) {
670  unsigned R = EndValue->getReg();
671  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
672  if (!MDT->properlyDominates(DefBB, Header))
673  return nullptr;
674  OldInsts.push_back(MRI->getVRegDef(R));
675  }
676 
677  return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
678 }
679 
680 /// \brief Helper function that returns the expression that represents the
681 /// number of times a loop iterates. The function takes the operands that
682 /// represent the loop start value, loop end value, and induction value.
683 /// Based upon these operands, the function attempts to compute the trip count.
684 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
685  const MachineOperand *Start,
686  const MachineOperand *End,
687  unsigned IVReg,
688  int64_t IVBump,
689  Comparison::Kind Cmp) const {
690  // Cannot handle comparison EQ, i.e. while (A == B).
691  if (Cmp == Comparison::EQ)
692  return nullptr;
693 
694  // Check if either the start or end values are an assignment of an immediate.
695  // If so, use the immediate value rather than the register.
696  if (Start->isReg()) {
697  const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
698  if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
699  StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
700  Start = &StartValInstr->getOperand(1);
701  }
702  if (End->isReg()) {
703  const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
704  if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
705  EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
706  End = &EndValInstr->getOperand(1);
707  }
708 
709  if (!Start->isReg() && !Start->isImm())
710  return nullptr;
711  if (!End->isReg() && !End->isImm())
712  return nullptr;
713 
714  bool CmpLess = Cmp & Comparison::L;
715  bool CmpGreater = Cmp & Comparison::G;
716  bool CmpHasEqual = Cmp & Comparison::EQ;
717 
718  // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
719  if (CmpLess && IVBump < 0)
720  // Loop going while iv is "less" with the iv value going down. Must wrap.
721  return nullptr;
722 
723  if (CmpGreater && IVBump > 0)
724  // Loop going while iv is "greater" with the iv value going up. Must wrap.
725  return nullptr;
726 
727  // Phis that may feed into the loop.
728  LoopFeederMap LoopFeederPhi;
729 
730  // Check if the inital value may be zero and can be decremented in the first
731  // iteration. If the value is zero, the endloop instruction will not decrement
732  // the loop counter, so we shoudn't generate a hardware loop in this case.
733  if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
734  LoopFeederPhi))
735  return nullptr;
736 
737  if (Start->isImm() && End->isImm()) {
738  // Both, start and end are immediates.
739  int64_t StartV = Start->getImm();
740  int64_t EndV = End->getImm();
741  int64_t Dist = EndV - StartV;
742  if (Dist == 0)
743  return nullptr;
744 
745  bool Exact = (Dist % IVBump) == 0;
746 
747  if (Cmp == Comparison::NE) {
748  if (!Exact)
749  return nullptr;
750  if ((Dist < 0) ^ (IVBump < 0))
751  return nullptr;
752  }
753 
754  // For comparisons that include the final value (i.e. include equality
755  // with the final value), we need to increase the distance by 1.
756  if (CmpHasEqual)
757  Dist = Dist > 0 ? Dist+1 : Dist-1;
758 
759  // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
760  // CmpGreater should imply Dist < 0. These conditions could actually
761  // fail, for example, in unreachable code (which may still appear to be
762  // reachable in the CFG).
763  if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
764  return nullptr;
765 
766  // "Normalized" distance, i.e. with the bump set to +-1.
767  int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
768  : (-Dist + (-IVBump - 1)) / (-IVBump);
769  assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
770 
771  uint64_t Count = Dist1;
772 
773  if (Count > 0xFFFFFFFFULL)
774  return nullptr;
775 
776  return new CountValue(CountValue::CV_Immediate, Count);
777  }
778 
779  // A general case: Start and End are some values, but the actual
780  // iteration count may not be available. If it is not, insert
781  // a computation of it into the preheader.
782 
783  // If the induction variable bump is not a power of 2, quit.
784  // Othwerise we'd need a general integer division.
785  if (!isPowerOf2_64(std::abs(IVBump)))
786  return nullptr;
787 
788  MachineBasicBlock *PH = Loop->getLoopPreheader();
789  assert (PH && "Should have a preheader by now");
791  DebugLoc DL;
792  if (InsertPos != PH->end())
793  DL = InsertPos->getDebugLoc();
794 
795  // If Start is an immediate and End is a register, the trip count
796  // will be "reg - imm". Hexagon's "subtract immediate" instruction
797  // is actually "reg + -imm".
798 
799  // If the loop IV is going downwards, i.e. if the bump is negative,
800  // then the iteration count (computed as End-Start) will need to be
801  // negated. To avoid the negation, just swap Start and End.
802  if (IVBump < 0) {
803  std::swap(Start, End);
804  IVBump = -IVBump;
805  }
806  // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
807  // Signedness, and "including equality" are preserved.
808 
809  bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
810  bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
811 
812  int64_t StartV = 0, EndV = 0;
813  if (Start->isImm())
814  StartV = Start->getImm();
815  if (End->isImm())
816  EndV = End->getImm();
817 
818  int64_t AdjV = 0;
819  // To compute the iteration count, we would need this computation:
820  // Count = (End - Start + (IVBump-1)) / IVBump
821  // or, when CmpHasEqual:
822  // Count = (End - Start + (IVBump-1)+1) / IVBump
823  // The "IVBump-1" part is the adjustment (AdjV). We can avoid
824  // generating an instruction specifically to add it if we can adjust
825  // the immediate values for Start or End.
826 
827  if (CmpHasEqual) {
828  // Need to add 1 to the total iteration count.
829  if (Start->isImm())
830  StartV--;
831  else if (End->isImm())
832  EndV++;
833  else
834  AdjV += 1;
835  }
836 
837  if (Cmp != Comparison::NE) {
838  if (Start->isImm())
839  StartV -= (IVBump-1);
840  else if (End->isImm())
841  EndV += (IVBump-1);
842  else
843  AdjV += (IVBump-1);
844  }
845 
846  unsigned R = 0, SR = 0;
847  if (Start->isReg()) {
848  R = Start->getReg();
849  SR = Start->getSubReg();
850  } else {
851  R = End->getReg();
852  SR = End->getSubReg();
853  }
854  const TargetRegisterClass *RC = MRI->getRegClass(R);
855  // Hardware loops cannot handle 64-bit registers. If it's a double
856  // register, it has to have a subregister.
857  if (!SR && RC == &Hexagon::DoubleRegsRegClass)
858  return nullptr;
859  const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
860 
861  // Compute DistR (register with the distance between Start and End).
862  unsigned DistR, DistSR;
863 
864  // Avoid special case, where the start value is an imm(0).
865  if (Start->isImm() && StartV == 0) {
866  DistR = End->getReg();
867  DistSR = End->getSubReg();
868  } else {
869  const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
870  (RegToImm ? TII->get(Hexagon::A2_subri) :
871  TII->get(Hexagon::A2_addi));
872  if (RegToReg || RegToImm) {
873  unsigned SubR = MRI->createVirtualRegister(IntRC);
874  MachineInstrBuilder SubIB =
875  BuildMI(*PH, InsertPos, DL, SubD, SubR);
876 
877  if (RegToReg)
878  SubIB.addReg(End->getReg(), 0, End->getSubReg())
879  .addReg(Start->getReg(), 0, Start->getSubReg());
880  else
881  SubIB.addImm(EndV)
882  .addReg(Start->getReg(), 0, Start->getSubReg());
883  DistR = SubR;
884  } else {
885  // If the loop has been unrolled, we should use the original loop count
886  // instead of recalculating the value. This will avoid additional
887  // 'Add' instruction.
888  const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
889  if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
890  EndValInstr->getOperand(2).getImm() == StartV) {
891  DistR = EndValInstr->getOperand(1).getReg();
892  } else {
893  unsigned SubR = MRI->createVirtualRegister(IntRC);
894  MachineInstrBuilder SubIB =
895  BuildMI(*PH, InsertPos, DL, SubD, SubR);
896  SubIB.addReg(End->getReg(), 0, End->getSubReg())
897  .addImm(-StartV);
898  DistR = SubR;
899  }
900  }
901  DistSR = 0;
902  }
903 
904  // From DistR, compute AdjR (register with the adjusted distance).
905  unsigned AdjR, AdjSR;
906 
907  if (AdjV == 0) {
908  AdjR = DistR;
909  AdjSR = DistSR;
910  } else {
911  // Generate CountR = ADD DistR, AdjVal
912  unsigned AddR = MRI->createVirtualRegister(IntRC);
913  MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
914  BuildMI(*PH, InsertPos, DL, AddD, AddR)
915  .addReg(DistR, 0, DistSR)
916  .addImm(AdjV);
917 
918  AdjR = AddR;
919  AdjSR = 0;
920  }
921 
922  // From AdjR, compute CountR (register with the final count).
923  unsigned CountR, CountSR;
924 
925  if (IVBump == 1) {
926  CountR = AdjR;
927  CountSR = AdjSR;
928  } else {
929  // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
930  unsigned Shift = Log2_32(IVBump);
931 
932  // Generate NormR = LSR DistR, Shift.
933  unsigned LsrR = MRI->createVirtualRegister(IntRC);
934  const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
935  BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
936  .addReg(AdjR, 0, AdjSR)
937  .addImm(Shift);
938 
939  CountR = LsrR;
940  CountSR = 0;
941  }
942 
943  return new CountValue(CountValue::CV_Register, CountR, CountSR);
944 }
945 
946 /// \brief Return true if the operation is invalid within hardware loop.
947 bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
948  bool IsInnerHWLoop) const {
949 
950  // Call is not allowed because the callee may use a hardware loop except for
951  // the case when the call never returns.
952  if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr)
953  return true;
954 
955  // Check if the instruction defines a hardware loop register.
956  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
957  const MachineOperand &MO = MI->getOperand(i);
958  if (!MO.isReg() || !MO.isDef())
959  continue;
960  unsigned R = MO.getReg();
961  if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 ||
962  R == Hexagon::LC1 || R == Hexagon::SA1))
963  return true;
964  if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1))
965  return true;
966  }
967  return false;
968 }
969 
970 /// \brief Return true if the loop contains an instruction that inhibits
971 /// the use of the hardware loop instruction.
972 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
973  bool IsInnerHWLoop) const {
974  const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
975  DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
976  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
977  MachineBasicBlock *MBB = Blocks[i];
979  MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
980  const MachineInstr *MI = &*MII;
981  if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
982  DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
983  return true;
984  }
985  }
986  }
987  return false;
988 }
989 
990 /// \brief Returns true if the instruction is dead. This was essentially
991 /// copied from DeadMachineInstructionElim::isDead, but with special cases
992 /// for inline asm, physical registers and instructions with side effects
993 /// removed.
994 bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
995  SmallVectorImpl<MachineInstr *> &DeadPhis) const {
996  // Examine each operand.
997  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
998  const MachineOperand &MO = MI->getOperand(i);
999  if (!MO.isReg() || !MO.isDef())
1000  continue;
1001 
1002  unsigned Reg = MO.getReg();
1003  if (MRI->use_nodbg_empty(Reg))
1004  continue;
1005 
1006  typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
1007 
1008  // This instruction has users, but if the only user is the phi node for the
1009  // parent block, and the only use of that phi node is this instruction, then
1010  // this instruction is dead: both it (and the phi node) can be removed.
1011  use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1012  use_nodbg_iterator End = MRI->use_nodbg_end();
1013  if (std::next(I) != End || !I->getParent()->isPHI())
1014  return false;
1015 
1016  MachineInstr *OnePhi = I->getParent();
1017  for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
1018  const MachineOperand &OPO = OnePhi->getOperand(j);
1019  if (!OPO.isReg() || !OPO.isDef())
1020  continue;
1021 
1022  unsigned OPReg = OPO.getReg();
1023  use_nodbg_iterator nextJ;
1024  for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1025  J != End; J = nextJ) {
1026  nextJ = std::next(J);
1027  MachineOperand &Use = *J;
1028  MachineInstr *UseMI = Use.getParent();
1029 
1030  // If the phi node has a user that is not MI, bail.
1031  if (MI != UseMI)
1032  return false;
1033  }
1034  }
1035  DeadPhis.push_back(OnePhi);
1036  }
1037 
1038  // If there are no defs with uses, the instruction is dead.
1039  return true;
1040 }
1041 
1042 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1043  // This procedure was essentially copied from DeadMachineInstructionElim.
1044 
1046  if (isDead(MI, DeadPhis)) {
1047  DEBUG(dbgs() << "HW looping will remove: " << *MI);
1048 
1049  // It is possible that some DBG_VALUE instructions refer to this
1050  // instruction. Examine each def operand for such references;
1051  // if found, mark the DBG_VALUE as undef (but don't delete it).
1052  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1053  const MachineOperand &MO = MI->getOperand(i);
1054  if (!MO.isReg() || !MO.isDef())
1055  continue;
1056  unsigned Reg = MO.getReg();
1058  for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
1059  E = MRI->use_end(); I != E; I = nextI) {
1060  nextI = std::next(I); // I is invalidated by the setReg
1061  MachineOperand &Use = *I;
1062  MachineInstr *UseMI = I->getParent();
1063  if (UseMI == MI)
1064  continue;
1065  if (Use.isDebug())
1066  UseMI->getOperand(0).setReg(0U);
1067  }
1068  }
1069 
1070  MI->eraseFromParent();
1071  for (unsigned i = 0; i < DeadPhis.size(); ++i)
1072  DeadPhis[i]->eraseFromParent();
1073  }
1074 }
1075 
1076 /// \brief Check if the loop is a candidate for converting to a hardware
1077 /// loop. If so, then perform the transformation.
1078 ///
1079 /// This function works on innermost loops first. A loop can be converted
1080 /// if it is a counting loop; either a register value or an immediate.
1081 ///
1082 /// The code makes several assumptions about the representation of the loop
1083 /// in llvm.
1084 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1085  bool &RecL0used,
1086  bool &RecL1used) {
1087  // This is just for sanity.
1088  assert(L->getHeader() && "Loop without a header?");
1089 
1090  bool Changed = false;
1091  bool L0Used = false;
1092  bool L1Used = false;
1093 
1094  // Process nested loops first.
1095  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
1096  Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
1097  L0Used |= RecL0used;
1098  L1Used |= RecL1used;
1099  }
1100 
1101  // If a nested loop has been converted, then we can't convert this loop.
1102  if (Changed && L0Used && L1Used)
1103  return Changed;
1104 
1105  unsigned LOOP_i;
1106  unsigned LOOP_r;
1107  unsigned ENDLOOP;
1108 
1109  // Flag used to track loopN instruction:
1110  // 1 - Hardware loop is being generated for the inner most loop.
1111  // 0 - Hardware loop is being generated for the outer loop.
1112  unsigned IsInnerHWLoop = 1;
1113 
1114  if (L0Used) {
1115  LOOP_i = Hexagon::J2_loop1i;
1116  LOOP_r = Hexagon::J2_loop1r;
1117  ENDLOOP = Hexagon::ENDLOOP1;
1118  IsInnerHWLoop = 0;
1119  } else {
1120  LOOP_i = Hexagon::J2_loop0i;
1121  LOOP_r = Hexagon::J2_loop0r;
1122  ENDLOOP = Hexagon::ENDLOOP0;
1123  }
1124 
1125 #ifndef NDEBUG
1126  // Stop trying after reaching the limit (if any).
1127  int Limit = HWLoopLimit;
1128  if (Limit >= 0) {
1129  if (Counter >= HWLoopLimit)
1130  return false;
1131  Counter++;
1132  }
1133 #endif
1134 
1135  // Does the loop contain any invalid instructions?
1136  if (containsInvalidInstruction(L, IsInnerHWLoop))
1137  return false;
1138 
1139  MachineBasicBlock *LastMBB = getExitingBlock(L);
1140  // Don't generate hw loop if the loop has more than one exit.
1141  if (!LastMBB)
1142  return false;
1143 
1145  if (LastI == LastMBB->end())
1146  return false;
1147 
1148  // Is the induction variable bump feeding the latch condition?
1149  if (!fixupInductionVariable(L))
1150  return false;
1151 
1152  // Ensure the loop has a preheader: the loop instruction will be
1153  // placed there.
1154  MachineBasicBlock *Preheader = L->getLoopPreheader();
1155  if (!Preheader) {
1156  Preheader = createPreheaderForLoop(L);
1157  if (!Preheader)
1158  return false;
1159  }
1160 
1161  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1162 
1164  // Are we able to determine the trip count for the loop?
1165  CountValue *TripCount = getLoopTripCount(L, OldInsts);
1166  if (!TripCount)
1167  return false;
1168 
1169  // Is the trip count available in the preheader?
1170  if (TripCount->isReg()) {
1171  // There will be a use of the register inserted into the preheader,
1172  // so make sure that the register is actually defined at that point.
1173  MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1174  MachineBasicBlock *BBDef = TCDef->getParent();
1175  if (!MDT->dominates(BBDef, Preheader))
1176  return false;
1177  }
1178 
1179  // Determine the loop start.
1180  MachineBasicBlock *TopBlock = L->getTopBlock();
1181  MachineBasicBlock *ExitingBlock = getExitingBlock(L);
1182  MachineBasicBlock *LoopStart = 0;
1183  if (ExitingBlock != L->getLoopLatch()) {
1184  MachineBasicBlock *TB = 0, *FB = 0;
1186 
1187  if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1188  return false;
1189 
1190  if (L->contains(TB))
1191  LoopStart = TB;
1192  else if (L->contains(FB))
1193  LoopStart = FB;
1194  else
1195  return false;
1196  }
1197  else
1198  LoopStart = TopBlock;
1199 
1200  // Convert the loop to a hardware loop.
1201  DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1202  DebugLoc DL;
1203  if (InsertPos != Preheader->end())
1204  DL = InsertPos->getDebugLoc();
1205 
1206  if (TripCount->isReg()) {
1207  // Create a copy of the loop count register.
1208  unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1209  BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1210  .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1211  // Add the Loop instruction to the beginning of the loop.
1212  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1213  .addReg(CountReg);
1214  } else {
1215  assert(TripCount->isImm() && "Expecting immediate value for trip count");
1216  // Add the Loop immediate instruction to the beginning of the loop,
1217  // if the immediate fits in the instructions. Otherwise, we need to
1218  // create a new virtual register.
1219  int64_t CountImm = TripCount->getImm();
1220  if (!TII->isValidOffset(LOOP_i, CountImm)) {
1221  unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1222  BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1223  .addImm(CountImm);
1224  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1225  .addMBB(LoopStart).addReg(CountReg);
1226  } else
1227  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1228  .addMBB(LoopStart).addImm(CountImm);
1229  }
1230 
1231  // Make sure the loop start always has a reference in the CFG. We need
1232  // to create a BlockAddress operand to get this mechanism to work both the
1233  // MachineBasicBlock and BasicBlock objects need the flag set.
1234  LoopStart->setHasAddressTaken();
1235  // This line is needed to set the hasAddressTaken flag on the BasicBlock
1236  // object.
1237  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
1238 
1239  // Replace the loop branch with an endloop instruction.
1240  DebugLoc LastIDL = LastI->getDebugLoc();
1241  BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1242 
1243  // The loop ends with either:
1244  // - a conditional branch followed by an unconditional branch, or
1245  // - a conditional branch to the loop start.
1246  if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1247  LastI->getOpcode() == Hexagon::J2_jumpf) {
1248  // Delete one and change/add an uncond. branch to out of the loop.
1249  MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1250  LastI = LastMBB->erase(LastI);
1251  if (!L->contains(BranchTarget)) {
1252  if (LastI != LastMBB->end())
1253  LastI = LastMBB->erase(LastI);
1255  TII->InsertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1256  }
1257  } else {
1258  // Conditional branch to loop start; just delete it.
1259  LastMBB->erase(LastI);
1260  }
1261  delete TripCount;
1262 
1263  // The induction operation and the comparison may now be
1264  // unneeded. If these are unneeded, then remove them.
1265  for (unsigned i = 0; i < OldInsts.size(); ++i)
1266  removeIfDead(OldInsts[i]);
1267 
1268  ++NumHWLoops;
1269 
1270  // Set RecL1used and RecL0used only after hardware loop has been
1271  // successfully generated. Doing it earlier can cause wrong loop instruction
1272  // to be used.
1273  if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1274  RecL1used = true;
1275  else
1276  RecL0used = true;
1277 
1278  return true;
1279 }
1280 
1281 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1282  MachineInstr *CmpI) {
1283  assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1284 
1285  MachineBasicBlock *BB = BumpI->getParent();
1286  if (CmpI->getParent() != BB)
1287  return false;
1288 
1289  typedef MachineBasicBlock::instr_iterator instr_iterator;
1290  // Check if things are in order to begin with.
1291  for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
1292  if (&*I == CmpI)
1293  return true;
1294 
1295  // Out of order.
1296  unsigned PredR = CmpI->getOperand(0).getReg();
1297  bool FoundBump = false;
1298  instr_iterator CmpIt = CmpI, NextIt = std::next(CmpIt);
1299  for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1300  MachineInstr *In = &*I;
1301  for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1302  MachineOperand &MO = In->getOperand(i);
1303  if (MO.isReg() && MO.isUse()) {
1304  if (MO.getReg() == PredR) // Found an intervening use of PredR.
1305  return false;
1306  }
1307  }
1308 
1309  if (In == BumpI) {
1310  instr_iterator After = BumpI;
1311  instr_iterator From = CmpI;
1312  BB->splice(std::next(After), BB, From);
1313  FoundBump = true;
1314  break;
1315  }
1316  }
1317  assert (FoundBump && "Cannot determine instruction order");
1318  return FoundBump;
1319 }
1320 
1321 /// This function is required to break recursion. Visiting phis in a loop may
1322 /// result in recursion during compilation. We break the recursion by making
1323 /// sure that we visit a MachineOperand and its definition in a
1324 /// MachineInstruction only once. If we attempt to visit more than once, then
1325 /// there is recursion, and will return false.
1326 bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1327  MachineInstr *MI,
1328  const MachineOperand *MO,
1329  LoopFeederMap &LoopFeederPhi) const {
1330  if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1331  const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
1332  DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
1333  // Ignore all BBs that form Loop.
1334  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1335  MachineBasicBlock *MBB = Blocks[i];
1336  if (A == MBB)
1337  return false;
1338  }
1339  MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1340  LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1341  return true;
1342  } else
1343  // Already visited node.
1344  return false;
1345 }
1346 
1347 /// Return true if a Phi may generate a value that can underflow.
1348 /// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1349 bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1350  MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1351  MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1352  assert(Phi->isPHI() && "Expecting a Phi.");
1353  // Walk through each Phi, and its used operands. Make sure that
1354  // if there is recursion in Phi, we won't generate hardware loops.
1355  for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1356  if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1357  if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1358  Phi->getParent(), L, LoopFeederPhi))
1359  return true;
1360  return false;
1361 }
1362 
1363 /// Return true if the induction variable can underflow in the first iteration.
1364 /// An example, is an initial unsigned value that is 0 and is decrement in the
1365 /// first itertion of a do-while loop. In this case, we cannot generate a
1366 /// hardware loop because the endloop instruction does not decrement the loop
1367 /// counter if it is <= 1. We only need to perform this analysis if the
1368 /// initial value is a register.
1369 ///
1370 /// This function assumes the initial value may underfow unless proven
1371 /// otherwise. If the type is signed, then we don't care because signed
1372 /// underflow is undefined. We attempt to prove the initial value is not
1373 /// zero by perfoming a crude analysis of the loop counter. This function
1374 /// checks if the initial value is used in any comparison prior to the loop
1375 /// and, if so, assumes the comparison is a range check. This is inexact,
1376 /// but will catch the simple cases.
1377 bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1378  const MachineOperand *InitVal, const MachineOperand *EndVal,
1379  MachineBasicBlock *MBB, MachineLoop *L,
1380  LoopFeederMap &LoopFeederPhi) const {
1381  // Only check register values since they are unknown.
1382  if (!InitVal->isReg())
1383  return false;
1384 
1385  if (!EndVal->isImm())
1386  return false;
1387 
1388  // A register value that is assigned an immediate is a known value, and it
1389  // won't underflow in the first iteration.
1390  int64_t Imm;
1391  if (checkForImmediate(*InitVal, Imm))
1392  return (EndVal->getImm() == Imm);
1393 
1394  unsigned Reg = InitVal->getReg();
1395 
1396  // We don't know the value of a physical register.
1398  return true;
1399 
1400  MachineInstr *Def = MRI->getVRegDef(Reg);
1401  if (!Def)
1402  return true;
1403 
1404  // If the initial value is a Phi or copy and the operands may not underflow,
1405  // then the definition cannot be underflow either.
1406  if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1407  L, LoopFeederPhi))
1408  return false;
1409  if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1410  EndVal, Def->getParent(),
1411  L, LoopFeederPhi))
1412  return false;
1413 
1414  // Iterate over the uses of the initial value. If the initial value is used
1415  // in a compare, then we assume this is a range check that ensures the loop
1416  // doesn't underflow. This is not an exact test and should be improved.
1417  for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
1418  E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1419  MachineInstr *MI = &*I;
1420  unsigned CmpReg1 = 0, CmpReg2 = 0;
1421  int CmpMask = 0, CmpValue = 0;
1422 
1423  if (!TII->analyzeCompare(MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1424  continue;
1425 
1426  MachineBasicBlock *TBB = 0, *FBB = 0;
1428  if (TII->AnalyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1429  continue;
1430 
1431  Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), 0, 0, 0);
1432  if (Cmp == 0)
1433  continue;
1434  if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1435  Cmp = Comparison::getNegatedComparison(Cmp);
1436  if (CmpReg2 != 0 && CmpReg2 == Reg)
1437  Cmp = Comparison::getSwappedComparison(Cmp);
1438 
1439  // Signed underflow is undefined.
1440  if (Comparison::isSigned(Cmp))
1441  return false;
1442 
1443  // Check if there is a comparison of the inital value. If the initial value
1444  // is greater than or not equal to another value, then assume this is a
1445  // range check.
1446  if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1447  return false;
1448  }
1449 
1450  // OK - this is a hack that needs to be improved. We really need to analyze
1451  // the instructions performed on the initial value. This works on the simplest
1452  // cases only.
1453  if (!Def->isCopy() && !Def->isPHI())
1454  return false;
1455 
1456  return true;
1457 }
1458 
1459 bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1460  int64_t &Val) const {
1461  if (MO.isImm()) {
1462  Val = MO.getImm();
1463  return true;
1464  }
1465  if (!MO.isReg())
1466  return false;
1467 
1468  // MO is a register. Check whether it is defined as an immediate value,
1469  // and if so, get the value of it in TV. That value will then need to be
1470  // processed to handle potential subregisters in MO.
1471  int64_t TV;
1472 
1473  unsigned R = MO.getReg();
1475  return false;
1476  MachineInstr *DI = MRI->getVRegDef(R);
1477  unsigned DOpc = DI->getOpcode();
1478  switch (DOpc) {
1479  case TargetOpcode::COPY:
1480  case Hexagon::A2_tfrsi:
1481  case Hexagon::A2_tfrpi:
1482  case Hexagon::CONST32_Int_Real:
1483  case Hexagon::CONST64_Int_Real: {
1484  // Call recursively to avoid an extra check whether operand(1) is
1485  // indeed an immediate (it could be a global address, for example),
1486  // plus we can handle COPY at the same time.
1487  if (!checkForImmediate(DI->getOperand(1), TV))
1488  return false;
1489  break;
1490  }
1491  case Hexagon::A2_combineii:
1492  case Hexagon::A4_combineir:
1493  case Hexagon::A4_combineii:
1494  case Hexagon::A4_combineri:
1495  case Hexagon::A2_combinew: {
1496  const MachineOperand &S1 = DI->getOperand(1);
1497  const MachineOperand &S2 = DI->getOperand(2);
1498  int64_t V1, V2;
1499  if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1500  return false;
1501  TV = V2 | (V1 << 32);
1502  break;
1503  }
1505  const MachineOperand &S1 = DI->getOperand(1);
1506  const MachineOperand &S3 = DI->getOperand(3);
1507  int64_t V1, V3;
1508  if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1509  return false;
1510  unsigned Sub2 = DI->getOperand(2).getImm();
1511  unsigned Sub4 = DI->getOperand(4).getImm();
1512  if (Sub2 == Hexagon::subreg_loreg && Sub4 == Hexagon::subreg_hireg)
1513  TV = V1 | (V3 << 32);
1514  else if (Sub2 == Hexagon::subreg_hireg && Sub4 == Hexagon::subreg_loreg)
1515  TV = V3 | (V1 << 32);
1516  else
1517  llvm_unreachable("Unexpected form of REG_SEQUENCE");
1518  break;
1519  }
1520 
1521  default:
1522  return false;
1523  }
1524 
1525  // By now, we should have successfuly obtained the immediate value defining
1526  // the register referenced in MO. Handle a potential use of a subregister.
1527  switch (MO.getSubReg()) {
1528  case Hexagon::subreg_loreg:
1529  Val = TV & 0xFFFFFFFFULL;
1530  break;
1531  case Hexagon::subreg_hireg:
1532  Val = (TV >> 32) & 0xFFFFFFFFULL;
1533  break;
1534  default:
1535  Val = TV;
1536  break;
1537  }
1538  return true;
1539 }
1540 
1541 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1542  if (MO.isImm()) {
1543  MO.setImm(Val);
1544  return;
1545  }
1546 
1547  assert(MO.isReg());
1548  unsigned R = MO.getReg();
1549  MachineInstr *DI = MRI->getVRegDef(R);
1550 
1551  const TargetRegisterClass *RC = MRI->getRegClass(R);
1552  unsigned NewR = MRI->createVirtualRegister(RC);
1553  MachineBasicBlock &B = *DI->getParent();
1554  DebugLoc DL = DI->getDebugLoc();
1555  BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1556  MO.setReg(NewR);
1557 }
1558 
1559 static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
1560  // These two instructions are not extendable.
1561  if (CmpOpc == Hexagon::A4_cmpbeqi)
1562  return isUInt<8>(Imm);
1563  if (CmpOpc == Hexagon::A4_cmpbgti)
1564  return isInt<8>(Imm);
1565  // The rest of the comparison-with-immediate instructions are extendable.
1566  return true;
1567 }
1568 
1569 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1570  MachineBasicBlock *Header = L->getHeader();
1571  MachineBasicBlock *Latch = L->getLoopLatch();
1572  MachineBasicBlock *ExitingBlock = getExitingBlock(L);
1573 
1574  if (!(Header && Latch && ExitingBlock))
1575  return false;
1576 
1577  // These data structures follow the same concept as the corresponding
1578  // ones in findInductionRegister (where some comments are).
1579  typedef std::pair<unsigned,int64_t> RegisterBump;
1580  typedef std::pair<unsigned,RegisterBump> RegisterInduction;
1581  typedef std::set<RegisterInduction> RegisterInductionSet;
1582 
1583  // Register candidates for induction variables, with their associated bumps.
1584  RegisterInductionSet IndRegs;
1585 
1586  // Look for induction patterns:
1587  // vreg1 = PHI ..., [ latch, vreg2 ]
1588  // vreg2 = ADD vreg1, imm
1589  typedef MachineBasicBlock::instr_iterator instr_iterator;
1590  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1591  I != E && I->isPHI(); ++I) {
1592  MachineInstr *Phi = &*I;
1593 
1594  // Have a PHI instruction.
1595  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1596  if (Phi->getOperand(i+1).getMBB() != Latch)
1597  continue;
1598 
1599  unsigned PhiReg = Phi->getOperand(i).getReg();
1600  MachineInstr *DI = MRI->getVRegDef(PhiReg);
1601  unsigned UpdOpc = DI->getOpcode();
1602  bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp);
1603 
1604  if (isAdd) {
1605  // If the register operand to the add/sub is the PHI we are looking
1606  // at, this meets the induction pattern.
1607  unsigned IndReg = DI->getOperand(1).getReg();
1608  MachineOperand &Opnd2 = DI->getOperand(2);
1609  int64_t V;
1610  if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1611  unsigned UpdReg = DI->getOperand(0).getReg();
1612  IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1613  }
1614  }
1615  } // for (i)
1616  } // for (instr)
1617 
1618  if (IndRegs.empty())
1619  return false;
1620 
1621  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1623  // AnalyzeBranch returns true if it fails to analyze branch.
1624  bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1625  if (NotAnalyzed || Cond.empty())
1626  return false;
1627 
1628  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1629  MachineBasicBlock *LTB = 0, *LFB = 0;
1631  bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false);
1632  if (NotAnalyzed)
1633  return false;
1634 
1635  // Since latch is not the exiting block, the latch branch should be an
1636  // unconditional branch to the loop header.
1637  if (TB == Latch)
1638  TB = (LTB == Header) ? LTB : LFB;
1639  else
1640  FB = (LTB == Header) ? LTB : LFB;
1641  }
1642  if (TB != Header) {
1643  if (FB != Header) {
1644  // The latch/exit block does not go back to the header.
1645  return false;
1646  }
1647  // FB is the header (i.e., uncond. jump to branch header)
1648  // In this case, the LoopBody -> TB should not be a back edge otherwise
1649  // it could result in an infinite loop after conversion to hw_loop.
1650  // This case can happen when the Latch has two jumps like this:
1651  // Jmp_c OuterLoopHeader <-- TB
1652  // Jmp InnerLoopHeader <-- FB
1653  if (MDT->dominates(TB, FB))
1654  return false;
1655  }
1656 
1657  // Expecting a predicate register as a condition. It won't be a hardware
1658  // predicate register at this point yet, just a vreg.
1659  // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
1660  // into Cond, followed by the predicate register. For non-negated branches
1661  // it's just the register.
1662  unsigned CSz = Cond.size();
1663  if (CSz != 1 && CSz != 2)
1664  return false;
1665 
1666  if (!Cond[CSz-1].isReg())
1667  return false;
1668 
1669  unsigned P = Cond[CSz-1].getReg();
1670  MachineInstr *PredDef = MRI->getVRegDef(P);
1671 
1672  if (!PredDef->isCompare())
1673  return false;
1674 
1675  SmallSet<unsigned,2> CmpRegs;
1676  MachineOperand *CmpImmOp = nullptr;
1677 
1678  // Go over all operands to the compare and look for immediate and register
1679  // operands. Assume that if the compare has a single register use and a
1680  // single immediate operand, then the register is being compared with the
1681  // immediate value.
1682  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1683  MachineOperand &MO = PredDef->getOperand(i);
1684  if (MO.isReg()) {
1685  // Skip all implicit references. In one case there was:
1686  // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
1687  if (MO.isImplicit())
1688  continue;
1689  if (MO.isUse()) {
1690  if (!isImmediate(MO)) {
1691  CmpRegs.insert(MO.getReg());
1692  continue;
1693  }
1694  // Consider the register to be the "immediate" operand.
1695  if (CmpImmOp)
1696  return false;
1697  CmpImmOp = &MO;
1698  }
1699  } else if (MO.isImm()) {
1700  if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1701  return false;
1702  CmpImmOp = &MO;
1703  }
1704  }
1705 
1706  if (CmpRegs.empty())
1707  return false;
1708 
1709  // Check if the compared register follows the order we want. Fix if needed.
1710  for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1711  I != E; ++I) {
1712  // This is a success. If the register used in the comparison is one that
1713  // we have identified as a bumped (updated) induction register, there is
1714  // nothing to do.
1715  if (CmpRegs.count(I->first))
1716  return true;
1717 
1718  // Otherwise, if the register being compared comes out of a PHI node,
1719  // and has been recognized as following the induction pattern, and is
1720  // compared against an immediate, we can fix it.
1721  const RegisterBump &RB = I->second;
1722  if (CmpRegs.count(RB.first)) {
1723  if (!CmpImmOp) {
1724  // If both operands to the compare instruction are registers, see if
1725  // it can be changed to use induction register as one of the operands.
1726  MachineInstr *IndI = nullptr;
1727  MachineInstr *nonIndI = nullptr;
1728  MachineOperand *IndMO = nullptr;
1729  MachineOperand *nonIndMO = nullptr;
1730 
1731  for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1732  MachineOperand &MO = PredDef->getOperand(i);
1733  if (MO.isReg() && MO.getReg() == RB.first) {
1734  DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1735  << *(MRI->getVRegDef(I->first)));
1736  if (IndI)
1737  return false;
1738 
1739  IndI = MRI->getVRegDef(I->first);
1740  IndMO = &MO;
1741  } else if (MO.isReg()) {
1742  DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1743  << *(MRI->getVRegDef(MO.getReg())));
1744  if (nonIndI)
1745  return false;
1746 
1747  nonIndI = MRI->getVRegDef(MO.getReg());
1748  nonIndMO = &MO;
1749  }
1750  }
1751  if (IndI && nonIndI &&
1752  nonIndI->getOpcode() == Hexagon::A2_addi &&
1753  nonIndI->getOperand(2).isImm() &&
1754  nonIndI->getOperand(2).getImm() == - RB.second) {
1755  bool Order = orderBumpCompare(IndI, PredDef);
1756  if (Order) {
1757  IndMO->setReg(I->first);
1758  nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1759  return true;
1760  }
1761  }
1762  return false;
1763  }
1764 
1765  // It is not valid to do this transformation on an unsigned comparison
1766  // because it may underflow.
1767  Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0);
1768  if (!Cmp || Comparison::isUnsigned(Cmp))
1769  return false;
1770 
1771  // If the register is being compared against an immediate, try changing
1772  // the compare instruction to use induction register and adjust the
1773  // immediate operand.
1774  int64_t CmpImm = getImmediate(*CmpImmOp);
1775  int64_t V = RB.second;
1776  // Handle Overflow (64-bit).
1777  if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1778  ((V < 0) && (CmpImm < INT64_MIN - V)))
1779  return false;
1780  CmpImm += V;
1781  // Most comparisons of register against an immediate value allow
1782  // the immediate to be constant-extended. There are some exceptions
1783  // though. Make sure the new combination will work.
1784  if (CmpImmOp->isImm())
1785  if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
1786  return false;
1787 
1788  // Make sure that the compare happens after the bump. Otherwise,
1789  // after the fixup, the compare would use a yet-undefined register.
1790  MachineInstr *BumpI = MRI->getVRegDef(I->first);
1791  bool Order = orderBumpCompare(BumpI, PredDef);
1792  if (!Order)
1793  return false;
1794 
1795  // Finally, fix the compare instruction.
1796  setImmediate(*CmpImmOp, CmpImm);
1797  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1798  MachineOperand &MO = PredDef->getOperand(i);
1799  if (MO.isReg() && MO.getReg() == RB.first) {
1800  MO.setReg(I->first);
1801  return true;
1802  }
1803  }
1804  }
1805  }
1806 
1807  return false;
1808 }
1809 
1810 /// \brief Create a preheader for a given loop.
1811 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1812  MachineLoop *L) {
1813  if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
1814  return TmpPH;
1815 
1816  if (!HWCreatePreheader)
1817  return nullptr;
1818 
1819  MachineBasicBlock *Header = L->getHeader();
1820  MachineBasicBlock *Latch = L->getLoopLatch();
1821  MachineBasicBlock *ExitingBlock = getExitingBlock(L);
1822  MachineFunction *MF = Header->getParent();
1823  DebugLoc DL;
1824 
1825 #ifndef NDEBUG
1826  if ((PHFn != "") && (PHFn != MF->getName()))
1827  return nullptr;
1828 #endif
1829 
1830  if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1831  return nullptr;
1832 
1833  typedef MachineBasicBlock::instr_iterator instr_iterator;
1834 
1835  // Verify that all existing predecessors have analyzable branches
1836  // (or no branches at all).
1837  typedef std::vector<MachineBasicBlock*> MBBVector;
1838  MBBVector Preds(Header->pred_begin(), Header->pred_end());
1840  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1841 
1842  if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1843  return nullptr;
1844 
1845  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1846  MachineBasicBlock *PB = *I;
1847  bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
1848  if (NotAnalyzed)
1849  return nullptr;
1850  }
1851 
1853  MF->insert(Header, NewPH);
1854 
1855  if (Header->pred_size() > 2) {
1856  // Ensure that the header has only two predecessors: the preheader and
1857  // the loop latch. Any additional predecessors of the header should
1858  // join at the newly created preheader. Inspect all PHI nodes from the
1859  // header and create appropriate corresponding PHI nodes in the preheader.
1860 
1861  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1862  I != E && I->isPHI(); ++I) {
1863  MachineInstr *PN = &*I;
1864 
1865  const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1866  MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1867  NewPH->insert(NewPH->end(), NewPN);
1868 
1869  unsigned PR = PN->getOperand(0).getReg();
1870  const TargetRegisterClass *RC = MRI->getRegClass(PR);
1871  unsigned NewPR = MRI->createVirtualRegister(RC);
1872  NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1873 
1874  // Copy all non-latch operands of a header's PHI node to the newly
1875  // created PHI node in the preheader.
1876  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1877  unsigned PredR = PN->getOperand(i).getReg();
1878  unsigned PredRSub = PN->getOperand(i).getSubReg();
1879  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1880  if (PredB == Latch)
1881  continue;
1882 
1883  MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1884  MO.setSubReg(PredRSub);
1885  NewPN->addOperand(MO);
1886  NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1887  }
1888 
1889  // Remove copied operands from the old PHI node and add the value
1890  // coming from the preheader's PHI.
1891  for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1892  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1893  if (PredB != Latch) {
1894  PN->RemoveOperand(i+1);
1895  PN->RemoveOperand(i);
1896  }
1897  }
1898  PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1900  }
1901 
1902  } else {
1903  assert(Header->pred_size() == 2);
1904 
1905  // The header has only two predecessors, but the non-latch predecessor
1906  // is not a preheader (e.g. it has other successors, etc.)
1907  // In such a case we don't need any extra PHI nodes in the new preheader,
1908  // all we need is to adjust existing PHIs in the header to now refer to
1909  // the new preheader.
1910  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1911  I != E && I->isPHI(); ++I) {
1912  MachineInstr *PN = &*I;
1913  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1914  MachineOperand &MO = PN->getOperand(i+1);
1915  if (MO.getMBB() != Latch)
1916  MO.setMBB(NewPH);
1917  }
1918  }
1919  }
1920 
1921  // "Reroute" the CFG edges to link in the new preheader.
1922  // If any of the predecessors falls through to the header, insert a branch
1923  // to the new preheader in that place.
1926 
1927  TB = FB = nullptr;
1928 
1929  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1930  MachineBasicBlock *PB = *I;
1931  if (PB != Latch) {
1932  Tmp2.clear();
1933  bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false);
1934  (void)NotAnalyzed; // suppress compiler warning
1935  assert (!NotAnalyzed && "Should be analyzable!");
1936  if (TB != Header && (Tmp2.empty() || FB != Header))
1937  TII->InsertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1938  PB->ReplaceUsesOfBlockWith(Header, NewPH);
1939  }
1940  }
1941 
1942  // It can happen that the latch block will fall through into the header.
1943  // Insert an unconditional branch to the header.
1944  TB = FB = nullptr;
1945  bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
1946  (void)LatchNotAnalyzed; // suppress compiler warning
1947  assert (!LatchNotAnalyzed && "Should be analyzable!");
1948  if (!TB && !FB)
1949  TII->InsertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1950 
1951  // Finally, the branch from the preheader to the header.
1952  TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1953  NewPH->addSuccessor(Header);
1954 
1955  MachineLoop *ParentLoop = L->getParentLoop();
1956  if (ParentLoop)
1957  ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1958 
1959  // Update the dominator information with the new preheader.
1960  if (MDT) {
1961  MachineDomTreeNode *HDom = MDT->getNode(Header);
1962  MDT->addNewBlock(NewPH, HDom->getIDom()->getBlock());
1963  MDT->changeImmediateDominator(Header, NewPH);
1964  }
1965 
1966  return NewPH;
1967 }
static bool isReg(const MCInst &MI, unsigned OpNo)
bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:294
bool isImplicit() const
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
instr_iterator instr_end()
STATISTIC(NumFunctions,"Total number of functions")
MachineBasicBlock * getMBB() const
void initializeHexagonHardwareLoopsPass(PassRegistry &)
static cl::opt< bool > HWCreatePreheader("hexagon-hwloop-preheader", cl::Hidden, cl::init(true), cl::desc("Add a preheader to a hardware loop if one doesn't exist"))
iterator getFirstTerminator()
getFirstTerminator - returns an iterator to the first terminator instruction of this basic block...
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:138
static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm)
static bool isVirtualRegister(unsigned Reg)
isVirtualRegister - Return true if the specified register number is in the virtual register namespace...
bool isLoopExiting(const BlockT *BB) const
isLoopExiting - True if terminator in the block can branch to another block that is outside of the cu...
Definition: LoopInfo.h:152
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:264
static cl::opt< std::string > PHFn("hexagon-hwloop-phfn", cl::Hidden, cl::init(""))
LoopT * getParentLoop() const
Definition: LoopInfo.h:97
A debug info location.
Definition: DebugLoc.h:34
F(f)
FunctionPass * createHexagonHardwareLoops()
Instructions::iterator instr_iterator
const std::vector< BlockT * > & getBlocks() const
getBlocks - Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:139
BlockT * getHeader() const
Definition: LoopInfo.h:96
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallSet.h:44
BlockT * getLoopLatch() const
getLoopLatch - If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:156
INITIALIZE_PASS_BEGIN(HexagonHardwareLoops,"hwloops","Hexagon Hardware Loops", false, false) INITIALIZE_PASS_END(HexagonHardwareLoops
COPY - Target-independent register copy.
Definition: TargetOpcodes.h:86
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:70
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
ReplaceUsesOfBlockWith - Given a machine basic block that branched to 'Old', change the code and CFG ...
Hexagon Hardware Loops
MachineBasicBlock * getTopBlock()
getTopBlock - Return the "top" block in the loop, which is the first block in the linear layout...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
bool isPHI() const
Definition: MachineInstr.h:757
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
A Use represents the edge between a Value definition and its users.
Definition: Use.h:69
bool isCall() const
Return true if the instruction is a call.
Definition: MCInstrDesc.h:214
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:75
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:268
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
const MachineInstrBuilder & addImm(int64_t Val) const
addImm - Add a new immediate operand.
#define G(x, y, z)
Definition: MD5.cpp:52
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:271
defusechain_iterator - This class provides iterator support for machine operands in the function that...
PrintReg - Helper class for printing registers on a raw_ostream.
void RemoveOperand(unsigned i)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
addBasicBlockToLoop - This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:187
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
Base class for the actual dominator tree node.
std::vector< MachineBasicBlock * >::iterator pred_iterator
#define EQ(a, b)
Definition: regexec.c:112
int64_t getImm() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void dump() const
const BasicBlock * getBasicBlock() const
getBasicBlock - Return the LLVM basic block that this instance corresponded to originally.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:120
bundle_iterator< MachineInstr, instr_iterator > iterator
static cl::opt< int > HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1))
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
iterator begin() const
Definition: LoopInfo.h:131
BlockT * getLoopPreheader() const
getLoopPreheader - If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:108
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static BlockAddress * get(Function *F, BasicBlock *BB)
get - Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1496
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:32
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
void setMBB(MachineBasicBlock *MBB)
iterator end() const
Definition: LoopInfo.h:132
bool isCopy() const
Definition: MachineInstr.h:778
Represent the analysis usage information of a pass.
bool contains(const LoopT *L) const
contains - Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:105
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
BlockT * getExitingBlock() const
getExitingBlock - If getExitingBlocks would return exactly one block, return that block...
Definition: LoopInfoImpl.h:51
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:69
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
unsigned getSubReg() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
REG_SEQUENCE - This variadic instruction is used to form a register that represents a consecutive seq...
Definition: TargetOpcodes.h:82
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:53
DomTreeNodeBase< NodeT > * getIDom() const
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
Hexagon Hardware false
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
Definition: MachineInstr.h:464
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
unsigned Log2_32(uint32_t Value)
Log2_32 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:468
void dump() const
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
void setHasAddressTaken()
setHasAddressTaken - Set this block to reflect that it potentially is the target of an indirect branc...
bool isPowerOf2_64(uint64_t Value)
isPowerOf2_64 - This function returns true if the argument is a power of two 0 (64 bit edition...
Definition: MathExtras.h:360
NodeT * getBlock() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:238
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1030
Representation of each machine instruction.
Definition: MachineInstr.h:51
bool hasAddressTaken() const
hasAddressTaken - Test whether this block is potentially the target of an indirect branch...
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0)
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:54
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:651
void setSubReg(unsigned subReg)
unsigned getReg() const
getReg - Returns the register number.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const ARM::ArchExtKind Kind
bool isDebug() const
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:38
#define DEBUG(X)
Definition: Debug.h:92
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:41
std::vector< LoopT * >::const_iterator iterator
Definition: LoopInfo.h:128
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
void addSuccessor(MachineBasicBlock *succ, uint32_t weight=0)
addSuccessor - Add succ as a successor of this MachineBasicBlock.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
static MachineBasicBlock * getExitingBlock(MachineLoop *L)
Return the latch block if it's one of the exiting blocks.