LLVM  4.0.0
HexagonHardwareLoops.cpp
Go to the documentation of this file.
1 //===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass identifies loops where we can generate the Hexagon hardware
11 // loop instruction. The hardware loop can perform loop branches with a
12 // zero-cycle overhead.
13 //
14 // The pattern that defines the induction variable can changed depending on
15 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
16 // normalizes induction variables, and the Loop Strength Reduction pass
17 // run by 'llc' may also make changes to the induction variable.
18 // The pattern detected by this phase is due to running Strength Reduction.
19 //
20 // Criteria for hardware loops:
21 // - Countable loops (w/ ind. var for a trip count)
22 // - Assumes loops are normalized by IndVarSimplify
23 // - Try inner-most loops first
24 // - No function calls in loops.
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "HexagonInstrInfo.h"
29 #include "HexagonSubtarget.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/ADT/StringRef.h"
43 #include "llvm/IR/Constants.h"
44 #include "llvm/IR/DebugLoc.h"
45 #include "llvm/Pass.h"
47 #include "llvm/Support/Debug.h"
52 #include <cassert>
53 #include <cstdint>
54 #include <cstdlib>
55 #include <iterator>
56 #include <map>
57 #include <set>
58 #include <utility>
59 #include <vector>
60 
61 using namespace llvm;
62 
63 #define DEBUG_TYPE "hwloops"
64 
65 #ifndef NDEBUG
66 static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
67 
68 // Option to create preheader only for a specific function.
69 static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
70  cl::init(""));
71 #endif
72 
73 // Option to create a preheader if one doesn't exist.
74 static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
75  cl::Hidden, cl::init(true),
76  cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
77 
78 // Turn it off by default. If a preheader block is not created here, the
79 // software pipeliner may be unable to find a block suitable to serve as
80 // a preheader. In that case SWP will not run.
81 static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false),
82  cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader "
83  "instructions"));
84 
85 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
86 
87 namespace llvm {
88 
91 
92 } // end namespace llvm
93 
94 namespace {
95 
96  class CountValue;
97 
98  struct HexagonHardwareLoops : public MachineFunctionPass {
99  MachineLoopInfo *MLI;
102  const HexagonInstrInfo *TII;
103 #ifndef NDEBUG
104  static int Counter;
105 #endif
106 
107  public:
108  static char ID;
109 
110  HexagonHardwareLoops() : MachineFunctionPass(ID) {
112  }
113 
114  bool runOnMachineFunction(MachineFunction &MF) override;
115 
116  StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
117 
118  void getAnalysisUsage(AnalysisUsage &AU) const override {
122  }
123 
124  private:
125  typedef std::map<unsigned, MachineInstr *> LoopFeederMap;
126 
127  /// Kinds of comparisons in the compare instructions.
128  struct Comparison {
129  enum Kind {
130  EQ = 0x01,
131  NE = 0x02,
132  L = 0x04,
133  G = 0x08,
134  U = 0x40,
135  LTs = L,
136  LEs = L | EQ,
137  GTs = G,
138  GEs = G | EQ,
139  LTu = L | U,
140  LEu = L | EQ | U,
141  GTu = G | U,
142  GEu = G | EQ | U
143  };
144 
145  static Kind getSwappedComparison(Kind Cmp) {
146  assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
147  if ((Cmp & L) || (Cmp & G))
148  return (Kind)(Cmp ^ (L|G));
149  return Cmp;
150  }
151 
152  static Kind getNegatedComparison(Kind Cmp) {
153  if ((Cmp & L) || (Cmp & G))
154  return (Kind)((Cmp ^ (L | G)) ^ EQ);
155  if ((Cmp & NE) || (Cmp & EQ))
156  return (Kind)(Cmp ^ (EQ | NE));
157  return (Kind)0;
158  }
159 
160  static bool isSigned(Kind Cmp) {
161  return (Cmp & (L | G) && !(Cmp & U));
162  }
163 
164  static bool isUnsigned(Kind Cmp) {
165  return (Cmp & U);
166  }
167  };
168 
169  /// \brief Find the register that contains the loop controlling
170  /// induction variable.
171  /// If successful, it will return true and set the \p Reg, \p IVBump
172  /// and \p IVOp arguments. Otherwise it will return false.
173  /// The returned induction register is the register R that follows the
174  /// following induction pattern:
175  /// loop:
176  /// R = phi ..., [ R.next, LatchBlock ]
177  /// R.next = R + #bump
178  /// if (R.next < #N) goto loop
179  /// IVBump is the immediate value added to R, and IVOp is the instruction
180  /// "R.next = R + #bump".
181  bool findInductionRegister(MachineLoop *L, unsigned &Reg,
182  int64_t &IVBump, MachineInstr *&IVOp) const;
183 
184  /// \brief Return the comparison kind for the specified opcode.
185  Comparison::Kind getComparisonKind(unsigned CondOpc,
186  MachineOperand *InitialValue,
187  const MachineOperand *Endvalue,
188  int64_t IVBump) const;
189 
190  /// \brief Analyze the statements in a loop to determine if the loop
191  /// has a computable trip count and, if so, return a value that represents
192  /// the trip count expression.
193  CountValue *getLoopTripCount(MachineLoop *L,
195 
196  /// \brief Return the expression that represents the number of times
197  /// a loop iterates. The function takes the operands that represent the
198  /// loop start value, loop end value, and induction value. Based upon
199  /// these operands, the function attempts to compute the trip count.
200  /// If the trip count is not directly available (as an immediate value,
201  /// or a register), the function will attempt to insert computation of it
202  /// to the loop's preheader.
203  CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
204  const MachineOperand *End, unsigned IVReg,
205  int64_t IVBump, Comparison::Kind Cmp) const;
206 
207  /// \brief Return true if the instruction is not valid within a hardware
208  /// loop.
209  bool isInvalidLoopOperation(const MachineInstr *MI,
210  bool IsInnerHWLoop) const;
211 
212  /// \brief Return true if the loop contains an instruction that inhibits
213  /// using the hardware loop.
214  bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
215 
216  /// \brief Given a loop, check if we can convert it to a hardware loop.
217  /// If so, then perform the conversion and return true.
218  bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
219 
220  /// \brief Return true if the instruction is now dead.
221  bool isDead(const MachineInstr *MI,
222  SmallVectorImpl<MachineInstr *> &DeadPhis) const;
223 
224  /// \brief Remove the instruction if it is now dead.
225  void removeIfDead(MachineInstr *MI);
226 
227  /// \brief Make sure that the "bump" instruction executes before the
228  /// compare. We need that for the IV fixup, so that the compare
229  /// instruction would not use a bumped value that has not yet been
230  /// defined. If the instructions are out of order, try to reorder them.
231  bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
232 
233  /// \brief Return true if MO and MI pair is visited only once. If visited
234  /// more than once, this indicates there is recursion. In such a case,
235  /// return false.
236  bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
237  const MachineOperand *MO,
238  LoopFeederMap &LoopFeederPhi) const;
239 
240  /// \brief Return true if the Phi may generate a value that may underflow,
241  /// or may wrap.
242  bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
244  LoopFeederMap &LoopFeederPhi) const;
245 
246  /// \brief Return true if the induction variable may underflow an unsigned
247  /// value in the first iteration.
248  bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
249  const MachineOperand *EndVal,
251  LoopFeederMap &LoopFeederPhi) const;
252 
253  /// \brief Check if the given operand has a compile-time known constant
254  /// value. Return true if yes, and false otherwise. When returning true, set
255  /// Val to the corresponding constant value.
256  bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
257 
258  /// \brief Check if the operand has a compile-time known constant value.
259  bool isImmediate(const MachineOperand &MO) const {
260  int64_t V;
261  return checkForImmediate(MO, V);
262  }
263 
264  /// \brief Return the immediate for the specified operand.
265  int64_t getImmediate(const MachineOperand &MO) const {
266  int64_t V;
267  if (!checkForImmediate(MO, V))
268  llvm_unreachable("Invalid operand");
269  return V;
270  }
271 
272  /// \brief Reset the given machine operand to now refer to a new immediate
273  /// value. Assumes that the operand was already referencing an immediate
274  /// value, either directly, or via a register.
275  void setImmediate(MachineOperand &MO, int64_t Val);
276 
277  /// \brief Fix the data flow of the induction varible.
278  /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
279  /// |
280  /// +-> back to phi
281  /// where "bump" is the increment of the induction variable:
282  /// iv = iv + #const.
283  /// Due to some prior code transformations, the actual flow may look
284  /// like this:
285  /// phi -+-> bump ---> back to phi
286  /// |
287  /// +-> comparison-in-latch (against upper_bound-bump),
288  /// i.e. the comparison that controls the loop execution may be using
289  /// the value of the induction variable from before the increment.
290  ///
291  /// Return true if the loop's flow is the desired one (i.e. it's
292  /// either been fixed, or no fixing was necessary).
293  /// Otherwise, return false. This can happen if the induction variable
294  /// couldn't be identified, or if the value in the latch's comparison
295  /// cannot be adjusted to reflect the post-bump value.
296  bool fixupInductionVariable(MachineLoop *L);
297 
298  /// \brief Given a loop, if it does not have a preheader, create one.
299  /// Return the block that is the preheader.
300  MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
301  };
302 
303  char HexagonHardwareLoops::ID = 0;
304 #ifndef NDEBUG
306 #endif
307 
308  /// \brief Abstraction for a trip count of a loop. A smaller version
309  /// of the MachineOperand class without the concerns of changing the
310  /// operand representation.
311  class CountValue {
312  public:
313  enum CountValueType {
314  CV_Register,
315  CV_Immediate
316  };
317 
318  private:
319  CountValueType Kind;
320  union Values {
321  struct {
322  unsigned Reg;
323  unsigned Sub;
324  } R;
325  unsigned ImmVal;
326  } Contents;
327 
328  public:
329  explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
330  Kind = t;
331  if (Kind == CV_Register) {
332  Contents.R.Reg = v;
333  Contents.R.Sub = u;
334  } else {
335  Contents.ImmVal = v;
336  }
337  }
338 
339  bool isReg() const { return Kind == CV_Register; }
340  bool isImm() const { return Kind == CV_Immediate; }
341 
342  unsigned getReg() const {
343  assert(isReg() && "Wrong CountValue accessor");
344  return Contents.R.Reg;
345  }
346  unsigned getSubReg() const {
347  assert(isReg() && "Wrong CountValue accessor");
348  return Contents.R.Sub;
349  }
350  unsigned getImm() const {
351  assert(isImm() && "Wrong CountValue accessor");
352  return Contents.ImmVal;
353  }
354 
355  void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
356  if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
357  if (isImm()) { OS << Contents.ImmVal; }
358  }
359  };
360 
361 } // end anonymous namespace
362 
363 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
364  "Hexagon Hardware Loops", false, false)
367 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
368  "Hexagon Hardware Loops", false, false)
369 
371  return new HexagonHardwareLoops();
372 }
373 
374 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
375  DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
376  if (skipFunction(*MF.getFunction()))
377  return false;
378 
379  bool Changed = false;
380 
381  MLI = &getAnalysis<MachineLoopInfo>();
382  MRI = &MF.getRegInfo();
383  MDT = &getAnalysis<MachineDominatorTree>();
384  TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
385 
386  for (auto &L : *MLI)
387  if (!L->getParentLoop()) {
388  bool L0Used = false;
389  bool L1Used = false;
390  Changed |= convertToHardwareLoop(L, L0Used, L1Used);
391  }
392 
393  return Changed;
394 }
395 
396 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
397  unsigned &Reg,
398  int64_t &IVBump,
399  MachineInstr *&IVOp
400  ) const {
401  MachineBasicBlock *Header = L->getHeader();
402  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
403  MachineBasicBlock *Latch = L->getLoopLatch();
404  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
405  if (!Header || !Preheader || !Latch || !ExitingBlock)
406  return false;
407 
408  // This pair represents an induction register together with an immediate
409  // value that will be added to it in each loop iteration.
410  typedef std::pair<unsigned,int64_t> RegisterBump;
411 
412  // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
413  // from an induction operation
414  // R.next = R + bump
415  // where bump is an immediate value.
416  typedef std::map<unsigned,RegisterBump> InductionMap;
417 
418  InductionMap IndMap;
419 
420  typedef MachineBasicBlock::instr_iterator instr_iterator;
421  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
422  I != E && I->isPHI(); ++I) {
423  MachineInstr *Phi = &*I;
424 
425  // Have a PHI instruction. Get the operand that corresponds to the
426  // latch block, and see if is a result of an addition of form "reg+imm",
427  // where the "reg" is defined by the PHI node we are looking at.
428  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
429  if (Phi->getOperand(i+1).getMBB() != Latch)
430  continue;
431 
432  unsigned PhiOpReg = Phi->getOperand(i).getReg();
433  MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
434 
435  if (DI->getDesc().isAdd()) {
436  // If the register operand to the add is the PHI we're looking at, this
437  // meets the induction pattern.
438  unsigned IndReg = DI->getOperand(1).getReg();
439  MachineOperand &Opnd2 = DI->getOperand(2);
440  int64_t V;
441  if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
442  unsigned UpdReg = DI->getOperand(0).getReg();
443  IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
444  }
445  }
446  } // for (i)
447  } // for (instr)
448 
450  MachineBasicBlock *TB = nullptr, *FB = nullptr;
451  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
452  if (NotAnalyzed)
453  return false;
454 
455  unsigned PredR, PredPos, PredRegFlags;
456  if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
457  return false;
458 
459  MachineInstr *PredI = MRI->getVRegDef(PredR);
460  if (!PredI->isCompare())
461  return false;
462 
463  unsigned CmpReg1 = 0, CmpReg2 = 0;
464  int CmpImm = 0, CmpMask = 0;
465  bool CmpAnalyzed =
466  TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
467  // Fail if the compare was not analyzed, or it's not comparing a register
468  // with an immediate value. Not checking the mask here, since we handle
469  // the individual compare opcodes (including A4_cmpb*) later on.
470  if (!CmpAnalyzed)
471  return false;
472 
473  // Exactly one of the input registers to the comparison should be among
474  // the induction registers.
475  InductionMap::iterator IndMapEnd = IndMap.end();
476  InductionMap::iterator F = IndMapEnd;
477  if (CmpReg1 != 0) {
478  InductionMap::iterator F1 = IndMap.find(CmpReg1);
479  if (F1 != IndMapEnd)
480  F = F1;
481  }
482  if (CmpReg2 != 0) {
483  InductionMap::iterator F2 = IndMap.find(CmpReg2);
484  if (F2 != IndMapEnd) {
485  if (F != IndMapEnd)
486  return false;
487  F = F2;
488  }
489  }
490  if (F == IndMapEnd)
491  return false;
492 
493  Reg = F->second.first;
494  IVBump = F->second.second;
495  IVOp = MRI->getVRegDef(F->first);
496  return true;
497 }
498 
499 // Return the comparison kind for the specified opcode.
500 HexagonHardwareLoops::Comparison::Kind
501 HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
502  MachineOperand *InitialValue,
503  const MachineOperand *EndValue,
504  int64_t IVBump) const {
506  switch (CondOpc) {
507  case Hexagon::C2_cmpeqi:
508  case Hexagon::C2_cmpeq:
509  case Hexagon::C2_cmpeqp:
510  Cmp = Comparison::EQ;
511  break;
512  case Hexagon::C4_cmpneq:
513  case Hexagon::C4_cmpneqi:
514  Cmp = Comparison::NE;
515  break;
516  case Hexagon::C4_cmplte:
517  Cmp = Comparison::LEs;
518  break;
519  case Hexagon::C4_cmplteu:
520  Cmp = Comparison::LEu;
521  break;
522  case Hexagon::C2_cmpgtui:
523  case Hexagon::C2_cmpgtu:
524  case Hexagon::C2_cmpgtup:
525  Cmp = Comparison::GTu;
526  break;
527  case Hexagon::C2_cmpgti:
528  case Hexagon::C2_cmpgt:
529  case Hexagon::C2_cmpgtp:
530  Cmp = Comparison::GTs;
531  break;
532  default:
533  return (Comparison::Kind)0;
534  }
535  return Cmp;
536 }
537 
538 /// \brief Analyze the statements in a loop to determine if the loop has
539 /// a computable trip count and, if so, return a value that represents
540 /// the trip count expression.
541 ///
542 /// This function iterates over the phi nodes in the loop to check for
543 /// induction variable patterns that are used in the calculation for
544 /// the number of time the loop is executed.
545 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
547  MachineBasicBlock *TopMBB = L->getTopBlock();
549  assert(PI != TopMBB->pred_end() &&
550  "Loop must have more than one incoming edge!");
551  MachineBasicBlock *Backedge = *PI++;
552  if (PI == TopMBB->pred_end()) // dead loop?
553  return nullptr;
554  MachineBasicBlock *Incoming = *PI++;
555  if (PI != TopMBB->pred_end()) // multiple backedges?
556  return nullptr;
557 
558  // Make sure there is one incoming and one backedge and determine which
559  // is which.
560  if (L->contains(Incoming)) {
561  if (L->contains(Backedge))
562  return nullptr;
563  std::swap(Incoming, Backedge);
564  } else if (!L->contains(Backedge))
565  return nullptr;
566 
567  // Look for the cmp instruction to determine if we can get a useful trip
568  // count. The trip count can be either a register or an immediate. The
569  // location of the value depends upon the type (reg or imm).
570  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
571  if (!ExitingBlock)
572  return nullptr;
573 
574  unsigned IVReg = 0;
575  int64_t IVBump = 0;
576  MachineInstr *IVOp;
577  bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
578  if (!FoundIV)
579  return nullptr;
580 
581  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
582 
583  MachineOperand *InitialValue = nullptr;
584  MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
585  MachineBasicBlock *Latch = L->getLoopLatch();
586  for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
587  MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
588  if (MBB == Preheader)
589  InitialValue = &IV_Phi->getOperand(i);
590  else if (MBB == Latch)
591  IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
592  }
593  if (!InitialValue)
594  return nullptr;
595 
597  MachineBasicBlock *TB = nullptr, *FB = nullptr;
598  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
599  if (NotAnalyzed)
600  return nullptr;
601 
602  MachineBasicBlock *Header = L->getHeader();
603  // TB must be non-null. If FB is also non-null, one of them must be
604  // the header. Otherwise, branch to TB could be exiting the loop, and
605  // the fall through can go to the header.
606  assert (TB && "Exit block without a branch?");
607  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
608  MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
610  bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
611  if (NotAnalyzed)
612  return nullptr;
613  if (TB == Latch)
614  TB = (LTB == Header) ? LTB : LFB;
615  else
616  FB = (LTB == Header) ? LTB: LFB;
617  }
618  assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
619  if (!TB || (FB && TB != Header && FB != Header))
620  return nullptr;
621 
622  // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
623  // to put imm(0), followed by P in the vector Cond.
624  // If TB is not the header, it means that the "not-taken" path must lead
625  // to the header.
626  bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
627  unsigned PredReg, PredPos, PredRegFlags;
628  if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
629  return nullptr;
630  MachineInstr *CondI = MRI->getVRegDef(PredReg);
631  unsigned CondOpc = CondI->getOpcode();
632 
633  unsigned CmpReg1 = 0, CmpReg2 = 0;
634  int Mask = 0, ImmValue = 0;
635  bool AnalyzedCmp =
636  TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
637  if (!AnalyzedCmp)
638  return nullptr;
639 
640  // The comparison operator type determines how we compute the loop
641  // trip count.
642  OldInsts.push_back(CondI);
643  OldInsts.push_back(IVOp);
644 
645  // Sadly, the following code gets information based on the position
646  // of the operands in the compare instruction. This has to be done
647  // this way, because the comparisons check for a specific relationship
648  // between the operands (e.g. is-less-than), rather than to find out
649  // what relationship the operands are in (as on PPC).
650  Comparison::Kind Cmp;
651  bool isSwapped = false;
652  const MachineOperand &Op1 = CondI->getOperand(1);
653  const MachineOperand &Op2 = CondI->getOperand(2);
654  const MachineOperand *EndValue = nullptr;
655 
656  if (Op1.isReg()) {
657  if (Op2.isImm() || Op1.getReg() == IVReg)
658  EndValue = &Op2;
659  else {
660  EndValue = &Op1;
661  isSwapped = true;
662  }
663  }
664 
665  if (!EndValue)
666  return nullptr;
667 
668  Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
669  if (!Cmp)
670  return nullptr;
671  if (Negated)
672  Cmp = Comparison::getNegatedComparison(Cmp);
673  if (isSwapped)
674  Cmp = Comparison::getSwappedComparison(Cmp);
675 
676  if (InitialValue->isReg()) {
677  unsigned R = InitialValue->getReg();
678  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
679  if (!MDT->properlyDominates(DefBB, Header))
680  return nullptr;
681  OldInsts.push_back(MRI->getVRegDef(R));
682  }
683  if (EndValue->isReg()) {
684  unsigned R = EndValue->getReg();
685  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
686  if (!MDT->properlyDominates(DefBB, Header))
687  return nullptr;
688  OldInsts.push_back(MRI->getVRegDef(R));
689  }
690 
691  return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
692 }
693 
694 /// \brief Helper function that returns the expression that represents the
695 /// number of times a loop iterates. The function takes the operands that
696 /// represent the loop start value, loop end value, and induction value.
697 /// Based upon these operands, the function attempts to compute the trip count.
698 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
699  const MachineOperand *Start,
700  const MachineOperand *End,
701  unsigned IVReg,
702  int64_t IVBump,
703  Comparison::Kind Cmp) const {
704  // Cannot handle comparison EQ, i.e. while (A == B).
705  if (Cmp == Comparison::EQ)
706  return nullptr;
707 
708  // Check if either the start or end values are an assignment of an immediate.
709  // If so, use the immediate value rather than the register.
710  if (Start->isReg()) {
711  const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
712  if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
713  StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
714  Start = &StartValInstr->getOperand(1);
715  }
716  if (End->isReg()) {
717  const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
718  if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
719  EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
720  End = &EndValInstr->getOperand(1);
721  }
722 
723  if (!Start->isReg() && !Start->isImm())
724  return nullptr;
725  if (!End->isReg() && !End->isImm())
726  return nullptr;
727 
728  bool CmpLess = Cmp & Comparison::L;
729  bool CmpGreater = Cmp & Comparison::G;
730  bool CmpHasEqual = Cmp & Comparison::EQ;
731 
732  // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
733  if (CmpLess && IVBump < 0)
734  // Loop going while iv is "less" with the iv value going down. Must wrap.
735  return nullptr;
736 
737  if (CmpGreater && IVBump > 0)
738  // Loop going while iv is "greater" with the iv value going up. Must wrap.
739  return nullptr;
740 
741  // Phis that may feed into the loop.
742  LoopFeederMap LoopFeederPhi;
743 
744  // Check if the initial value may be zero and can be decremented in the first
745  // iteration. If the value is zero, the endloop instruction will not decrement
746  // the loop counter, so we shouldn't generate a hardware loop in this case.
747  if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
748  LoopFeederPhi))
749  return nullptr;
750 
751  if (Start->isImm() && End->isImm()) {
752  // Both, start and end are immediates.
753  int64_t StartV = Start->getImm();
754  int64_t EndV = End->getImm();
755  int64_t Dist = EndV - StartV;
756  if (Dist == 0)
757  return nullptr;
758 
759  bool Exact = (Dist % IVBump) == 0;
760 
761  if (Cmp == Comparison::NE) {
762  if (!Exact)
763  return nullptr;
764  if ((Dist < 0) ^ (IVBump < 0))
765  return nullptr;
766  }
767 
768  // For comparisons that include the final value (i.e. include equality
769  // with the final value), we need to increase the distance by 1.
770  if (CmpHasEqual)
771  Dist = Dist > 0 ? Dist+1 : Dist-1;
772 
773  // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
774  // CmpGreater should imply Dist < 0. These conditions could actually
775  // fail, for example, in unreachable code (which may still appear to be
776  // reachable in the CFG).
777  if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
778  return nullptr;
779 
780  // "Normalized" distance, i.e. with the bump set to +-1.
781  int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
782  : (-Dist + (-IVBump - 1)) / (-IVBump);
783  assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
784 
785  uint64_t Count = Dist1;
786 
787  if (Count > 0xFFFFFFFFULL)
788  return nullptr;
789 
790  return new CountValue(CountValue::CV_Immediate, Count);
791  }
792 
793  // A general case: Start and End are some values, but the actual
794  // iteration count may not be available. If it is not, insert
795  // a computation of it into the preheader.
796 
797  // If the induction variable bump is not a power of 2, quit.
798  // Othwerise we'd need a general integer division.
799  if (!isPowerOf2_64(std::abs(IVBump)))
800  return nullptr;
801 
802  MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
803  assert (PH && "Should have a preheader by now");
805  DebugLoc DL;
806  if (InsertPos != PH->end())
807  DL = InsertPos->getDebugLoc();
808 
809  // If Start is an immediate and End is a register, the trip count
810  // will be "reg - imm". Hexagon's "subtract immediate" instruction
811  // is actually "reg + -imm".
812 
813  // If the loop IV is going downwards, i.e. if the bump is negative,
814  // then the iteration count (computed as End-Start) will need to be
815  // negated. To avoid the negation, just swap Start and End.
816  if (IVBump < 0) {
817  std::swap(Start, End);
818  IVBump = -IVBump;
819  }
820  // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
821  // Signedness, and "including equality" are preserved.
822 
823  bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
824  bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
825 
826  int64_t StartV = 0, EndV = 0;
827  if (Start->isImm())
828  StartV = Start->getImm();
829  if (End->isImm())
830  EndV = End->getImm();
831 
832  int64_t AdjV = 0;
833  // To compute the iteration count, we would need this computation:
834  // Count = (End - Start + (IVBump-1)) / IVBump
835  // or, when CmpHasEqual:
836  // Count = (End - Start + (IVBump-1)+1) / IVBump
837  // The "IVBump-1" part is the adjustment (AdjV). We can avoid
838  // generating an instruction specifically to add it if we can adjust
839  // the immediate values for Start or End.
840 
841  if (CmpHasEqual) {
842  // Need to add 1 to the total iteration count.
843  if (Start->isImm())
844  StartV--;
845  else if (End->isImm())
846  EndV++;
847  else
848  AdjV += 1;
849  }
850 
851  if (Cmp != Comparison::NE) {
852  if (Start->isImm())
853  StartV -= (IVBump-1);
854  else if (End->isImm())
855  EndV += (IVBump-1);
856  else
857  AdjV += (IVBump-1);
858  }
859 
860  unsigned R = 0, SR = 0;
861  if (Start->isReg()) {
862  R = Start->getReg();
863  SR = Start->getSubReg();
864  } else {
865  R = End->getReg();
866  SR = End->getSubReg();
867  }
868  const TargetRegisterClass *RC = MRI->getRegClass(R);
869  // Hardware loops cannot handle 64-bit registers. If it's a double
870  // register, it has to have a subregister.
871  if (!SR && RC == &Hexagon::DoubleRegsRegClass)
872  return nullptr;
873  const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
874 
875  // Compute DistR (register with the distance between Start and End).
876  unsigned DistR, DistSR;
877 
878  // Avoid special case, where the start value is an imm(0).
879  if (Start->isImm() && StartV == 0) {
880  DistR = End->getReg();
881  DistSR = End->getSubReg();
882  } else {
883  const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
884  (RegToImm ? TII->get(Hexagon::A2_subri) :
885  TII->get(Hexagon::A2_addi));
886  if (RegToReg || RegToImm) {
887  unsigned SubR = MRI->createVirtualRegister(IntRC);
888  MachineInstrBuilder SubIB =
889  BuildMI(*PH, InsertPos, DL, SubD, SubR);
890 
891  if (RegToReg)
892  SubIB.addReg(End->getReg(), 0, End->getSubReg())
893  .addReg(Start->getReg(), 0, Start->getSubReg());
894  else
895  SubIB.addImm(EndV)
896  .addReg(Start->getReg(), 0, Start->getSubReg());
897  DistR = SubR;
898  } else {
899  // If the loop has been unrolled, we should use the original loop count
900  // instead of recalculating the value. This will avoid additional
901  // 'Add' instruction.
902  const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
903  if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
904  EndValInstr->getOperand(2).getImm() == StartV) {
905  DistR = EndValInstr->getOperand(1).getReg();
906  } else {
907  unsigned SubR = MRI->createVirtualRegister(IntRC);
908  MachineInstrBuilder SubIB =
909  BuildMI(*PH, InsertPos, DL, SubD, SubR);
910  SubIB.addReg(End->getReg(), 0, End->getSubReg())
911  .addImm(-StartV);
912  DistR = SubR;
913  }
914  }
915  DistSR = 0;
916  }
917 
918  // From DistR, compute AdjR (register with the adjusted distance).
919  unsigned AdjR, AdjSR;
920 
921  if (AdjV == 0) {
922  AdjR = DistR;
923  AdjSR = DistSR;
924  } else {
925  // Generate CountR = ADD DistR, AdjVal
926  unsigned AddR = MRI->createVirtualRegister(IntRC);
927  MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
928  BuildMI(*PH, InsertPos, DL, AddD, AddR)
929  .addReg(DistR, 0, DistSR)
930  .addImm(AdjV);
931 
932  AdjR = AddR;
933  AdjSR = 0;
934  }
935 
936  // From AdjR, compute CountR (register with the final count).
937  unsigned CountR, CountSR;
938 
939  if (IVBump == 1) {
940  CountR = AdjR;
941  CountSR = AdjSR;
942  } else {
943  // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
944  unsigned Shift = Log2_32(IVBump);
945 
946  // Generate NormR = LSR DistR, Shift.
947  unsigned LsrR = MRI->createVirtualRegister(IntRC);
948  const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
949  BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
950  .addReg(AdjR, 0, AdjSR)
951  .addImm(Shift);
952 
953  CountR = LsrR;
954  CountSR = 0;
955  }
956 
957  return new CountValue(CountValue::CV_Register, CountR, CountSR);
958 }
959 
960 /// \brief Return true if the operation is invalid within hardware loop.
961 bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
962  bool IsInnerHWLoop) const {
963 
964  // Call is not allowed because the callee may use a hardware loop except for
965  // the case when the call never returns.
966  if (MI->getDesc().isCall())
967  return !TII->doesNotReturn(*MI);
968 
969  // Check if the instruction defines a hardware loop register.
970  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
971  const MachineOperand &MO = MI->getOperand(i);
972  if (!MO.isReg() || !MO.isDef())
973  continue;
974  unsigned R = MO.getReg();
975  if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 ||
976  R == Hexagon::LC1 || R == Hexagon::SA1))
977  return true;
978  if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1))
979  return true;
980  }
981  return false;
982 }
983 
984 /// \brief Return true if the loop contains an instruction that inhibits
985 /// the use of the hardware loop instruction.
986 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
987  bool IsInnerHWLoop) const {
988  const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
989  DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
990  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
991  MachineBasicBlock *MBB = Blocks[i];
993  MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
994  const MachineInstr *MI = &*MII;
995  if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
996  DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
997  return true;
998  }
999  }
1000  }
1001  return false;
1002 }
1003 
1004 /// \brief Returns true if the instruction is dead. This was essentially
1005 /// copied from DeadMachineInstructionElim::isDead, but with special cases
1006 /// for inline asm, physical registers and instructions with side effects
1007 /// removed.
1008 bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1009  SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1010  // Examine each operand.
1011  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1012  const MachineOperand &MO = MI->getOperand(i);
1013  if (!MO.isReg() || !MO.isDef())
1014  continue;
1015 
1016  unsigned Reg = MO.getReg();
1017  if (MRI->use_nodbg_empty(Reg))
1018  continue;
1019 
1020  typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
1021 
1022  // This instruction has users, but if the only user is the phi node for the
1023  // parent block, and the only use of that phi node is this instruction, then
1024  // this instruction is dead: both it (and the phi node) can be removed.
1025  use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1026  use_nodbg_iterator End = MRI->use_nodbg_end();
1027  if (std::next(I) != End || !I->getParent()->isPHI())
1028  return false;
1029 
1030  MachineInstr *OnePhi = I->getParent();
1031  for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
1032  const MachineOperand &OPO = OnePhi->getOperand(j);
1033  if (!OPO.isReg() || !OPO.isDef())
1034  continue;
1035 
1036  unsigned OPReg = OPO.getReg();
1037  use_nodbg_iterator nextJ;
1038  for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1039  J != End; J = nextJ) {
1040  nextJ = std::next(J);
1041  MachineOperand &Use = *J;
1042  MachineInstr *UseMI = Use.getParent();
1043 
1044  // If the phi node has a user that is not MI, bail.
1045  if (MI != UseMI)
1046  return false;
1047  }
1048  }
1049  DeadPhis.push_back(OnePhi);
1050  }
1051 
1052  // If there are no defs with uses, the instruction is dead.
1053  return true;
1054 }
1055 
1056 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1057  // This procedure was essentially copied from DeadMachineInstructionElim.
1058 
1060  if (isDead(MI, DeadPhis)) {
1061  DEBUG(dbgs() << "HW looping will remove: " << *MI);
1062 
1063  // It is possible that some DBG_VALUE instructions refer to this
1064  // instruction. Examine each def operand for such references;
1065  // if found, mark the DBG_VALUE as undef (but don't delete it).
1066  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1067  const MachineOperand &MO = MI->getOperand(i);
1068  if (!MO.isReg() || !MO.isDef())
1069  continue;
1070  unsigned Reg = MO.getReg();
1072  for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
1073  E = MRI->use_end(); I != E; I = nextI) {
1074  nextI = std::next(I); // I is invalidated by the setReg
1075  MachineOperand &Use = *I;
1076  MachineInstr *UseMI = I->getParent();
1077  if (UseMI == MI)
1078  continue;
1079  if (Use.isDebug())
1080  UseMI->getOperand(0).setReg(0U);
1081  }
1082  }
1083 
1084  MI->eraseFromParent();
1085  for (unsigned i = 0; i < DeadPhis.size(); ++i)
1086  DeadPhis[i]->eraseFromParent();
1087  }
1088 }
1089 
1090 /// \brief Check if the loop is a candidate for converting to a hardware
1091 /// loop. If so, then perform the transformation.
1092 ///
1093 /// This function works on innermost loops first. A loop can be converted
1094 /// if it is a counting loop; either a register value or an immediate.
1095 ///
1096 /// The code makes several assumptions about the representation of the loop
1097 /// in llvm.
1098 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1099  bool &RecL0used,
1100  bool &RecL1used) {
1101  // This is just for sanity.
1102  assert(L->getHeader() && "Loop without a header?");
1103 
1104  bool Changed = false;
1105  bool L0Used = false;
1106  bool L1Used = false;
1107 
1108  // Process nested loops first.
1109  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
1110  Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
1111  L0Used |= RecL0used;
1112  L1Used |= RecL1used;
1113  }
1114 
1115  // If a nested loop has been converted, then we can't convert this loop.
1116  if (Changed && L0Used && L1Used)
1117  return Changed;
1118 
1119  unsigned LOOP_i;
1120  unsigned LOOP_r;
1121  unsigned ENDLOOP;
1122 
1123  // Flag used to track loopN instruction:
1124  // 1 - Hardware loop is being generated for the inner most loop.
1125  // 0 - Hardware loop is being generated for the outer loop.
1126  unsigned IsInnerHWLoop = 1;
1127 
1128  if (L0Used) {
1129  LOOP_i = Hexagon::J2_loop1i;
1130  LOOP_r = Hexagon::J2_loop1r;
1131  ENDLOOP = Hexagon::ENDLOOP1;
1132  IsInnerHWLoop = 0;
1133  } else {
1134  LOOP_i = Hexagon::J2_loop0i;
1135  LOOP_r = Hexagon::J2_loop0r;
1136  ENDLOOP = Hexagon::ENDLOOP0;
1137  }
1138 
1139 #ifndef NDEBUG
1140  // Stop trying after reaching the limit (if any).
1141  int Limit = HWLoopLimit;
1142  if (Limit >= 0) {
1143  if (Counter >= HWLoopLimit)
1144  return false;
1145  Counter++;
1146  }
1147 #endif
1148 
1149  // Does the loop contain any invalid instructions?
1150  if (containsInvalidInstruction(L, IsInnerHWLoop))
1151  return false;
1152 
1153  MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1154  // Don't generate hw loop if the loop has more than one exit.
1155  if (!LastMBB)
1156  return false;
1157 
1159  if (LastI == LastMBB->end())
1160  return false;
1161 
1162  // Is the induction variable bump feeding the latch condition?
1163  if (!fixupInductionVariable(L))
1164  return false;
1165 
1166  // Ensure the loop has a preheader: the loop instruction will be
1167  // placed there.
1168  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1169  if (!Preheader) {
1170  Preheader = createPreheaderForLoop(L);
1171  if (!Preheader)
1172  return false;
1173  }
1174 
1175  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1176 
1178  // Are we able to determine the trip count for the loop?
1179  CountValue *TripCount = getLoopTripCount(L, OldInsts);
1180  if (!TripCount)
1181  return false;
1182 
1183  // Is the trip count available in the preheader?
1184  if (TripCount->isReg()) {
1185  // There will be a use of the register inserted into the preheader,
1186  // so make sure that the register is actually defined at that point.
1187  MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1188  MachineBasicBlock *BBDef = TCDef->getParent();
1189  if (!MDT->dominates(BBDef, Preheader))
1190  return false;
1191  }
1192 
1193  // Determine the loop start.
1194  MachineBasicBlock *TopBlock = L->getTopBlock();
1195  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1196  MachineBasicBlock *LoopStart = nullptr;
1197  if (ExitingBlock != L->getLoopLatch()) {
1198  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1200 
1201  if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1202  return false;
1203 
1204  if (L->contains(TB))
1205  LoopStart = TB;
1206  else if (L->contains(FB))
1207  LoopStart = FB;
1208  else
1209  return false;
1210  }
1211  else
1212  LoopStart = TopBlock;
1213 
1214  // Convert the loop to a hardware loop.
1215  DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1216  DebugLoc DL;
1217  if (InsertPos != Preheader->end())
1218  DL = InsertPos->getDebugLoc();
1219 
1220  if (TripCount->isReg()) {
1221  // Create a copy of the loop count register.
1222  unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1223  BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1224  .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1225  // Add the Loop instruction to the beginning of the loop.
1226  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1227  .addReg(CountReg);
1228  } else {
1229  assert(TripCount->isImm() && "Expecting immediate value for trip count");
1230  // Add the Loop immediate instruction to the beginning of the loop,
1231  // if the immediate fits in the instructions. Otherwise, we need to
1232  // create a new virtual register.
1233  int64_t CountImm = TripCount->getImm();
1234  if (!TII->isValidOffset(LOOP_i, CountImm)) {
1235  unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1236  BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1237  .addImm(CountImm);
1238  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1239  .addMBB(LoopStart).addReg(CountReg);
1240  } else
1241  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1242  .addMBB(LoopStart).addImm(CountImm);
1243  }
1244 
1245  // Make sure the loop start always has a reference in the CFG. We need
1246  // to create a BlockAddress operand to get this mechanism to work both the
1247  // MachineBasicBlock and BasicBlock objects need the flag set.
1248  LoopStart->setHasAddressTaken();
1249  // This line is needed to set the hasAddressTaken flag on the BasicBlock
1250  // object.
1251  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
1252 
1253  // Replace the loop branch with an endloop instruction.
1254  DebugLoc LastIDL = LastI->getDebugLoc();
1255  BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1256 
1257  // The loop ends with either:
1258  // - a conditional branch followed by an unconditional branch, or
1259  // - a conditional branch to the loop start.
1260  if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1261  LastI->getOpcode() == Hexagon::J2_jumpf) {
1262  // Delete one and change/add an uncond. branch to out of the loop.
1263  MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1264  LastI = LastMBB->erase(LastI);
1265  if (!L->contains(BranchTarget)) {
1266  if (LastI != LastMBB->end())
1267  LastI = LastMBB->erase(LastI);
1269  TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1270  }
1271  } else {
1272  // Conditional branch to loop start; just delete it.
1273  LastMBB->erase(LastI);
1274  }
1275  delete TripCount;
1276 
1277  // The induction operation and the comparison may now be
1278  // unneeded. If these are unneeded, then remove them.
1279  for (unsigned i = 0; i < OldInsts.size(); ++i)
1280  removeIfDead(OldInsts[i]);
1281 
1282  ++NumHWLoops;
1283 
1284  // Set RecL1used and RecL0used only after hardware loop has been
1285  // successfully generated. Doing it earlier can cause wrong loop instruction
1286  // to be used.
1287  if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1288  RecL1used = true;
1289  else
1290  RecL0used = true;
1291 
1292  return true;
1293 }
1294 
1295 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1296  MachineInstr *CmpI) {
1297  assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1298 
1299  MachineBasicBlock *BB = BumpI->getParent();
1300  if (CmpI->getParent() != BB)
1301  return false;
1302 
1303  typedef MachineBasicBlock::instr_iterator instr_iterator;
1304  // Check if things are in order to begin with.
1305  for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1306  if (&*I == CmpI)
1307  return true;
1308 
1309  // Out of order.
1310  unsigned PredR = CmpI->getOperand(0).getReg();
1311  bool FoundBump = false;
1312  instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1313  for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1314  MachineInstr *In = &*I;
1315  for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1316  MachineOperand &MO = In->getOperand(i);
1317  if (MO.isReg() && MO.isUse()) {
1318  if (MO.getReg() == PredR) // Found an intervening use of PredR.
1319  return false;
1320  }
1321  }
1322 
1323  if (In == BumpI) {
1324  BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1325  FoundBump = true;
1326  break;
1327  }
1328  }
1329  assert (FoundBump && "Cannot determine instruction order");
1330  return FoundBump;
1331 }
1332 
1333 /// This function is required to break recursion. Visiting phis in a loop may
1334 /// result in recursion during compilation. We break the recursion by making
1335 /// sure that we visit a MachineOperand and its definition in a
1336 /// MachineInstruction only once. If we attempt to visit more than once, then
1337 /// there is recursion, and will return false.
1338 bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1339  MachineInstr *MI,
1340  const MachineOperand *MO,
1341  LoopFeederMap &LoopFeederPhi) const {
1342  if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1343  const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
1344  DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
1345  // Ignore all BBs that form Loop.
1346  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1347  MachineBasicBlock *MBB = Blocks[i];
1348  if (A == MBB)
1349  return false;
1350  }
1351  MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1352  LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1353  return true;
1354  } else
1355  // Already visited node.
1356  return false;
1357 }
1358 
1359 /// Return true if a Phi may generate a value that can underflow.
1360 /// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1361 bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1362  MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1363  MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1364  assert(Phi->isPHI() && "Expecting a Phi.");
1365  // Walk through each Phi, and its used operands. Make sure that
1366  // if there is recursion in Phi, we won't generate hardware loops.
1367  for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1368  if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1369  if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1370  Phi->getParent(), L, LoopFeederPhi))
1371  return true;
1372  return false;
1373 }
1374 
1375 /// Return true if the induction variable can underflow in the first iteration.
1376 /// An example, is an initial unsigned value that is 0 and is decrement in the
1377 /// first itertion of a do-while loop. In this case, we cannot generate a
1378 /// hardware loop because the endloop instruction does not decrement the loop
1379 /// counter if it is <= 1. We only need to perform this analysis if the
1380 /// initial value is a register.
1381 ///
1382 /// This function assumes the initial value may underfow unless proven
1383 /// otherwise. If the type is signed, then we don't care because signed
1384 /// underflow is undefined. We attempt to prove the initial value is not
1385 /// zero by perfoming a crude analysis of the loop counter. This function
1386 /// checks if the initial value is used in any comparison prior to the loop
1387 /// and, if so, assumes the comparison is a range check. This is inexact,
1388 /// but will catch the simple cases.
1389 bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1390  const MachineOperand *InitVal, const MachineOperand *EndVal,
1391  MachineBasicBlock *MBB, MachineLoop *L,
1392  LoopFeederMap &LoopFeederPhi) const {
1393  // Only check register values since they are unknown.
1394  if (!InitVal->isReg())
1395  return false;
1396 
1397  if (!EndVal->isImm())
1398  return false;
1399 
1400  // A register value that is assigned an immediate is a known value, and it
1401  // won't underflow in the first iteration.
1402  int64_t Imm;
1403  if (checkForImmediate(*InitVal, Imm))
1404  return (EndVal->getImm() == Imm);
1405 
1406  unsigned Reg = InitVal->getReg();
1407 
1408  // We don't know the value of a physical register.
1410  return true;
1411 
1412  MachineInstr *Def = MRI->getVRegDef(Reg);
1413  if (!Def)
1414  return true;
1415 
1416  // If the initial value is a Phi or copy and the operands may not underflow,
1417  // then the definition cannot be underflow either.
1418  if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1419  L, LoopFeederPhi))
1420  return false;
1421  if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1422  EndVal, Def->getParent(),
1423  L, LoopFeederPhi))
1424  return false;
1425 
1426  // Iterate over the uses of the initial value. If the initial value is used
1427  // in a compare, then we assume this is a range check that ensures the loop
1428  // doesn't underflow. This is not an exact test and should be improved.
1429  for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
1430  E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1431  MachineInstr *MI = &*I;
1432  unsigned CmpReg1 = 0, CmpReg2 = 0;
1433  int CmpMask = 0, CmpValue = 0;
1434 
1435  if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1436  continue;
1437 
1438  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1440  if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1441  continue;
1442 
1443  Comparison::Kind Cmp =
1444  getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1445  if (Cmp == 0)
1446  continue;
1447  if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1448  Cmp = Comparison::getNegatedComparison(Cmp);
1449  if (CmpReg2 != 0 && CmpReg2 == Reg)
1450  Cmp = Comparison::getSwappedComparison(Cmp);
1451 
1452  // Signed underflow is undefined.
1453  if (Comparison::isSigned(Cmp))
1454  return false;
1455 
1456  // Check if there is a comparison of the initial value. If the initial value
1457  // is greater than or not equal to another value, then assume this is a
1458  // range check.
1459  if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1460  return false;
1461  }
1462 
1463  // OK - this is a hack that needs to be improved. We really need to analyze
1464  // the instructions performed on the initial value. This works on the simplest
1465  // cases only.
1466  if (!Def->isCopy() && !Def->isPHI())
1467  return false;
1468 
1469  return true;
1470 }
1471 
1472 bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1473  int64_t &Val) const {
1474  if (MO.isImm()) {
1475  Val = MO.getImm();
1476  return true;
1477  }
1478  if (!MO.isReg())
1479  return false;
1480 
1481  // MO is a register. Check whether it is defined as an immediate value,
1482  // and if so, get the value of it in TV. That value will then need to be
1483  // processed to handle potential subregisters in MO.
1484  int64_t TV;
1485 
1486  unsigned R = MO.getReg();
1488  return false;
1489  MachineInstr *DI = MRI->getVRegDef(R);
1490  unsigned DOpc = DI->getOpcode();
1491  switch (DOpc) {
1492  case TargetOpcode::COPY:
1493  case Hexagon::A2_tfrsi:
1494  case Hexagon::A2_tfrpi:
1495  case Hexagon::CONST32:
1496  case Hexagon::CONST64: {
1497  // Call recursively to avoid an extra check whether operand(1) is
1498  // indeed an immediate (it could be a global address, for example),
1499  // plus we can handle COPY at the same time.
1500  if (!checkForImmediate(DI->getOperand(1), TV))
1501  return false;
1502  break;
1503  }
1504  case Hexagon::A2_combineii:
1505  case Hexagon::A4_combineir:
1506  case Hexagon::A4_combineii:
1507  case Hexagon::A4_combineri:
1508  case Hexagon::A2_combinew: {
1509  const MachineOperand &S1 = DI->getOperand(1);
1510  const MachineOperand &S2 = DI->getOperand(2);
1511  int64_t V1, V2;
1512  if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1513  return false;
1514  TV = V2 | (V1 << 32);
1515  break;
1516  }
1517  case TargetOpcode::REG_SEQUENCE: {
1518  const MachineOperand &S1 = DI->getOperand(1);
1519  const MachineOperand &S3 = DI->getOperand(3);
1520  int64_t V1, V3;
1521  if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1522  return false;
1523  unsigned Sub2 = DI->getOperand(2).getImm();
1524  unsigned Sub4 = DI->getOperand(4).getImm();
1525  if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1526  TV = V1 | (V3 << 32);
1527  else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1528  TV = V3 | (V1 << 32);
1529  else
1530  llvm_unreachable("Unexpected form of REG_SEQUENCE");
1531  break;
1532  }
1533 
1534  default:
1535  return false;
1536  }
1537 
1538  // By now, we should have successfully obtained the immediate value defining
1539  // the register referenced in MO. Handle a potential use of a subregister.
1540  switch (MO.getSubReg()) {
1541  case Hexagon::isub_lo:
1542  Val = TV & 0xFFFFFFFFULL;
1543  break;
1544  case Hexagon::isub_hi:
1545  Val = (TV >> 32) & 0xFFFFFFFFULL;
1546  break;
1547  default:
1548  Val = TV;
1549  break;
1550  }
1551  return true;
1552 }
1553 
1554 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1555  if (MO.isImm()) {
1556  MO.setImm(Val);
1557  return;
1558  }
1559 
1560  assert(MO.isReg());
1561  unsigned R = MO.getReg();
1562  MachineInstr *DI = MRI->getVRegDef(R);
1563 
1564  const TargetRegisterClass *RC = MRI->getRegClass(R);
1565  unsigned NewR = MRI->createVirtualRegister(RC);
1566  MachineBasicBlock &B = *DI->getParent();
1567  DebugLoc DL = DI->getDebugLoc();
1568  BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1569  MO.setReg(NewR);
1570 }
1571 
1572 static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
1573  // These two instructions are not extendable.
1574  if (CmpOpc == Hexagon::A4_cmpbeqi)
1575  return isUInt<8>(Imm);
1576  if (CmpOpc == Hexagon::A4_cmpbgti)
1577  return isInt<8>(Imm);
1578  // The rest of the comparison-with-immediate instructions are extendable.
1579  return true;
1580 }
1581 
1582 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1583  MachineBasicBlock *Header = L->getHeader();
1584  MachineBasicBlock *Latch = L->getLoopLatch();
1585  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1586 
1587  if (!(Header && Latch && ExitingBlock))
1588  return false;
1589 
1590  // These data structures follow the same concept as the corresponding
1591  // ones in findInductionRegister (where some comments are).
1592  typedef std::pair<unsigned,int64_t> RegisterBump;
1593  typedef std::pair<unsigned,RegisterBump> RegisterInduction;
1594  typedef std::set<RegisterInduction> RegisterInductionSet;
1595 
1596  // Register candidates for induction variables, with their associated bumps.
1597  RegisterInductionSet IndRegs;
1598 
1599  // Look for induction patterns:
1600  // vreg1 = PHI ..., [ latch, vreg2 ]
1601  // vreg2 = ADD vreg1, imm
1602  typedef MachineBasicBlock::instr_iterator instr_iterator;
1603  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1604  I != E && I->isPHI(); ++I) {
1605  MachineInstr *Phi = &*I;
1606 
1607  // Have a PHI instruction.
1608  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1609  if (Phi->getOperand(i+1).getMBB() != Latch)
1610  continue;
1611 
1612  unsigned PhiReg = Phi->getOperand(i).getReg();
1613  MachineInstr *DI = MRI->getVRegDef(PhiReg);
1614 
1615  if (DI->getDesc().isAdd()) {
1616  // If the register operand to the add/sub is the PHI we are looking
1617  // at, this meets the induction pattern.
1618  unsigned IndReg = DI->getOperand(1).getReg();
1619  MachineOperand &Opnd2 = DI->getOperand(2);
1620  int64_t V;
1621  if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1622  unsigned UpdReg = DI->getOperand(0).getReg();
1623  IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1624  }
1625  }
1626  } // for (i)
1627  } // for (instr)
1628 
1629  if (IndRegs.empty())
1630  return false;
1631 
1632  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1634  // AnalyzeBranch returns true if it fails to analyze branch.
1635  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1636  if (NotAnalyzed || Cond.empty())
1637  return false;
1638 
1639  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1640  MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1642  bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1643  if (NotAnalyzed)
1644  return false;
1645 
1646  // Since latch is not the exiting block, the latch branch should be an
1647  // unconditional branch to the loop header.
1648  if (TB == Latch)
1649  TB = (LTB == Header) ? LTB : LFB;
1650  else
1651  FB = (LTB == Header) ? LTB : LFB;
1652  }
1653  if (TB != Header) {
1654  if (FB != Header) {
1655  // The latch/exit block does not go back to the header.
1656  return false;
1657  }
1658  // FB is the header (i.e., uncond. jump to branch header)
1659  // In this case, the LoopBody -> TB should not be a back edge otherwise
1660  // it could result in an infinite loop after conversion to hw_loop.
1661  // This case can happen when the Latch has two jumps like this:
1662  // Jmp_c OuterLoopHeader <-- TB
1663  // Jmp InnerLoopHeader <-- FB
1664  if (MDT->dominates(TB, FB))
1665  return false;
1666  }
1667 
1668  // Expecting a predicate register as a condition. It won't be a hardware
1669  // predicate register at this point yet, just a vreg.
1670  // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
1671  // into Cond, followed by the predicate register. For non-negated branches
1672  // it's just the register.
1673  unsigned CSz = Cond.size();
1674  if (CSz != 1 && CSz != 2)
1675  return false;
1676 
1677  if (!Cond[CSz-1].isReg())
1678  return false;
1679 
1680  unsigned P = Cond[CSz-1].getReg();
1681  MachineInstr *PredDef = MRI->getVRegDef(P);
1682 
1683  if (!PredDef->isCompare())
1684  return false;
1685 
1686  SmallSet<unsigned,2> CmpRegs;
1687  MachineOperand *CmpImmOp = nullptr;
1688 
1689  // Go over all operands to the compare and look for immediate and register
1690  // operands. Assume that if the compare has a single register use and a
1691  // single immediate operand, then the register is being compared with the
1692  // immediate value.
1693  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1694  MachineOperand &MO = PredDef->getOperand(i);
1695  if (MO.isReg()) {
1696  // Skip all implicit references. In one case there was:
1697  // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
1698  if (MO.isImplicit())
1699  continue;
1700  if (MO.isUse()) {
1701  if (!isImmediate(MO)) {
1702  CmpRegs.insert(MO.getReg());
1703  continue;
1704  }
1705  // Consider the register to be the "immediate" operand.
1706  if (CmpImmOp)
1707  return false;
1708  CmpImmOp = &MO;
1709  }
1710  } else if (MO.isImm()) {
1711  if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1712  return false;
1713  CmpImmOp = &MO;
1714  }
1715  }
1716 
1717  if (CmpRegs.empty())
1718  return false;
1719 
1720  // Check if the compared register follows the order we want. Fix if needed.
1721  for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1722  I != E; ++I) {
1723  // This is a success. If the register used in the comparison is one that
1724  // we have identified as a bumped (updated) induction register, there is
1725  // nothing to do.
1726  if (CmpRegs.count(I->first))
1727  return true;
1728 
1729  // Otherwise, if the register being compared comes out of a PHI node,
1730  // and has been recognized as following the induction pattern, and is
1731  // compared against an immediate, we can fix it.
1732  const RegisterBump &RB = I->second;
1733  if (CmpRegs.count(RB.first)) {
1734  if (!CmpImmOp) {
1735  // If both operands to the compare instruction are registers, see if
1736  // it can be changed to use induction register as one of the operands.
1737  MachineInstr *IndI = nullptr;
1738  MachineInstr *nonIndI = nullptr;
1739  MachineOperand *IndMO = nullptr;
1740  MachineOperand *nonIndMO = nullptr;
1741 
1742  for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1743  MachineOperand &MO = PredDef->getOperand(i);
1744  if (MO.isReg() && MO.getReg() == RB.first) {
1745  DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1746  << *(MRI->getVRegDef(I->first)));
1747  if (IndI)
1748  return false;
1749 
1750  IndI = MRI->getVRegDef(I->first);
1751  IndMO = &MO;
1752  } else if (MO.isReg()) {
1753  DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1754  << *(MRI->getVRegDef(MO.getReg())));
1755  if (nonIndI)
1756  return false;
1757 
1758  nonIndI = MRI->getVRegDef(MO.getReg());
1759  nonIndMO = &MO;
1760  }
1761  }
1762  if (IndI && nonIndI &&
1763  nonIndI->getOpcode() == Hexagon::A2_addi &&
1764  nonIndI->getOperand(2).isImm() &&
1765  nonIndI->getOperand(2).getImm() == - RB.second) {
1766  bool Order = orderBumpCompare(IndI, PredDef);
1767  if (Order) {
1768  IndMO->setReg(I->first);
1769  nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1770  return true;
1771  }
1772  }
1773  return false;
1774  }
1775 
1776  // It is not valid to do this transformation on an unsigned comparison
1777  // because it may underflow.
1778  Comparison::Kind Cmp =
1779  getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1780  if (!Cmp || Comparison::isUnsigned(Cmp))
1781  return false;
1782 
1783  // If the register is being compared against an immediate, try changing
1784  // the compare instruction to use induction register and adjust the
1785  // immediate operand.
1786  int64_t CmpImm = getImmediate(*CmpImmOp);
1787  int64_t V = RB.second;
1788  // Handle Overflow (64-bit).
1789  if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1790  ((V < 0) && (CmpImm < INT64_MIN - V)))
1791  return false;
1792  CmpImm += V;
1793  // Most comparisons of register against an immediate value allow
1794  // the immediate to be constant-extended. There are some exceptions
1795  // though. Make sure the new combination will work.
1796  if (CmpImmOp->isImm())
1797  if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
1798  return false;
1799 
1800  // Make sure that the compare happens after the bump. Otherwise,
1801  // after the fixup, the compare would use a yet-undefined register.
1802  MachineInstr *BumpI = MRI->getVRegDef(I->first);
1803  bool Order = orderBumpCompare(BumpI, PredDef);
1804  if (!Order)
1805  return false;
1806 
1807  // Finally, fix the compare instruction.
1808  setImmediate(*CmpImmOp, CmpImm);
1809  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1810  MachineOperand &MO = PredDef->getOperand(i);
1811  if (MO.isReg() && MO.getReg() == RB.first) {
1812  MO.setReg(I->first);
1813  return true;
1814  }
1815  }
1816  }
1817  }
1818 
1819  return false;
1820 }
1821 
1822 /// createPreheaderForLoop - Create a preheader for a given loop.
1823 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1824  MachineLoop *L) {
1825  if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1826  return TmpPH;
1827  if (!HWCreatePreheader)
1828  return nullptr;
1829 
1830  MachineBasicBlock *Header = L->getHeader();
1831  MachineBasicBlock *Latch = L->getLoopLatch();
1832  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1833  MachineFunction *MF = Header->getParent();
1834  DebugLoc DL;
1835 
1836 #ifndef NDEBUG
1837  if ((PHFn != "") && (PHFn != MF->getName()))
1838  return nullptr;
1839 #endif
1840 
1841  if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1842  return nullptr;
1843 
1844  typedef MachineBasicBlock::instr_iterator instr_iterator;
1845 
1846  // Verify that all existing predecessors have analyzable branches
1847  // (or no branches at all).
1848  typedef std::vector<MachineBasicBlock*> MBBVector;
1849  MBBVector Preds(Header->pred_begin(), Header->pred_end());
1851  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1852 
1853  if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1854  return nullptr;
1855 
1856  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1857  MachineBasicBlock *PB = *I;
1858  bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1859  if (NotAnalyzed)
1860  return nullptr;
1861  }
1862 
1864  MF->insert(Header->getIterator(), NewPH);
1865 
1866  if (Header->pred_size() > 2) {
1867  // Ensure that the header has only two predecessors: the preheader and
1868  // the loop latch. Any additional predecessors of the header should
1869  // join at the newly created preheader. Inspect all PHI nodes from the
1870  // header and create appropriate corresponding PHI nodes in the preheader.
1871 
1872  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1873  I != E && I->isPHI(); ++I) {
1874  MachineInstr *PN = &*I;
1875 
1876  const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1877  MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1878  NewPH->insert(NewPH->end(), NewPN);
1879 
1880  unsigned PR = PN->getOperand(0).getReg();
1881  const TargetRegisterClass *RC = MRI->getRegClass(PR);
1882  unsigned NewPR = MRI->createVirtualRegister(RC);
1883  NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1884 
1885  // Copy all non-latch operands of a header's PHI node to the newly
1886  // created PHI node in the preheader.
1887  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1888  unsigned PredR = PN->getOperand(i).getReg();
1889  unsigned PredRSub = PN->getOperand(i).getSubReg();
1890  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1891  if (PredB == Latch)
1892  continue;
1893 
1894  MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1895  MO.setSubReg(PredRSub);
1896  NewPN->addOperand(MO);
1897  NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1898  }
1899 
1900  // Remove copied operands from the old PHI node and add the value
1901  // coming from the preheader's PHI.
1902  for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1903  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1904  if (PredB != Latch) {
1905  PN->RemoveOperand(i+1);
1906  PN->RemoveOperand(i);
1907  }
1908  }
1909  PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1911  }
1912  } else {
1913  assert(Header->pred_size() == 2);
1914 
1915  // The header has only two predecessors, but the non-latch predecessor
1916  // is not a preheader (e.g. it has other successors, etc.)
1917  // In such a case we don't need any extra PHI nodes in the new preheader,
1918  // all we need is to adjust existing PHIs in the header to now refer to
1919  // the new preheader.
1920  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1921  I != E && I->isPHI(); ++I) {
1922  MachineInstr *PN = &*I;
1923  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1924  MachineOperand &MO = PN->getOperand(i+1);
1925  if (MO.getMBB() != Latch)
1926  MO.setMBB(NewPH);
1927  }
1928  }
1929  }
1930 
1931  // "Reroute" the CFG edges to link in the new preheader.
1932  // If any of the predecessors falls through to the header, insert a branch
1933  // to the new preheader in that place.
1936 
1937  TB = FB = nullptr;
1938 
1939  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1940  MachineBasicBlock *PB = *I;
1941  if (PB != Latch) {
1942  Tmp2.clear();
1943  bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1944  (void)NotAnalyzed; // suppress compiler warning
1945  assert (!NotAnalyzed && "Should be analyzable!");
1946  if (TB != Header && (Tmp2.empty() || FB != Header))
1947  TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1948  PB->ReplaceUsesOfBlockWith(Header, NewPH);
1949  }
1950  }
1951 
1952  // It can happen that the latch block will fall through into the header.
1953  // Insert an unconditional branch to the header.
1954  TB = FB = nullptr;
1955  bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1956  (void)LatchNotAnalyzed; // suppress compiler warning
1957  assert (!LatchNotAnalyzed && "Should be analyzable!");
1958  if (!TB && !FB)
1959  TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1960 
1961  // Finally, the branch from the preheader to the header.
1962  TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1963  NewPH->addSuccessor(Header);
1964 
1965  MachineLoop *ParentLoop = L->getParentLoop();
1966  if (ParentLoop)
1967  ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1968 
1969  // Update the dominator information with the new preheader.
1970  if (MDT) {
1971  if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1972  if (MachineDomTreeNode *DHN = HN->getIDom()) {
1973  MDT->addNewBlock(NewPH, DHN->getBlock());
1974  MDT->changeImmediateDominator(Header, NewPH);
1975  }
1976  }
1977  }
1978 
1979  return NewPH;
1980 }
MachineLoop * L
static bool isReg(const MCInst &MI, unsigned OpNo)
bool isImplicit() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
instr_iterator instr_end()
static cl::opt< bool > SpecPreheader("hwloop-spec-preheader", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader ""instructions"))
STATISTIC(NumFunctions,"Total number of functions")
size_t i
MachineBasicBlock * getMBB() const
void initializeHexagonHardwareLoopsPass(PassRegistry &)
static cl::opt< bool > HWCreatePreheader("hexagon-hwloop-preheader", cl::Hidden, cl::init(true), cl::desc("Add a preheader to a hardware loop if one doesn't exist"))
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm)
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static int Counter
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:268
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:270
static cl::opt< std::string > PHFn("hexagon-hwloop-phfn", cl::Hidden, cl::init(""))
LoopT * getParentLoop() const
Definition: LoopInfo.h:103
A debug info location.
Definition: DebugLoc.h:34
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
SmallVector< MachineBasicBlock *, 4 > MBBVector
FunctionPass * createHexagonHardwareLoops()
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
const std::vector< BlockT * > & getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:139
BlockT * getHeader() const
Definition: LoopInfo.h:102
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
INITIALIZE_PASS_BEGIN(HexagonHardwareLoops,"hwloops","Hexagon Hardware Loops", false, false) INITIALIZE_PASS_END(HexagonHardwareLoops
DomTreeNodeBase< NodeT > * getIDom() const
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to 'Old', change the code and CFG so that it branches to 'N...
Hexagon Hardware Loops
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:55
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout, ignoring any parts of the loop not contiguous with the part that contains the header.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
static F t[256]
bool isPHI() const
Definition: MachineInstr.h:786
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
bool isCall() const
Return true if the instruction is a call.
Definition: MCInstrDesc.h:242
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:277
defusechain_iterator - This class provides iterator support for machine operands in the function that...
void RemoveOperand(unsigned i)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
MachineBasicBlock * findLoopControlBlock()
Find the block that contains the loop control variable and the loop test.
#define F(x, y, z)
Definition: MD5.cpp:51
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:188
MachineBasicBlock * MBB
Base class for the actual dominator tree node.
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
std::vector< MachineBasicBlock * >::iterator pred_iterator
#define EQ(a, b)
Definition: regexec.c:112
int64_t getImm() const
Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubRegIdx=0)
Prints virtual and physical registers with or without a TRI instance.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:131
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static cl::opt< int > HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1))
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
iterator begin() const
Definition: LoopInfo.h:132
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:309
unsigned const MachineRegisterInfo * MRI
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:109
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1356
MachineInstrBuilder & UseMI
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
void setMBB(MachineBasicBlock *MBB)
iterator end() const
Definition: LoopInfo.h:133
bool isCopy() const
Definition: MachineInstr.h:807
Represent the analysis usage information of a pass.
constexpr bool isPowerOf2_64(uint64_t Value)
isPowerOf2_64 - This function returns true if the argument is a power of two 0 (64 bit edition...
Definition: MathExtras.h:405
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:109
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
static const unsigned End
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
self_iterator getIterator()
Definition: ilist_node.h:81
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:80
unsigned getSubReg() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isAdd() const
Return true if the instruction is an add instruction.
Definition: MCInstrDesc.h:239
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:64
Iterator for intrusive lists based on ilist_node.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
Hexagon Hardware false
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
const DataFlowGraph & G
Definition: RDFGraph.cpp:206
void dump(const TargetInstrInfo *TII=nullptr) const
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
Definition: MachineInstr.h:485
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned Log2_32(uint32_t Value)
Log2_32 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:513
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
void setHasAddressTaken()
Set this block to reflect that it potentially is the target of an indirect branch.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:250
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1132
Representation of each machine instruction.
Definition: MachineInstr.h:52
bool hasAddressTaken() const
Test whether this block is potentially the target of an indirect branch.
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0)
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:54
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1099
void setSubReg(unsigned subReg)
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const unsigned Kind
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
bool isDebug() const
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:100
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:40
std::vector< LoopT * >::const_iterator iterator
Definition: LoopInfo.h:129
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const