LLVM  6.0.0svn
HexagonHardwareLoops.cpp
Go to the documentation of this file.
1 //===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass identifies loops where we can generate the Hexagon hardware
11 // loop instruction. The hardware loop can perform loop branches with a
12 // zero-cycle overhead.
13 //
14 // The pattern that defines the induction variable can changed depending on
15 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
16 // normalizes induction variables, and the Loop Strength Reduction pass
17 // run by 'llc' may also make changes to the induction variable.
18 // The pattern detected by this phase is due to running Strength Reduction.
19 //
20 // Criteria for hardware loops:
21 // - Countable loops (w/ ind. var for a trip count)
22 // - Assumes loops are normalized by IndVarSimplify
23 // - Try inner-most loops first
24 // - No function calls in loops.
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "HexagonInstrInfo.h"
29 #include "HexagonSubtarget.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DebugLoc.h"
48 #include "llvm/Pass.h"
50 #include "llvm/Support/Debug.h"
54 #include <cassert>
55 #include <cstdint>
56 #include <cstdlib>
57 #include <iterator>
58 #include <map>
59 #include <set>
60 #include <string>
61 #include <utility>
62 #include <vector>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "hwloops"
67 
68 #ifndef NDEBUG
69 static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
70 
71 // Option to create preheader only for a specific function.
72 static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
73  cl::init(""));
74 #endif
75 
76 // Option to create a preheader if one doesn't exist.
77 static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
78  cl::Hidden, cl::init(true),
79  cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
80 
81 // Turn it off by default. If a preheader block is not created here, the
82 // software pipeliner may be unable to find a block suitable to serve as
83 // a preheader. In that case SWP will not run.
84 static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false),
85  cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader "
86  "instructions"));
87 
88 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
89 
90 namespace llvm {
91 
94 
95 } // end namespace llvm
96 
97 namespace {
98 
99  class CountValue;
100 
101  struct HexagonHardwareLoops : public MachineFunctionPass {
102  MachineLoopInfo *MLI;
105  const HexagonInstrInfo *TII;
106  const HexagonRegisterInfo *TRI;
107 #ifndef NDEBUG
108  static int Counter;
109 #endif
110 
111  public:
112  static char ID;
113 
114  HexagonHardwareLoops() : MachineFunctionPass(ID) {}
115 
116  bool runOnMachineFunction(MachineFunction &MF) override;
117 
118  StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
119 
120  void getAnalysisUsage(AnalysisUsage &AU) const override {
124  }
125 
126  private:
127  using LoopFeederMap = std::map<unsigned, MachineInstr *>;
128 
129  /// Kinds of comparisons in the compare instructions.
130  struct Comparison {
131  enum Kind {
132  EQ = 0x01,
133  NE = 0x02,
134  L = 0x04,
135  G = 0x08,
136  U = 0x40,
137  LTs = L,
138  LEs = L | EQ,
139  GTs = G,
140  GEs = G | EQ,
141  LTu = L | U,
142  LEu = L | EQ | U,
143  GTu = G | U,
144  GEu = G | EQ | U
145  };
146 
147  static Kind getSwappedComparison(Kind Cmp) {
148  assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
149  if ((Cmp & L) || (Cmp & G))
150  return (Kind)(Cmp ^ (L|G));
151  return Cmp;
152  }
153 
154  static Kind getNegatedComparison(Kind Cmp) {
155  if ((Cmp & L) || (Cmp & G))
156  return (Kind)((Cmp ^ (L | G)) ^ EQ);
157  if ((Cmp & NE) || (Cmp & EQ))
158  return (Kind)(Cmp ^ (EQ | NE));
159  return (Kind)0;
160  }
161 
162  static bool isSigned(Kind Cmp) {
163  return (Cmp & (L | G) && !(Cmp & U));
164  }
165 
166  static bool isUnsigned(Kind Cmp) {
167  return (Cmp & U);
168  }
169  };
170 
171  /// \brief Find the register that contains the loop controlling
172  /// induction variable.
173  /// If successful, it will return true and set the \p Reg, \p IVBump
174  /// and \p IVOp arguments. Otherwise it will return false.
175  /// The returned induction register is the register R that follows the
176  /// following induction pattern:
177  /// loop:
178  /// R = phi ..., [ R.next, LatchBlock ]
179  /// R.next = R + #bump
180  /// if (R.next < #N) goto loop
181  /// IVBump is the immediate value added to R, and IVOp is the instruction
182  /// "R.next = R + #bump".
183  bool findInductionRegister(MachineLoop *L, unsigned &Reg,
184  int64_t &IVBump, MachineInstr *&IVOp) const;
185 
186  /// \brief Return the comparison kind for the specified opcode.
187  Comparison::Kind getComparisonKind(unsigned CondOpc,
188  MachineOperand *InitialValue,
189  const MachineOperand *Endvalue,
190  int64_t IVBump) const;
191 
192  /// \brief Analyze the statements in a loop to determine if the loop
193  /// has a computable trip count and, if so, return a value that represents
194  /// the trip count expression.
195  CountValue *getLoopTripCount(MachineLoop *L,
197 
198  /// \brief Return the expression that represents the number of times
199  /// a loop iterates. The function takes the operands that represent the
200  /// loop start value, loop end value, and induction value. Based upon
201  /// these operands, the function attempts to compute the trip count.
202  /// If the trip count is not directly available (as an immediate value,
203  /// or a register), the function will attempt to insert computation of it
204  /// to the loop's preheader.
205  CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
206  const MachineOperand *End, unsigned IVReg,
207  int64_t IVBump, Comparison::Kind Cmp) const;
208 
209  /// \brief Return true if the instruction is not valid within a hardware
210  /// loop.
211  bool isInvalidLoopOperation(const MachineInstr *MI,
212  bool IsInnerHWLoop) const;
213 
214  /// \brief Return true if the loop contains an instruction that inhibits
215  /// using the hardware loop.
216  bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
217 
218  /// \brief Given a loop, check if we can convert it to a hardware loop.
219  /// If so, then perform the conversion and return true.
220  bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
221 
222  /// \brief Return true if the instruction is now dead.
223  bool isDead(const MachineInstr *MI,
224  SmallVectorImpl<MachineInstr *> &DeadPhis) const;
225 
226  /// \brief Remove the instruction if it is now dead.
227  void removeIfDead(MachineInstr *MI);
228 
229  /// \brief Make sure that the "bump" instruction executes before the
230  /// compare. We need that for the IV fixup, so that the compare
231  /// instruction would not use a bumped value that has not yet been
232  /// defined. If the instructions are out of order, try to reorder them.
233  bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
234 
235  /// \brief Return true if MO and MI pair is visited only once. If visited
236  /// more than once, this indicates there is recursion. In such a case,
237  /// return false.
238  bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
239  const MachineOperand *MO,
240  LoopFeederMap &LoopFeederPhi) const;
241 
242  /// \brief Return true if the Phi may generate a value that may underflow,
243  /// or may wrap.
244  bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
246  LoopFeederMap &LoopFeederPhi) const;
247 
248  /// \brief Return true if the induction variable may underflow an unsigned
249  /// value in the first iteration.
250  bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
251  const MachineOperand *EndVal,
253  LoopFeederMap &LoopFeederPhi) const;
254 
255  /// \brief Check if the given operand has a compile-time known constant
256  /// value. Return true if yes, and false otherwise. When returning true, set
257  /// Val to the corresponding constant value.
258  bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
259 
260  /// \brief Check if the operand has a compile-time known constant value.
261  bool isImmediate(const MachineOperand &MO) const {
262  int64_t V;
263  return checkForImmediate(MO, V);
264  }
265 
266  /// \brief Return the immediate for the specified operand.
267  int64_t getImmediate(const MachineOperand &MO) const {
268  int64_t V;
269  if (!checkForImmediate(MO, V))
270  llvm_unreachable("Invalid operand");
271  return V;
272  }
273 
274  /// \brief Reset the given machine operand to now refer to a new immediate
275  /// value. Assumes that the operand was already referencing an immediate
276  /// value, either directly, or via a register.
277  void setImmediate(MachineOperand &MO, int64_t Val);
278 
279  /// \brief Fix the data flow of the induction variable.
280  /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
281  /// |
282  /// +-> back to phi
283  /// where "bump" is the increment of the induction variable:
284  /// iv = iv + #const.
285  /// Due to some prior code transformations, the actual flow may look
286  /// like this:
287  /// phi -+-> bump ---> back to phi
288  /// |
289  /// +-> comparison-in-latch (against upper_bound-bump),
290  /// i.e. the comparison that controls the loop execution may be using
291  /// the value of the induction variable from before the increment.
292  ///
293  /// Return true if the loop's flow is the desired one (i.e. it's
294  /// either been fixed, or no fixing was necessary).
295  /// Otherwise, return false. This can happen if the induction variable
296  /// couldn't be identified, or if the value in the latch's comparison
297  /// cannot be adjusted to reflect the post-bump value.
298  bool fixupInductionVariable(MachineLoop *L);
299 
300  /// \brief Given a loop, if it does not have a preheader, create one.
301  /// Return the block that is the preheader.
302  MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
303  };
304 
305  char HexagonHardwareLoops::ID = 0;
306 #ifndef NDEBUG
307  int HexagonHardwareLoops::Counter = 0;
308 #endif
309 
310  /// \brief Abstraction for a trip count of a loop. A smaller version
311  /// of the MachineOperand class without the concerns of changing the
312  /// operand representation.
313  class CountValue {
314  public:
315  enum CountValueType {
316  CV_Register,
317  CV_Immediate
318  };
319 
320  private:
321  CountValueType Kind;
322  union Values {
323  struct {
324  unsigned Reg;
325  unsigned Sub;
326  } R;
327  unsigned ImmVal;
328  } Contents;
329 
330  public:
331  explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
332  Kind = t;
333  if (Kind == CV_Register) {
334  Contents.R.Reg = v;
335  Contents.R.Sub = u;
336  } else {
337  Contents.ImmVal = v;
338  }
339  }
340 
341  bool isReg() const { return Kind == CV_Register; }
342  bool isImm() const { return Kind == CV_Immediate; }
343 
344  unsigned getReg() const {
345  assert(isReg() && "Wrong CountValue accessor");
346  return Contents.R.Reg;
347  }
348 
349  unsigned getSubReg() const {
350  assert(isReg() && "Wrong CountValue accessor");
351  return Contents.R.Sub;
352  }
353 
354  unsigned getImm() const {
355  assert(isImm() && "Wrong CountValue accessor");
356  return Contents.ImmVal;
357  }
358 
359  void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
360  if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
361  if (isImm()) { OS << Contents.ImmVal; }
362  }
363  };
364 
365 } // end anonymous namespace
366 
367 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
368  "Hexagon Hardware Loops", false, false)
371 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
372  "Hexagon Hardware Loops", false, false)
373 
375  return new HexagonHardwareLoops();
376 }
377 
378 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
379  DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
380  if (skipFunction(*MF.getFunction()))
381  return false;
382 
383  bool Changed = false;
384 
385  MLI = &getAnalysis<MachineLoopInfo>();
386  MRI = &MF.getRegInfo();
387  MDT = &getAnalysis<MachineDominatorTree>();
389  TII = HST.getInstrInfo();
390  TRI = HST.getRegisterInfo();
391 
392  for (auto &L : *MLI)
393  if (!L->getParentLoop()) {
394  bool L0Used = false;
395  bool L1Used = false;
396  Changed |= convertToHardwareLoop(L, L0Used, L1Used);
397  }
398 
399  return Changed;
400 }
401 
402 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
403  unsigned &Reg,
404  int64_t &IVBump,
405  MachineInstr *&IVOp
406  ) const {
407  MachineBasicBlock *Header = L->getHeader();
408  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
409  MachineBasicBlock *Latch = L->getLoopLatch();
410  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
411  if (!Header || !Preheader || !Latch || !ExitingBlock)
412  return false;
413 
414  // This pair represents an induction register together with an immediate
415  // value that will be added to it in each loop iteration.
416  using RegisterBump = std::pair<unsigned, int64_t>;
417 
418  // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
419  // from an induction operation
420  // R.next = R + bump
421  // where bump is an immediate value.
422  using InductionMap = std::map<unsigned, RegisterBump>;
423 
424  InductionMap IndMap;
425 
426  using instr_iterator = MachineBasicBlock::instr_iterator;
427 
428  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
429  I != E && I->isPHI(); ++I) {
430  MachineInstr *Phi = &*I;
431 
432  // Have a PHI instruction. Get the operand that corresponds to the
433  // latch block, and see if is a result of an addition of form "reg+imm",
434  // where the "reg" is defined by the PHI node we are looking at.
435  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
436  if (Phi->getOperand(i+1).getMBB() != Latch)
437  continue;
438 
439  unsigned PhiOpReg = Phi->getOperand(i).getReg();
440  MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
441 
442  if (DI->getDesc().isAdd()) {
443  // If the register operand to the add is the PHI we're looking at, this
444  // meets the induction pattern.
445  unsigned IndReg = DI->getOperand(1).getReg();
446  MachineOperand &Opnd2 = DI->getOperand(2);
447  int64_t V;
448  if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
449  unsigned UpdReg = DI->getOperand(0).getReg();
450  IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
451  }
452  }
453  } // for (i)
454  } // for (instr)
455 
457  MachineBasicBlock *TB = nullptr, *FB = nullptr;
458  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
459  if (NotAnalyzed)
460  return false;
461 
462  unsigned PredR, PredPos, PredRegFlags;
463  if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
464  return false;
465 
466  MachineInstr *PredI = MRI->getVRegDef(PredR);
467  if (!PredI->isCompare())
468  return false;
469 
470  unsigned CmpReg1 = 0, CmpReg2 = 0;
471  int CmpImm = 0, CmpMask = 0;
472  bool CmpAnalyzed =
473  TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
474  // Fail if the compare was not analyzed, or it's not comparing a register
475  // with an immediate value. Not checking the mask here, since we handle
476  // the individual compare opcodes (including A4_cmpb*) later on.
477  if (!CmpAnalyzed)
478  return false;
479 
480  // Exactly one of the input registers to the comparison should be among
481  // the induction registers.
482  InductionMap::iterator IndMapEnd = IndMap.end();
483  InductionMap::iterator F = IndMapEnd;
484  if (CmpReg1 != 0) {
485  InductionMap::iterator F1 = IndMap.find(CmpReg1);
486  if (F1 != IndMapEnd)
487  F = F1;
488  }
489  if (CmpReg2 != 0) {
490  InductionMap::iterator F2 = IndMap.find(CmpReg2);
491  if (F2 != IndMapEnd) {
492  if (F != IndMapEnd)
493  return false;
494  F = F2;
495  }
496  }
497  if (F == IndMapEnd)
498  return false;
499 
500  Reg = F->second.first;
501  IVBump = F->second.second;
502  IVOp = MRI->getVRegDef(F->first);
503  return true;
504 }
505 
506 // Return the comparison kind for the specified opcode.
507 HexagonHardwareLoops::Comparison::Kind
508 HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
509  MachineOperand *InitialValue,
510  const MachineOperand *EndValue,
511  int64_t IVBump) const {
513  switch (CondOpc) {
514  case Hexagon::C2_cmpeq:
515  case Hexagon::C2_cmpeqi:
516  case Hexagon::C2_cmpeqp:
517  Cmp = Comparison::EQ;
518  break;
519  case Hexagon::C4_cmpneq:
520  case Hexagon::C4_cmpneqi:
521  Cmp = Comparison::NE;
522  break;
523  case Hexagon::C2_cmplt:
524  Cmp = Comparison::LTs;
525  break;
526  case Hexagon::C2_cmpltu:
527  Cmp = Comparison::LTu;
528  break;
529  case Hexagon::C4_cmplte:
530  case Hexagon::C4_cmpltei:
531  Cmp = Comparison::LEs;
532  break;
533  case Hexagon::C4_cmplteu:
534  case Hexagon::C4_cmplteui:
535  Cmp = Comparison::LEu;
536  break;
537  case Hexagon::C2_cmpgt:
538  case Hexagon::C2_cmpgti:
539  case Hexagon::C2_cmpgtp:
540  Cmp = Comparison::GTs;
541  break;
542  case Hexagon::C2_cmpgtu:
543  case Hexagon::C2_cmpgtui:
544  case Hexagon::C2_cmpgtup:
545  Cmp = Comparison::GTu;
546  break;
547  case Hexagon::C2_cmpgei:
548  Cmp = Comparison::GEs;
549  break;
550  case Hexagon::C2_cmpgeui:
551  Cmp = Comparison::GEs;
552  break;
553  default:
554  return (Comparison::Kind)0;
555  }
556  return Cmp;
557 }
558 
559 /// \brief Analyze the statements in a loop to determine if the loop has
560 /// a computable trip count and, if so, return a value that represents
561 /// the trip count expression.
562 ///
563 /// This function iterates over the phi nodes in the loop to check for
564 /// induction variable patterns that are used in the calculation for
565 /// the number of time the loop is executed.
566 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
568  MachineBasicBlock *TopMBB = L->getTopBlock();
570  assert(PI != TopMBB->pred_end() &&
571  "Loop must have more than one incoming edge!");
572  MachineBasicBlock *Backedge = *PI++;
573  if (PI == TopMBB->pred_end()) // dead loop?
574  return nullptr;
575  MachineBasicBlock *Incoming = *PI++;
576  if (PI != TopMBB->pred_end()) // multiple backedges?
577  return nullptr;
578 
579  // Make sure there is one incoming and one backedge and determine which
580  // is which.
581  if (L->contains(Incoming)) {
582  if (L->contains(Backedge))
583  return nullptr;
584  std::swap(Incoming, Backedge);
585  } else if (!L->contains(Backedge))
586  return nullptr;
587 
588  // Look for the cmp instruction to determine if we can get a useful trip
589  // count. The trip count can be either a register or an immediate. The
590  // location of the value depends upon the type (reg or imm).
591  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
592  if (!ExitingBlock)
593  return nullptr;
594 
595  unsigned IVReg = 0;
596  int64_t IVBump = 0;
597  MachineInstr *IVOp;
598  bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
599  if (!FoundIV)
600  return nullptr;
601 
602  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
603 
604  MachineOperand *InitialValue = nullptr;
605  MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
606  MachineBasicBlock *Latch = L->getLoopLatch();
607  for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
608  MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
609  if (MBB == Preheader)
610  InitialValue = &IV_Phi->getOperand(i);
611  else if (MBB == Latch)
612  IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
613  }
614  if (!InitialValue)
615  return nullptr;
616 
618  MachineBasicBlock *TB = nullptr, *FB = nullptr;
619  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
620  if (NotAnalyzed)
621  return nullptr;
622 
623  MachineBasicBlock *Header = L->getHeader();
624  // TB must be non-null. If FB is also non-null, one of them must be
625  // the header. Otherwise, branch to TB could be exiting the loop, and
626  // the fall through can go to the header.
627  assert (TB && "Exit block without a branch?");
628  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
629  MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
631  bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
632  if (NotAnalyzed)
633  return nullptr;
634  if (TB == Latch)
635  TB = (LTB == Header) ? LTB : LFB;
636  else
637  FB = (LTB == Header) ? LTB: LFB;
638  }
639  assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
640  if (!TB || (FB && TB != Header && FB != Header))
641  return nullptr;
642 
643  // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
644  // to put imm(0), followed by P in the vector Cond.
645  // If TB is not the header, it means that the "not-taken" path must lead
646  // to the header.
647  bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
648  unsigned PredReg, PredPos, PredRegFlags;
649  if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
650  return nullptr;
651  MachineInstr *CondI = MRI->getVRegDef(PredReg);
652  unsigned CondOpc = CondI->getOpcode();
653 
654  unsigned CmpReg1 = 0, CmpReg2 = 0;
655  int Mask = 0, ImmValue = 0;
656  bool AnalyzedCmp =
657  TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
658  if (!AnalyzedCmp)
659  return nullptr;
660 
661  // The comparison operator type determines how we compute the loop
662  // trip count.
663  OldInsts.push_back(CondI);
664  OldInsts.push_back(IVOp);
665 
666  // Sadly, the following code gets information based on the position
667  // of the operands in the compare instruction. This has to be done
668  // this way, because the comparisons check for a specific relationship
669  // between the operands (e.g. is-less-than), rather than to find out
670  // what relationship the operands are in (as on PPC).
671  Comparison::Kind Cmp;
672  bool isSwapped = false;
673  const MachineOperand &Op1 = CondI->getOperand(1);
674  const MachineOperand &Op2 = CondI->getOperand(2);
675  const MachineOperand *EndValue = nullptr;
676 
677  if (Op1.isReg()) {
678  if (Op2.isImm() || Op1.getReg() == IVReg)
679  EndValue = &Op2;
680  else {
681  EndValue = &Op1;
682  isSwapped = true;
683  }
684  }
685 
686  if (!EndValue)
687  return nullptr;
688 
689  Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
690  if (!Cmp)
691  return nullptr;
692  if (Negated)
693  Cmp = Comparison::getNegatedComparison(Cmp);
694  if (isSwapped)
695  Cmp = Comparison::getSwappedComparison(Cmp);
696 
697  if (InitialValue->isReg()) {
698  unsigned R = InitialValue->getReg();
699  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
700  if (!MDT->properlyDominates(DefBB, Header)) {
701  int64_t V;
702  if (!checkForImmediate(*InitialValue, V))
703  return nullptr;
704  }
705  OldInsts.push_back(MRI->getVRegDef(R));
706  }
707  if (EndValue->isReg()) {
708  unsigned R = EndValue->getReg();
709  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
710  if (!MDT->properlyDominates(DefBB, Header)) {
711  int64_t V;
712  if (!checkForImmediate(*EndValue, V))
713  return nullptr;
714  }
715  OldInsts.push_back(MRI->getVRegDef(R));
716  }
717 
718  return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
719 }
720 
721 /// \brief Helper function that returns the expression that represents the
722 /// number of times a loop iterates. The function takes the operands that
723 /// represent the loop start value, loop end value, and induction value.
724 /// Based upon these operands, the function attempts to compute the trip count.
725 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
726  const MachineOperand *Start,
727  const MachineOperand *End,
728  unsigned IVReg,
729  int64_t IVBump,
730  Comparison::Kind Cmp) const {
731  // Cannot handle comparison EQ, i.e. while (A == B).
732  if (Cmp == Comparison::EQ)
733  return nullptr;
734 
735  // Check if either the start or end values are an assignment of an immediate.
736  // If so, use the immediate value rather than the register.
737  if (Start->isReg()) {
738  const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
739  if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
740  StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
741  Start = &StartValInstr->getOperand(1);
742  }
743  if (End->isReg()) {
744  const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
745  if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
746  EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
747  End = &EndValInstr->getOperand(1);
748  }
749 
750  if (!Start->isReg() && !Start->isImm())
751  return nullptr;
752  if (!End->isReg() && !End->isImm())
753  return nullptr;
754 
755  bool CmpLess = Cmp & Comparison::L;
756  bool CmpGreater = Cmp & Comparison::G;
757  bool CmpHasEqual = Cmp & Comparison::EQ;
758 
759  // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
760  if (CmpLess && IVBump < 0)
761  // Loop going while iv is "less" with the iv value going down. Must wrap.
762  return nullptr;
763 
764  if (CmpGreater && IVBump > 0)
765  // Loop going while iv is "greater" with the iv value going up. Must wrap.
766  return nullptr;
767 
768  // Phis that may feed into the loop.
769  LoopFeederMap LoopFeederPhi;
770 
771  // Check if the initial value may be zero and can be decremented in the first
772  // iteration. If the value is zero, the endloop instruction will not decrement
773  // the loop counter, so we shouldn't generate a hardware loop in this case.
774  if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
775  LoopFeederPhi))
776  return nullptr;
777 
778  if (Start->isImm() && End->isImm()) {
779  // Both, start and end are immediates.
780  int64_t StartV = Start->getImm();
781  int64_t EndV = End->getImm();
782  int64_t Dist = EndV - StartV;
783  if (Dist == 0)
784  return nullptr;
785 
786  bool Exact = (Dist % IVBump) == 0;
787 
788  if (Cmp == Comparison::NE) {
789  if (!Exact)
790  return nullptr;
791  if ((Dist < 0) ^ (IVBump < 0))
792  return nullptr;
793  }
794 
795  // For comparisons that include the final value (i.e. include equality
796  // with the final value), we need to increase the distance by 1.
797  if (CmpHasEqual)
798  Dist = Dist > 0 ? Dist+1 : Dist-1;
799 
800  // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
801  // CmpGreater should imply Dist < 0. These conditions could actually
802  // fail, for example, in unreachable code (which may still appear to be
803  // reachable in the CFG).
804  if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
805  return nullptr;
806 
807  // "Normalized" distance, i.e. with the bump set to +-1.
808  int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
809  : (-Dist + (-IVBump - 1)) / (-IVBump);
810  assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
811 
812  uint64_t Count = Dist1;
813 
814  if (Count > 0xFFFFFFFFULL)
815  return nullptr;
816 
817  return new CountValue(CountValue::CV_Immediate, Count);
818  }
819 
820  // A general case: Start and End are some values, but the actual
821  // iteration count may not be available. If it is not, insert
822  // a computation of it into the preheader.
823 
824  // If the induction variable bump is not a power of 2, quit.
825  // Othwerise we'd need a general integer division.
826  if (!isPowerOf2_64(std::abs(IVBump)))
827  return nullptr;
828 
830  assert (PH && "Should have a preheader by now");
832  DebugLoc DL;
833  if (InsertPos != PH->end())
834  DL = InsertPos->getDebugLoc();
835 
836  // If Start is an immediate and End is a register, the trip count
837  // will be "reg - imm". Hexagon's "subtract immediate" instruction
838  // is actually "reg + -imm".
839 
840  // If the loop IV is going downwards, i.e. if the bump is negative,
841  // then the iteration count (computed as End-Start) will need to be
842  // negated. To avoid the negation, just swap Start and End.
843  if (IVBump < 0) {
844  std::swap(Start, End);
845  IVBump = -IVBump;
846  }
847  // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
848  // Signedness, and "including equality" are preserved.
849 
850  bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
851  bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
852 
853  int64_t StartV = 0, EndV = 0;
854  if (Start->isImm())
855  StartV = Start->getImm();
856  if (End->isImm())
857  EndV = End->getImm();
858 
859  int64_t AdjV = 0;
860  // To compute the iteration count, we would need this computation:
861  // Count = (End - Start + (IVBump-1)) / IVBump
862  // or, when CmpHasEqual:
863  // Count = (End - Start + (IVBump-1)+1) / IVBump
864  // The "IVBump-1" part is the adjustment (AdjV). We can avoid
865  // generating an instruction specifically to add it if we can adjust
866  // the immediate values for Start or End.
867 
868  if (CmpHasEqual) {
869  // Need to add 1 to the total iteration count.
870  if (Start->isImm())
871  StartV--;
872  else if (End->isImm())
873  EndV++;
874  else
875  AdjV += 1;
876  }
877 
878  if (Cmp != Comparison::NE) {
879  if (Start->isImm())
880  StartV -= (IVBump-1);
881  else if (End->isImm())
882  EndV += (IVBump-1);
883  else
884  AdjV += (IVBump-1);
885  }
886 
887  unsigned R = 0, SR = 0;
888  if (Start->isReg()) {
889  R = Start->getReg();
890  SR = Start->getSubReg();
891  } else {
892  R = End->getReg();
893  SR = End->getSubReg();
894  }
895  const TargetRegisterClass *RC = MRI->getRegClass(R);
896  // Hardware loops cannot handle 64-bit registers. If it's a double
897  // register, it has to have a subregister.
898  if (!SR && RC == &Hexagon::DoubleRegsRegClass)
899  return nullptr;
900  const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
901 
902  // Compute DistR (register with the distance between Start and End).
903  unsigned DistR, DistSR;
904 
905  // Avoid special case, where the start value is an imm(0).
906  if (Start->isImm() && StartV == 0) {
907  DistR = End->getReg();
908  DistSR = End->getSubReg();
909  } else {
910  const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
911  (RegToImm ? TII->get(Hexagon::A2_subri) :
912  TII->get(Hexagon::A2_addi));
913  if (RegToReg || RegToImm) {
914  unsigned SubR = MRI->createVirtualRegister(IntRC);
915  MachineInstrBuilder SubIB =
916  BuildMI(*PH, InsertPos, DL, SubD, SubR);
917 
918  if (RegToReg)
919  SubIB.addReg(End->getReg(), 0, End->getSubReg())
920  .addReg(Start->getReg(), 0, Start->getSubReg());
921  else
922  SubIB.addImm(EndV)
923  .addReg(Start->getReg(), 0, Start->getSubReg());
924  DistR = SubR;
925  } else {
926  // If the loop has been unrolled, we should use the original loop count
927  // instead of recalculating the value. This will avoid additional
928  // 'Add' instruction.
929  const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
930  if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
931  EndValInstr->getOperand(2).getImm() == StartV) {
932  DistR = EndValInstr->getOperand(1).getReg();
933  } else {
934  unsigned SubR = MRI->createVirtualRegister(IntRC);
935  MachineInstrBuilder SubIB =
936  BuildMI(*PH, InsertPos, DL, SubD, SubR);
937  SubIB.addReg(End->getReg(), 0, End->getSubReg())
938  .addImm(-StartV);
939  DistR = SubR;
940  }
941  }
942  DistSR = 0;
943  }
944 
945  // From DistR, compute AdjR (register with the adjusted distance).
946  unsigned AdjR, AdjSR;
947 
948  if (AdjV == 0) {
949  AdjR = DistR;
950  AdjSR = DistSR;
951  } else {
952  // Generate CountR = ADD DistR, AdjVal
953  unsigned AddR = MRI->createVirtualRegister(IntRC);
954  MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
955  BuildMI(*PH, InsertPos, DL, AddD, AddR)
956  .addReg(DistR, 0, DistSR)
957  .addImm(AdjV);
958 
959  AdjR = AddR;
960  AdjSR = 0;
961  }
962 
963  // From AdjR, compute CountR (register with the final count).
964  unsigned CountR, CountSR;
965 
966  if (IVBump == 1) {
967  CountR = AdjR;
968  CountSR = AdjSR;
969  } else {
970  // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
971  unsigned Shift = Log2_32(IVBump);
972 
973  // Generate NormR = LSR DistR, Shift.
974  unsigned LsrR = MRI->createVirtualRegister(IntRC);
975  const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
976  BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
977  .addReg(AdjR, 0, AdjSR)
978  .addImm(Shift);
979 
980  CountR = LsrR;
981  CountSR = 0;
982  }
983 
984  return new CountValue(CountValue::CV_Register, CountR, CountSR);
985 }
986 
987 /// \brief Return true if the operation is invalid within hardware loop.
988 bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
989  bool IsInnerHWLoop) const {
990  // Call is not allowed because the callee may use a hardware loop except for
991  // the case when the call never returns.
992  if (MI->getDesc().isCall())
993  return !TII->doesNotReturn(*MI);
994 
995  // Check if the instruction defines a hardware loop register.
996  using namespace Hexagon;
997 
998  static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 };
999  static const unsigned Regs1[] = { LC1, SA1 };
1000  auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01))
1001  : makeArrayRef(Regs1, array_lengthof(Regs1));
1002  for (unsigned R : CheckRegs)
1003  if (MI->modifiesRegister(R, TRI))
1004  return true;
1005 
1006  return false;
1007 }
1008 
1009 /// \brief Return true if the loop contains an instruction that inhibits
1010 /// the use of the hardware loop instruction.
1011 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
1012  bool IsInnerHWLoop) const {
1013  const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
1014  DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
1015  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1016  MachineBasicBlock *MBB = Blocks[i];
1018  MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
1019  const MachineInstr *MI = &*MII;
1020  if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
1021  DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
1022  return true;
1023  }
1024  }
1025  }
1026  return false;
1027 }
1028 
1029 /// \brief Returns true if the instruction is dead. This was essentially
1030 /// copied from DeadMachineInstructionElim::isDead, but with special cases
1031 /// for inline asm, physical registers and instructions with side effects
1032 /// removed.
1033 bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1034  SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1035  // Examine each operand.
1036  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1037  const MachineOperand &MO = MI->getOperand(i);
1038  if (!MO.isReg() || !MO.isDef())
1039  continue;
1040 
1041  unsigned Reg = MO.getReg();
1042  if (MRI->use_nodbg_empty(Reg))
1043  continue;
1044 
1045  using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1046 
1047  // This instruction has users, but if the only user is the phi node for the
1048  // parent block, and the only use of that phi node is this instruction, then
1049  // this instruction is dead: both it (and the phi node) can be removed.
1050  use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1051  use_nodbg_iterator End = MRI->use_nodbg_end();
1052  if (std::next(I) != End || !I->getParent()->isPHI())
1053  return false;
1054 
1055  MachineInstr *OnePhi = I->getParent();
1056  for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
1057  const MachineOperand &OPO = OnePhi->getOperand(j);
1058  if (!OPO.isReg() || !OPO.isDef())
1059  continue;
1060 
1061  unsigned OPReg = OPO.getReg();
1062  use_nodbg_iterator nextJ;
1063  for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1064  J != End; J = nextJ) {
1065  nextJ = std::next(J);
1066  MachineOperand &Use = *J;
1067  MachineInstr *UseMI = Use.getParent();
1068 
1069  // If the phi node has a user that is not MI, bail.
1070  if (MI != UseMI)
1071  return false;
1072  }
1073  }
1074  DeadPhis.push_back(OnePhi);
1075  }
1076 
1077  // If there are no defs with uses, the instruction is dead.
1078  return true;
1079 }
1080 
1081 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1082  // This procedure was essentially copied from DeadMachineInstructionElim.
1083 
1085  if (isDead(MI, DeadPhis)) {
1086  DEBUG(dbgs() << "HW looping will remove: " << *MI);
1087 
1088  // It is possible that some DBG_VALUE instructions refer to this
1089  // instruction. Examine each def operand for such references;
1090  // if found, mark the DBG_VALUE as undef (but don't delete it).
1091  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1092  const MachineOperand &MO = MI->getOperand(i);
1093  if (!MO.isReg() || !MO.isDef())
1094  continue;
1095  unsigned Reg = MO.getReg();
1098  E = MRI->use_end(); I != E; I = nextI) {
1099  nextI = std::next(I); // I is invalidated by the setReg
1100  MachineOperand &Use = *I;
1101  MachineInstr *UseMI = I->getParent();
1102  if (UseMI == MI)
1103  continue;
1104  if (Use.isDebug())
1105  UseMI->getOperand(0).setReg(0U);
1106  }
1107  }
1108 
1109  MI->eraseFromParent();
1110  for (unsigned i = 0; i < DeadPhis.size(); ++i)
1111  DeadPhis[i]->eraseFromParent();
1112  }
1113 }
1114 
1115 /// \brief Check if the loop is a candidate for converting to a hardware
1116 /// loop. If so, then perform the transformation.
1117 ///
1118 /// This function works on innermost loops first. A loop can be converted
1119 /// if it is a counting loop; either a register value or an immediate.
1120 ///
1121 /// The code makes several assumptions about the representation of the loop
1122 /// in llvm.
1123 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1124  bool &RecL0used,
1125  bool &RecL1used) {
1126  // This is just for sanity.
1127  assert(L->getHeader() && "Loop without a header?");
1128 
1129  bool Changed = false;
1130  bool L0Used = false;
1131  bool L1Used = false;
1132 
1133  // Process nested loops first.
1134  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
1135  Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
1136  L0Used |= RecL0used;
1137  L1Used |= RecL1used;
1138  }
1139 
1140  // If a nested loop has been converted, then we can't convert this loop.
1141  if (Changed && L0Used && L1Used)
1142  return Changed;
1143 
1144  unsigned LOOP_i;
1145  unsigned LOOP_r;
1146  unsigned ENDLOOP;
1147 
1148  // Flag used to track loopN instruction:
1149  // 1 - Hardware loop is being generated for the inner most loop.
1150  // 0 - Hardware loop is being generated for the outer loop.
1151  unsigned IsInnerHWLoop = 1;
1152 
1153  if (L0Used) {
1154  LOOP_i = Hexagon::J2_loop1i;
1155  LOOP_r = Hexagon::J2_loop1r;
1156  ENDLOOP = Hexagon::ENDLOOP1;
1157  IsInnerHWLoop = 0;
1158  } else {
1159  LOOP_i = Hexagon::J2_loop0i;
1160  LOOP_r = Hexagon::J2_loop0r;
1161  ENDLOOP = Hexagon::ENDLOOP0;
1162  }
1163 
1164 #ifndef NDEBUG
1165  // Stop trying after reaching the limit (if any).
1166  int Limit = HWLoopLimit;
1167  if (Limit >= 0) {
1168  if (Counter >= HWLoopLimit)
1169  return false;
1170  Counter++;
1171  }
1172 #endif
1173 
1174  // Does the loop contain any invalid instructions?
1175  if (containsInvalidInstruction(L, IsInnerHWLoop))
1176  return false;
1177 
1178  MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1179  // Don't generate hw loop if the loop has more than one exit.
1180  if (!LastMBB)
1181  return false;
1182 
1184  if (LastI == LastMBB->end())
1185  return false;
1186 
1187  // Is the induction variable bump feeding the latch condition?
1188  if (!fixupInductionVariable(L))
1189  return false;
1190 
1191  // Ensure the loop has a preheader: the loop instruction will be
1192  // placed there.
1193  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1194  if (!Preheader) {
1195  Preheader = createPreheaderForLoop(L);
1196  if (!Preheader)
1197  return false;
1198  }
1199 
1200  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1201 
1203  // Are we able to determine the trip count for the loop?
1204  CountValue *TripCount = getLoopTripCount(L, OldInsts);
1205  if (!TripCount)
1206  return false;
1207 
1208  // Is the trip count available in the preheader?
1209  if (TripCount->isReg()) {
1210  // There will be a use of the register inserted into the preheader,
1211  // so make sure that the register is actually defined at that point.
1212  MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1213  MachineBasicBlock *BBDef = TCDef->getParent();
1214  if (!MDT->dominates(BBDef, Preheader))
1215  return false;
1216  }
1217 
1218  // Determine the loop start.
1219  MachineBasicBlock *TopBlock = L->getTopBlock();
1220  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1221  MachineBasicBlock *LoopStart = nullptr;
1222  if (ExitingBlock != L->getLoopLatch()) {
1223  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1225 
1226  if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1227  return false;
1228 
1229  if (L->contains(TB))
1230  LoopStart = TB;
1231  else if (L->contains(FB))
1232  LoopStart = FB;
1233  else
1234  return false;
1235  }
1236  else
1237  LoopStart = TopBlock;
1238 
1239  // Convert the loop to a hardware loop.
1240  DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1241  DebugLoc DL;
1242  if (InsertPos != Preheader->end())
1243  DL = InsertPos->getDebugLoc();
1244 
1245  if (TripCount->isReg()) {
1246  // Create a copy of the loop count register.
1247  unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1248  BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1249  .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1250  // Add the Loop instruction to the beginning of the loop.
1251  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1252  .addReg(CountReg);
1253  } else {
1254  assert(TripCount->isImm() && "Expecting immediate value for trip count");
1255  // Add the Loop immediate instruction to the beginning of the loop,
1256  // if the immediate fits in the instructions. Otherwise, we need to
1257  // create a new virtual register.
1258  int64_t CountImm = TripCount->getImm();
1259  if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {
1260  unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1261  BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1262  .addImm(CountImm);
1263  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1264  .addMBB(LoopStart).addReg(CountReg);
1265  } else
1266  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1267  .addMBB(LoopStart).addImm(CountImm);
1268  }
1269 
1270  // Make sure the loop start always has a reference in the CFG. We need
1271  // to create a BlockAddress operand to get this mechanism to work both the
1272  // MachineBasicBlock and BasicBlock objects need the flag set.
1273  LoopStart->setHasAddressTaken();
1274  // This line is needed to set the hasAddressTaken flag on the BasicBlock
1275  // object.
1276  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
1277 
1278  // Replace the loop branch with an endloop instruction.
1279  DebugLoc LastIDL = LastI->getDebugLoc();
1280  BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1281 
1282  // The loop ends with either:
1283  // - a conditional branch followed by an unconditional branch, or
1284  // - a conditional branch to the loop start.
1285  if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1286  LastI->getOpcode() == Hexagon::J2_jumpf) {
1287  // Delete one and change/add an uncond. branch to out of the loop.
1288  MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1289  LastI = LastMBB->erase(LastI);
1290  if (!L->contains(BranchTarget)) {
1291  if (LastI != LastMBB->end())
1292  LastI = LastMBB->erase(LastI);
1294  TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1295  }
1296  } else {
1297  // Conditional branch to loop start; just delete it.
1298  LastMBB->erase(LastI);
1299  }
1300  delete TripCount;
1301 
1302  // The induction operation and the comparison may now be
1303  // unneeded. If these are unneeded, then remove them.
1304  for (unsigned i = 0; i < OldInsts.size(); ++i)
1305  removeIfDead(OldInsts[i]);
1306 
1307  ++NumHWLoops;
1308 
1309  // Set RecL1used and RecL0used only after hardware loop has been
1310  // successfully generated. Doing it earlier can cause wrong loop instruction
1311  // to be used.
1312  if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1313  RecL1used = true;
1314  else
1315  RecL0used = true;
1316 
1317  return true;
1318 }
1319 
1320 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1321  MachineInstr *CmpI) {
1322  assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1323 
1324  MachineBasicBlock *BB = BumpI->getParent();
1325  if (CmpI->getParent() != BB)
1326  return false;
1327 
1328  using instr_iterator = MachineBasicBlock::instr_iterator;
1329 
1330  // Check if things are in order to begin with.
1331  for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1332  if (&*I == CmpI)
1333  return true;
1334 
1335  // Out of order.
1336  unsigned PredR = CmpI->getOperand(0).getReg();
1337  bool FoundBump = false;
1338  instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1339  for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1340  MachineInstr *In = &*I;
1341  for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1342  MachineOperand &MO = In->getOperand(i);
1343  if (MO.isReg() && MO.isUse()) {
1344  if (MO.getReg() == PredR) // Found an intervening use of PredR.
1345  return false;
1346  }
1347  }
1348 
1349  if (In == BumpI) {
1350  BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1351  FoundBump = true;
1352  break;
1353  }
1354  }
1355  assert (FoundBump && "Cannot determine instruction order");
1356  return FoundBump;
1357 }
1358 
1359 /// This function is required to break recursion. Visiting phis in a loop may
1360 /// result in recursion during compilation. We break the recursion by making
1361 /// sure that we visit a MachineOperand and its definition in a
1362 /// MachineInstruction only once. If we attempt to visit more than once, then
1363 /// there is recursion, and will return false.
1364 bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1365  MachineInstr *MI,
1366  const MachineOperand *MO,
1367  LoopFeederMap &LoopFeederPhi) const {
1368  if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1369  const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
1370  DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
1371  // Ignore all BBs that form Loop.
1372  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1373  MachineBasicBlock *MBB = Blocks[i];
1374  if (A == MBB)
1375  return false;
1376  }
1377  MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1378  LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1379  return true;
1380  } else
1381  // Already visited node.
1382  return false;
1383 }
1384 
1385 /// Return true if a Phi may generate a value that can underflow.
1386 /// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1387 bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1388  MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1389  MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1390  assert(Phi->isPHI() && "Expecting a Phi.");
1391  // Walk through each Phi, and its used operands. Make sure that
1392  // if there is recursion in Phi, we won't generate hardware loops.
1393  for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1394  if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1395  if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1396  Phi->getParent(), L, LoopFeederPhi))
1397  return true;
1398  return false;
1399 }
1400 
1401 /// Return true if the induction variable can underflow in the first iteration.
1402 /// An example, is an initial unsigned value that is 0 and is decrement in the
1403 /// first itertion of a do-while loop. In this case, we cannot generate a
1404 /// hardware loop because the endloop instruction does not decrement the loop
1405 /// counter if it is <= 1. We only need to perform this analysis if the
1406 /// initial value is a register.
1407 ///
1408 /// This function assumes the initial value may underfow unless proven
1409 /// otherwise. If the type is signed, then we don't care because signed
1410 /// underflow is undefined. We attempt to prove the initial value is not
1411 /// zero by perfoming a crude analysis of the loop counter. This function
1412 /// checks if the initial value is used in any comparison prior to the loop
1413 /// and, if so, assumes the comparison is a range check. This is inexact,
1414 /// but will catch the simple cases.
1415 bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1416  const MachineOperand *InitVal, const MachineOperand *EndVal,
1417  MachineBasicBlock *MBB, MachineLoop *L,
1418  LoopFeederMap &LoopFeederPhi) const {
1419  // Only check register values since they are unknown.
1420  if (!InitVal->isReg())
1421  return false;
1422 
1423  if (!EndVal->isImm())
1424  return false;
1425 
1426  // A register value that is assigned an immediate is a known value, and it
1427  // won't underflow in the first iteration.
1428  int64_t Imm;
1429  if (checkForImmediate(*InitVal, Imm))
1430  return (EndVal->getImm() == Imm);
1431 
1432  unsigned Reg = InitVal->getReg();
1433 
1434  // We don't know the value of a physical register.
1436  return true;
1437 
1438  MachineInstr *Def = MRI->getVRegDef(Reg);
1439  if (!Def)
1440  return true;
1441 
1442  // If the initial value is a Phi or copy and the operands may not underflow,
1443  // then the definition cannot be underflow either.
1444  if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1445  L, LoopFeederPhi))
1446  return false;
1447  if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1448  EndVal, Def->getParent(),
1449  L, LoopFeederPhi))
1450  return false;
1451 
1452  // Iterate over the uses of the initial value. If the initial value is used
1453  // in a compare, then we assume this is a range check that ensures the loop
1454  // doesn't underflow. This is not an exact test and should be improved.
1456  E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1457  MachineInstr *MI = &*I;
1458  unsigned CmpReg1 = 0, CmpReg2 = 0;
1459  int CmpMask = 0, CmpValue = 0;
1460 
1461  if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1462  continue;
1463 
1464  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1466  if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1467  continue;
1468 
1469  Comparison::Kind Cmp =
1470  getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1471  if (Cmp == 0)
1472  continue;
1473  if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1474  Cmp = Comparison::getNegatedComparison(Cmp);
1475  if (CmpReg2 != 0 && CmpReg2 == Reg)
1476  Cmp = Comparison::getSwappedComparison(Cmp);
1477 
1478  // Signed underflow is undefined.
1479  if (Comparison::isSigned(Cmp))
1480  return false;
1481 
1482  // Check if there is a comparison of the initial value. If the initial value
1483  // is greater than or not equal to another value, then assume this is a
1484  // range check.
1485  if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1486  return false;
1487  }
1488 
1489  // OK - this is a hack that needs to be improved. We really need to analyze
1490  // the instructions performed on the initial value. This works on the simplest
1491  // cases only.
1492  if (!Def->isCopy() && !Def->isPHI())
1493  return false;
1494 
1495  return true;
1496 }
1497 
1498 bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1499  int64_t &Val) const {
1500  if (MO.isImm()) {
1501  Val = MO.getImm();
1502  return true;
1503  }
1504  if (!MO.isReg())
1505  return false;
1506 
1507  // MO is a register. Check whether it is defined as an immediate value,
1508  // and if so, get the value of it in TV. That value will then need to be
1509  // processed to handle potential subregisters in MO.
1510  int64_t TV;
1511 
1512  unsigned R = MO.getReg();
1514  return false;
1515  MachineInstr *DI = MRI->getVRegDef(R);
1516  unsigned DOpc = DI->getOpcode();
1517  switch (DOpc) {
1518  case TargetOpcode::COPY:
1519  case Hexagon::A2_tfrsi:
1520  case Hexagon::A2_tfrpi:
1521  case Hexagon::CONST32:
1522  case Hexagon::CONST64:
1523  // Call recursively to avoid an extra check whether operand(1) is
1524  // indeed an immediate (it could be a global address, for example),
1525  // plus we can handle COPY at the same time.
1526  if (!checkForImmediate(DI->getOperand(1), TV))
1527  return false;
1528  break;
1529  case Hexagon::A2_combineii:
1530  case Hexagon::A4_combineir:
1531  case Hexagon::A4_combineii:
1532  case Hexagon::A4_combineri:
1533  case Hexagon::A2_combinew: {
1534  const MachineOperand &S1 = DI->getOperand(1);
1535  const MachineOperand &S2 = DI->getOperand(2);
1536  int64_t V1, V2;
1537  if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1538  return false;
1539  TV = V2 | (static_cast<uint64_t>(V1) << 32);
1540  break;
1541  }
1542  case TargetOpcode::REG_SEQUENCE: {
1543  const MachineOperand &S1 = DI->getOperand(1);
1544  const MachineOperand &S3 = DI->getOperand(3);
1545  int64_t V1, V3;
1546  if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1547  return false;
1548  unsigned Sub2 = DI->getOperand(2).getImm();
1549  unsigned Sub4 = DI->getOperand(4).getImm();
1550  if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1551  TV = V1 | (V3 << 32);
1552  else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1553  TV = V3 | (V1 << 32);
1554  else
1555  llvm_unreachable("Unexpected form of REG_SEQUENCE");
1556  break;
1557  }
1558 
1559  default:
1560  return false;
1561  }
1562 
1563  // By now, we should have successfully obtained the immediate value defining
1564  // the register referenced in MO. Handle a potential use of a subregister.
1565  switch (MO.getSubReg()) {
1566  case Hexagon::isub_lo:
1567  Val = TV & 0xFFFFFFFFULL;
1568  break;
1569  case Hexagon::isub_hi:
1570  Val = (TV >> 32) & 0xFFFFFFFFULL;
1571  break;
1572  default:
1573  Val = TV;
1574  break;
1575  }
1576  return true;
1577 }
1578 
1579 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1580  if (MO.isImm()) {
1581  MO.setImm(Val);
1582  return;
1583  }
1584 
1585  assert(MO.isReg());
1586  unsigned R = MO.getReg();
1587  MachineInstr *DI = MRI->getVRegDef(R);
1588 
1589  const TargetRegisterClass *RC = MRI->getRegClass(R);
1590  unsigned NewR = MRI->createVirtualRegister(RC);
1591  MachineBasicBlock &B = *DI->getParent();
1592  DebugLoc DL = DI->getDebugLoc();
1593  BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1594  MO.setReg(NewR);
1595 }
1596 
1597 static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
1598  // These two instructions are not extendable.
1599  if (CmpOpc == Hexagon::A4_cmpbeqi)
1600  return isUInt<8>(Imm);
1601  if (CmpOpc == Hexagon::A4_cmpbgti)
1602  return isInt<8>(Imm);
1603  // The rest of the comparison-with-immediate instructions are extendable.
1604  return true;
1605 }
1606 
1607 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1608  MachineBasicBlock *Header = L->getHeader();
1609  MachineBasicBlock *Latch = L->getLoopLatch();
1610  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1611 
1612  if (!(Header && Latch && ExitingBlock))
1613  return false;
1614 
1615  // These data structures follow the same concept as the corresponding
1616  // ones in findInductionRegister (where some comments are).
1617  using RegisterBump = std::pair<unsigned, int64_t>;
1618  using RegisterInduction = std::pair<unsigned, RegisterBump>;
1619  using RegisterInductionSet = std::set<RegisterInduction>;
1620 
1621  // Register candidates for induction variables, with their associated bumps.
1622  RegisterInductionSet IndRegs;
1623 
1624  // Look for induction patterns:
1625  // vreg1 = PHI ..., [ latch, vreg2 ]
1626  // vreg2 = ADD vreg1, imm
1627  using instr_iterator = MachineBasicBlock::instr_iterator;
1628 
1629  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1630  I != E && I->isPHI(); ++I) {
1631  MachineInstr *Phi = &*I;
1632 
1633  // Have a PHI instruction.
1634  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1635  if (Phi->getOperand(i+1).getMBB() != Latch)
1636  continue;
1637 
1638  unsigned PhiReg = Phi->getOperand(i).getReg();
1639  MachineInstr *DI = MRI->getVRegDef(PhiReg);
1640 
1641  if (DI->getDesc().isAdd()) {
1642  // If the register operand to the add/sub is the PHI we are looking
1643  // at, this meets the induction pattern.
1644  unsigned IndReg = DI->getOperand(1).getReg();
1645  MachineOperand &Opnd2 = DI->getOperand(2);
1646  int64_t V;
1647  if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1648  unsigned UpdReg = DI->getOperand(0).getReg();
1649  IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1650  }
1651  }
1652  } // for (i)
1653  } // for (instr)
1654 
1655  if (IndRegs.empty())
1656  return false;
1657 
1658  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1660  // AnalyzeBranch returns true if it fails to analyze branch.
1661  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1662  if (NotAnalyzed || Cond.empty())
1663  return false;
1664 
1665  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1666  MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1668  bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1669  if (NotAnalyzed)
1670  return false;
1671 
1672  // Since latch is not the exiting block, the latch branch should be an
1673  // unconditional branch to the loop header.
1674  if (TB == Latch)
1675  TB = (LTB == Header) ? LTB : LFB;
1676  else
1677  FB = (LTB == Header) ? LTB : LFB;
1678  }
1679  if (TB != Header) {
1680  if (FB != Header) {
1681  // The latch/exit block does not go back to the header.
1682  return false;
1683  }
1684  // FB is the header (i.e., uncond. jump to branch header)
1685  // In this case, the LoopBody -> TB should not be a back edge otherwise
1686  // it could result in an infinite loop after conversion to hw_loop.
1687  // This case can happen when the Latch has two jumps like this:
1688  // Jmp_c OuterLoopHeader <-- TB
1689  // Jmp InnerLoopHeader <-- FB
1690  if (MDT->dominates(TB, FB))
1691  return false;
1692  }
1693 
1694  // Expecting a predicate register as a condition. It won't be a hardware
1695  // predicate register at this point yet, just a vreg.
1696  // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
1697  // into Cond, followed by the predicate register. For non-negated branches
1698  // it's just the register.
1699  unsigned CSz = Cond.size();
1700  if (CSz != 1 && CSz != 2)
1701  return false;
1702 
1703  if (!Cond[CSz-1].isReg())
1704  return false;
1705 
1706  unsigned P = Cond[CSz-1].getReg();
1707  MachineInstr *PredDef = MRI->getVRegDef(P);
1708 
1709  if (!PredDef->isCompare())
1710  return false;
1711 
1712  SmallSet<unsigned,2> CmpRegs;
1713  MachineOperand *CmpImmOp = nullptr;
1714 
1715  // Go over all operands to the compare and look for immediate and register
1716  // operands. Assume that if the compare has a single register use and a
1717  // single immediate operand, then the register is being compared with the
1718  // immediate value.
1719  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1720  MachineOperand &MO = PredDef->getOperand(i);
1721  if (MO.isReg()) {
1722  // Skip all implicit references. In one case there was:
1723  // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
1724  if (MO.isImplicit())
1725  continue;
1726  if (MO.isUse()) {
1727  if (!isImmediate(MO)) {
1728  CmpRegs.insert(MO.getReg());
1729  continue;
1730  }
1731  // Consider the register to be the "immediate" operand.
1732  if (CmpImmOp)
1733  return false;
1734  CmpImmOp = &MO;
1735  }
1736  } else if (MO.isImm()) {
1737  if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1738  return false;
1739  CmpImmOp = &MO;
1740  }
1741  }
1742 
1743  if (CmpRegs.empty())
1744  return false;
1745 
1746  // Check if the compared register follows the order we want. Fix if needed.
1747  for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1748  I != E; ++I) {
1749  // This is a success. If the register used in the comparison is one that
1750  // we have identified as a bumped (updated) induction register, there is
1751  // nothing to do.
1752  if (CmpRegs.count(I->first))
1753  return true;
1754 
1755  // Otherwise, if the register being compared comes out of a PHI node,
1756  // and has been recognized as following the induction pattern, and is
1757  // compared against an immediate, we can fix it.
1758  const RegisterBump &RB = I->second;
1759  if (CmpRegs.count(RB.first)) {
1760  if (!CmpImmOp) {
1761  // If both operands to the compare instruction are registers, see if
1762  // it can be changed to use induction register as one of the operands.
1763  MachineInstr *IndI = nullptr;
1764  MachineInstr *nonIndI = nullptr;
1765  MachineOperand *IndMO = nullptr;
1766  MachineOperand *nonIndMO = nullptr;
1767 
1768  for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1769  MachineOperand &MO = PredDef->getOperand(i);
1770  if (MO.isReg() && MO.getReg() == RB.first) {
1771  DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1772  << *(MRI->getVRegDef(I->first)));
1773  if (IndI)
1774  return false;
1775 
1776  IndI = MRI->getVRegDef(I->first);
1777  IndMO = &MO;
1778  } else if (MO.isReg()) {
1779  DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1780  << *(MRI->getVRegDef(MO.getReg())));
1781  if (nonIndI)
1782  return false;
1783 
1784  nonIndI = MRI->getVRegDef(MO.getReg());
1785  nonIndMO = &MO;
1786  }
1787  }
1788  if (IndI && nonIndI &&
1789  nonIndI->getOpcode() == Hexagon::A2_addi &&
1790  nonIndI->getOperand(2).isImm() &&
1791  nonIndI->getOperand(2).getImm() == - RB.second) {
1792  bool Order = orderBumpCompare(IndI, PredDef);
1793  if (Order) {
1794  IndMO->setReg(I->first);
1795  nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1796  return true;
1797  }
1798  }
1799  return false;
1800  }
1801 
1802  // It is not valid to do this transformation on an unsigned comparison
1803  // because it may underflow.
1804  Comparison::Kind Cmp =
1805  getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1806  if (!Cmp || Comparison::isUnsigned(Cmp))
1807  return false;
1808 
1809  // If the register is being compared against an immediate, try changing
1810  // the compare instruction to use induction register and adjust the
1811  // immediate operand.
1812  int64_t CmpImm = getImmediate(*CmpImmOp);
1813  int64_t V = RB.second;
1814  // Handle Overflow (64-bit).
1815  if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1816  ((V < 0) && (CmpImm < INT64_MIN - V)))
1817  return false;
1818  CmpImm += V;
1819  // Most comparisons of register against an immediate value allow
1820  // the immediate to be constant-extended. There are some exceptions
1821  // though. Make sure the new combination will work.
1822  if (CmpImmOp->isImm())
1823  if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
1824  return false;
1825 
1826  // Make sure that the compare happens after the bump. Otherwise,
1827  // after the fixup, the compare would use a yet-undefined register.
1828  MachineInstr *BumpI = MRI->getVRegDef(I->first);
1829  bool Order = orderBumpCompare(BumpI, PredDef);
1830  if (!Order)
1831  return false;
1832 
1833  // Finally, fix the compare instruction.
1834  setImmediate(*CmpImmOp, CmpImm);
1835  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1836  MachineOperand &MO = PredDef->getOperand(i);
1837  if (MO.isReg() && MO.getReg() == RB.first) {
1838  MO.setReg(I->first);
1839  return true;
1840  }
1841  }
1842  }
1843  }
1844 
1845  return false;
1846 }
1847 
1848 /// createPreheaderForLoop - Create a preheader for a given loop.
1849 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1850  MachineLoop *L) {
1851  if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1852  return TmpPH;
1853  if (!HWCreatePreheader)
1854  return nullptr;
1855 
1856  MachineBasicBlock *Header = L->getHeader();
1857  MachineBasicBlock *Latch = L->getLoopLatch();
1858  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1859  MachineFunction *MF = Header->getParent();
1860  DebugLoc DL;
1861 
1862 #ifndef NDEBUG
1863  if ((!PHFn.empty()) && (PHFn != MF->getName()))
1864  return nullptr;
1865 #endif
1866 
1867  if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1868  return nullptr;
1869 
1870  using instr_iterator = MachineBasicBlock::instr_iterator;
1871 
1872  // Verify that all existing predecessors have analyzable branches
1873  // (or no branches at all).
1874  using MBBVector = std::vector<MachineBasicBlock *>;
1875 
1876  MBBVector Preds(Header->pred_begin(), Header->pred_end());
1878  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1879 
1880  if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1881  return nullptr;
1882 
1883  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1884  MachineBasicBlock *PB = *I;
1885  bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1886  if (NotAnalyzed)
1887  return nullptr;
1888  }
1889 
1891  MF->insert(Header->getIterator(), NewPH);
1892 
1893  if (Header->pred_size() > 2) {
1894  // Ensure that the header has only two predecessors: the preheader and
1895  // the loop latch. Any additional predecessors of the header should
1896  // join at the newly created preheader. Inspect all PHI nodes from the
1897  // header and create appropriate corresponding PHI nodes in the preheader.
1898 
1899  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1900  I != E && I->isPHI(); ++I) {
1901  MachineInstr *PN = &*I;
1902 
1903  const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1904  MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1905  NewPH->insert(NewPH->end(), NewPN);
1906 
1907  unsigned PR = PN->getOperand(0).getReg();
1908  const TargetRegisterClass *RC = MRI->getRegClass(PR);
1909  unsigned NewPR = MRI->createVirtualRegister(RC);
1910  NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1911 
1912  // Copy all non-latch operands of a header's PHI node to the newly
1913  // created PHI node in the preheader.
1914  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1915  unsigned PredR = PN->getOperand(i).getReg();
1916  unsigned PredRSub = PN->getOperand(i).getSubReg();
1917  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1918  if (PredB == Latch)
1919  continue;
1920 
1921  MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1922  MO.setSubReg(PredRSub);
1923  NewPN->addOperand(MO);
1924  NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1925  }
1926 
1927  // Remove copied operands from the old PHI node and add the value
1928  // coming from the preheader's PHI.
1929  for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1930  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1931  if (PredB != Latch) {
1932  PN->RemoveOperand(i+1);
1933  PN->RemoveOperand(i);
1934  }
1935  }
1936  PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1938  }
1939  } else {
1940  assert(Header->pred_size() == 2);
1941 
1942  // The header has only two predecessors, but the non-latch predecessor
1943  // is not a preheader (e.g. it has other successors, etc.)
1944  // In such a case we don't need any extra PHI nodes in the new preheader,
1945  // all we need is to adjust existing PHIs in the header to now refer to
1946  // the new preheader.
1947  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1948  I != E && I->isPHI(); ++I) {
1949  MachineInstr *PN = &*I;
1950  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1951  MachineOperand &MO = PN->getOperand(i+1);
1952  if (MO.getMBB() != Latch)
1953  MO.setMBB(NewPH);
1954  }
1955  }
1956  }
1957 
1958  // "Reroute" the CFG edges to link in the new preheader.
1959  // If any of the predecessors falls through to the header, insert a branch
1960  // to the new preheader in that place.
1963 
1964  TB = FB = nullptr;
1965 
1966  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1967  MachineBasicBlock *PB = *I;
1968  if (PB != Latch) {
1969  Tmp2.clear();
1970  bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1971  (void)NotAnalyzed; // suppress compiler warning
1972  assert (!NotAnalyzed && "Should be analyzable!");
1973  if (TB != Header && (Tmp2.empty() || FB != Header))
1974  TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1975  PB->ReplaceUsesOfBlockWith(Header, NewPH);
1976  }
1977  }
1978 
1979  // It can happen that the latch block will fall through into the header.
1980  // Insert an unconditional branch to the header.
1981  TB = FB = nullptr;
1982  bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1983  (void)LatchNotAnalyzed; // suppress compiler warning
1984  assert (!LatchNotAnalyzed && "Should be analyzable!");
1985  if (!TB && !FB)
1986  TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1987 
1988  // Finally, the branch from the preheader to the header.
1989  TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1990  NewPH->addSuccessor(Header);
1991 
1992  MachineLoop *ParentLoop = L->getParentLoop();
1993  if (ParentLoop)
1994  ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1995 
1996  // Update the dominator information with the new preheader.
1997  if (MDT) {
1998  if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1999  if (MachineDomTreeNode *DHN = HN->getIDom()) {
2000  MDT->addNewBlock(NewPH, DHN->getBlock());
2001  MDT->changeImmediateDominator(Header, NewPH);
2002  }
2003  }
2004  }
2005 
2006  return NewPH;
2007 }
static bool isReg(const MCInst &MI, unsigned OpNo)
void push_back(const T &Elt)
Definition: SmallVector.h:212
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register...
Definition: MachineInstr.h:965
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
instr_iterator instr_begin()
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
void initializeHexagonHardwareLoopsPass(PassRegistry &)
static cl::opt< bool > HWCreatePreheader("hexagon-hwloop-preheader", cl::Hidden, cl::init(true), cl::desc("Add a preheader to a hardware loop if one doesn't exist"))
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:268
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm)
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned getSubReg() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:295
static cl::opt< std::string > PHFn("hexagon-hwloop-phfn", cl::Hidden, cl::init(""))
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
F(f)
FunctionPass * createHexagonHardwareLoops()
use_nodbg_iterator use_nodbg_begin(unsigned RegNo) const
bool isPHI() const
Definition: MachineInstr.h:826
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
static use_nodbg_iterator use_nodbg_end()
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to &#39;Old&#39;, change the code and CFG so that it branches to &#39;N...
Hexagon Hardware Loops
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:56
static use_iterator use_end()
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout, ignoring any parts of the loop not contiguous with the part that contains the header.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:293
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
bool predOpcodeHasNot(ArrayRef< MachineOperand > Cond) const
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
BlockT * getHeader() const
Definition: LoopInfo.h:100
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
std::vector< MachineLoop *>::const_iterator iterator
Definition: LoopInfo.h:139
void RemoveOperand(unsigned i)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
bool doesNotReturn(const MachineInstr &CallMI) const
MachineBasicBlock * findLoopControlBlock()
Find the block that contains the loop control variable and the loop test.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:287
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:183
defusechain_iterator< true, false, true, true, false, false > use_nodbg_iterator
use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the specified register...
Base class for the actual dominator tree node.
#define EQ(a, b)
Definition: regexec.c:112
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubRegIdx=0)
Prints virtual and physical registers with or without a TRI instance.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
bool getPredReg(ArrayRef< MachineOperand > Cond, unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static cl::opt< int > HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1))
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:335
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
Definition: MachineInstr.h:519
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1339
MachineInstrBuilder & UseMI
DomTreeNodeBase * getIDom() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
void setMBB(MachineBasicBlock *MBB)
Represent the analysis usage information of a pass.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:426
static const unsigned End
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
self_iterator getIterator()
Definition: ilist_node.h:82
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
std::vector< MachineBasicBlock * >::iterator pred_iterator
bool isCopy() const
Definition: MachineInstr.h:857
bool hasAddressTaken() const
Test whether this block is potentially the target of an indirect branch.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool isAdd() const
Return true if the instruction is an add instruction.
Definition: MCInstrDesc.h:248
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
iterator begin() const
Definition: LoopInfo.h:142
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:720
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
int64_t getImm() const
unsigned pred_size() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:531
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
typename SuperClass::iterator iterator
Definition: SmallVector.h:328
void setHasAddressTaken()
Set this block to reflect that it potentially is the target of an indirect branch.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:139
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Representation of each machine instruction.
Definition: MachineInstr.h:59
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0)
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
bool isValidOffset(unsigned Opcode, int Offset, const TargetRegisterInfo *TRI, bool Extend=true) const
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:58
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
void setSubReg(unsigned subReg)
bool isCall() const
Return true if the instruction is a call.
Definition: MCInstrDesc.h:251
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator end() const
Definition: LoopInfo.h:143
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * findLoopPreheader(MachineLoop *L, bool SpeculativePreheader=false) const
Find the block that either is the loop preheader, or could speculatively be used as the preheader...
Instructions::iterator instr_iterator
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
const unsigned Kind
static use_instr_nodbg_iterator use_instr_nodbg_end()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const
INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", "Hexagon Hardware Loops", false, false) INITIALIZE_PASS_END(HexagonHardwareLoops
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
static cl::opt< bool > SpecPreheader("hwloop-spec-preheader", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader " "instructions"))
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool isImplicit() const
LoopInfoBase< MachineBasicBlock, MachineLoop > & getBase()
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:65