LLVM  16.0.0git
HexagonHardwareLoops.cpp
Go to the documentation of this file.
1 //===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass identifies loops where we can generate the Hexagon hardware
10 // loop instruction. The hardware loop can perform loop branches with a
11 // zero-cycle overhead.
12 //
13 // The pattern that defines the induction variable can changed depending on
14 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
15 // normalizes induction variables, and the Loop Strength Reduction pass
16 // run by 'llc' may also make changes to the induction variable.
17 // The pattern detected by this phase is due to running Strength Reduction.
18 //
19 // Criteria for hardware loops:
20 // - Countable loops (w/ ind. var for a trip count)
21 // - Assumes loops are normalized by IndVarSimplify
22 // - Try inner-most loops first
23 // - No function calls in loops.
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "HexagonInstrInfo.h"
28 #include "HexagonSubtarget.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/StringRef.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/InitializePasses.h"
48 #include "llvm/Pass.h"
50 #include "llvm/Support/Debug.h"
54 #include <cassert>
55 #include <cstdint>
56 #include <cstdlib>
57 #include <iterator>
58 #include <map>
59 #include <set>
60 #include <string>
61 #include <utility>
62 #include <vector>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "hwloops"
67 
68 #ifndef NDEBUG
69 static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
70 
71 // Option to create preheader only for a specific function.
72 static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
73  cl::init(""));
74 #endif
75 
76 // Option to create a preheader if one doesn't exist.
77 static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
78  cl::Hidden, cl::init(true),
79  cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
80 
81 // Turn it off by default. If a preheader block is not created here, the
82 // software pipeliner may be unable to find a block suitable to serve as
83 // a preheader. In that case SWP will not run.
84 static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::Hidden,
85  cl::desc("Allow speculation of preheader "
86  "instructions"));
87 
88 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
89 
90 namespace llvm {
91 
94 
95 } // end namespace llvm
96 
97 namespace {
98 
99  class CountValue;
100 
101  struct HexagonHardwareLoops : public MachineFunctionPass {
102  MachineLoopInfo *MLI;
105  const HexagonInstrInfo *TII;
106  const HexagonRegisterInfo *TRI;
107 #ifndef NDEBUG
108  static int Counter;
109 #endif
110 
111  public:
112  static char ID;
113 
114  HexagonHardwareLoops() : MachineFunctionPass(ID) {}
115 
116  bool runOnMachineFunction(MachineFunction &MF) override;
117 
118  StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
119 
120  void getAnalysisUsage(AnalysisUsage &AU) const override {
124  }
125 
126  private:
127  using LoopFeederMap = std::map<Register, MachineInstr *>;
128 
129  /// Kinds of comparisons in the compare instructions.
130  struct Comparison {
131  enum Kind {
132  EQ = 0x01,
133  NE = 0x02,
134  L = 0x04,
135  G = 0x08,
136  U = 0x40,
137  LTs = L,
138  LEs = L | EQ,
139  GTs = G,
140  GEs = G | EQ,
141  LTu = L | U,
142  LEu = L | EQ | U,
143  GTu = G | U,
144  GEu = G | EQ | U
145  };
146 
147  static Kind getSwappedComparison(Kind Cmp) {
148  assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
149  if ((Cmp & L) || (Cmp & G))
150  return (Kind)(Cmp ^ (L|G));
151  return Cmp;
152  }
153 
154  static Kind getNegatedComparison(Kind Cmp) {
155  if ((Cmp & L) || (Cmp & G))
156  return (Kind)((Cmp ^ (L | G)) ^ EQ);
157  if ((Cmp & NE) || (Cmp & EQ))
158  return (Kind)(Cmp ^ (EQ | NE));
159  return (Kind)0;
160  }
161 
162  static bool isSigned(Kind Cmp) {
163  return (Cmp & (L | G) && !(Cmp & U));
164  }
165 
166  static bool isUnsigned(Kind Cmp) {
167  return (Cmp & U);
168  }
169  };
170 
171  /// Find the register that contains the loop controlling
172  /// induction variable.
173  /// If successful, it will return true and set the \p Reg, \p IVBump
174  /// and \p IVOp arguments. Otherwise it will return false.
175  /// The returned induction register is the register R that follows the
176  /// following induction pattern:
177  /// loop:
178  /// R = phi ..., [ R.next, LatchBlock ]
179  /// R.next = R + #bump
180  /// if (R.next < #N) goto loop
181  /// IVBump is the immediate value added to R, and IVOp is the instruction
182  /// "R.next = R + #bump".
183  bool findInductionRegister(MachineLoop *L, Register &Reg,
184  int64_t &IVBump, MachineInstr *&IVOp) const;
185 
186  /// Return the comparison kind for the specified opcode.
187  Comparison::Kind getComparisonKind(unsigned CondOpc,
188  MachineOperand *InitialValue,
189  const MachineOperand *Endvalue,
190  int64_t IVBump) const;
191 
192  /// Analyze the statements in a loop to determine if the loop
193  /// has a computable trip count and, if so, return a value that represents
194  /// the trip count expression.
195  CountValue *getLoopTripCount(MachineLoop *L,
197 
198  /// Return the expression that represents the number of times
199  /// a loop iterates. The function takes the operands that represent the
200  /// loop start value, loop end value, and induction value. Based upon
201  /// these operands, the function attempts to compute the trip count.
202  /// If the trip count is not directly available (as an immediate value,
203  /// or a register), the function will attempt to insert computation of it
204  /// to the loop's preheader.
205  CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
206  const MachineOperand *End, Register IVReg,
207  int64_t IVBump, Comparison::Kind Cmp) const;
208 
209  /// Return true if the instruction is not valid within a hardware
210  /// loop.
211  bool isInvalidLoopOperation(const MachineInstr *MI,
212  bool IsInnerHWLoop) const;
213 
214  /// Return true if the loop contains an instruction that inhibits
215  /// using the hardware loop.
216  bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
217 
218  /// Given a loop, check if we can convert it to a hardware loop.
219  /// If so, then perform the conversion and return true.
220  bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
221 
222  /// Return true if the instruction is now dead.
223  bool isDead(const MachineInstr *MI,
224  SmallVectorImpl<MachineInstr *> &DeadPhis) const;
225 
226  /// Remove the instruction if it is now dead.
227  void removeIfDead(MachineInstr *MI);
228 
229  /// Make sure that the "bump" instruction executes before the
230  /// compare. We need that for the IV fixup, so that the compare
231  /// instruction would not use a bumped value that has not yet been
232  /// defined. If the instructions are out of order, try to reorder them.
233  bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
234 
235  /// Return true if MO and MI pair is visited only once. If visited
236  /// more than once, this indicates there is recursion. In such a case,
237  /// return false.
238  bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
239  const MachineOperand *MO,
240  LoopFeederMap &LoopFeederPhi) const;
241 
242  /// Return true if the Phi may generate a value that may underflow,
243  /// or may wrap.
244  bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
246  LoopFeederMap &LoopFeederPhi) const;
247 
248  /// Return true if the induction variable may underflow an unsigned
249  /// value in the first iteration.
250  bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
251  const MachineOperand *EndVal,
253  LoopFeederMap &LoopFeederPhi) const;
254 
255  /// Check if the given operand has a compile-time known constant
256  /// value. Return true if yes, and false otherwise. When returning true, set
257  /// Val to the corresponding constant value.
258  bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
259 
260  /// Check if the operand has a compile-time known constant value.
261  bool isImmediate(const MachineOperand &MO) const {
262  int64_t V;
263  return checkForImmediate(MO, V);
264  }
265 
266  /// Return the immediate for the specified operand.
267  int64_t getImmediate(const MachineOperand &MO) const {
268  int64_t V;
269  if (!checkForImmediate(MO, V))
270  llvm_unreachable("Invalid operand");
271  return V;
272  }
273 
274  /// Reset the given machine operand to now refer to a new immediate
275  /// value. Assumes that the operand was already referencing an immediate
276  /// value, either directly, or via a register.
277  void setImmediate(MachineOperand &MO, int64_t Val);
278 
279  /// Fix the data flow of the induction variable.
280  /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
281  /// |
282  /// +-> back to phi
283  /// where "bump" is the increment of the induction variable:
284  /// iv = iv + #const.
285  /// Due to some prior code transformations, the actual flow may look
286  /// like this:
287  /// phi -+-> bump ---> back to phi
288  /// |
289  /// +-> comparison-in-latch (against upper_bound-bump),
290  /// i.e. the comparison that controls the loop execution may be using
291  /// the value of the induction variable from before the increment.
292  ///
293  /// Return true if the loop's flow is the desired one (i.e. it's
294  /// either been fixed, or no fixing was necessary).
295  /// Otherwise, return false. This can happen if the induction variable
296  /// couldn't be identified, or if the value in the latch's comparison
297  /// cannot be adjusted to reflect the post-bump value.
298  bool fixupInductionVariable(MachineLoop *L);
299 
300  /// Given a loop, if it does not have a preheader, create one.
301  /// Return the block that is the preheader.
302  MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
303  };
304 
305  char HexagonHardwareLoops::ID = 0;
306 #ifndef NDEBUG
307  int HexagonHardwareLoops::Counter = 0;
308 #endif
309 
310  /// Abstraction for a trip count of a loop. A smaller version
311  /// of the MachineOperand class without the concerns of changing the
312  /// operand representation.
313  class CountValue {
314  public:
315  enum CountValueType {
316  CV_Register,
317  CV_Immediate
318  };
319 
320  private:
321  CountValueType Kind;
322  union Values {
323  Values() : R{Register(), 0} {}
324  Values(const Values&) = default;
325  struct {
326  Register Reg;
327  unsigned Sub;
328  } R;
329  unsigned ImmVal;
330  } Contents;
331 
332  public:
333  explicit CountValue(CountValueType t, Register v, unsigned u = 0) {
334  Kind = t;
335  if (Kind == CV_Register) {
336  Contents.R.Reg = v;
337  Contents.R.Sub = u;
338  } else {
339  Contents.ImmVal = v;
340  }
341  }
342 
343  bool isReg() const { return Kind == CV_Register; }
344  bool isImm() const { return Kind == CV_Immediate; }
345 
346  Register getReg() const {
347  assert(isReg() && "Wrong CountValue accessor");
348  return Contents.R.Reg;
349  }
350 
351  unsigned getSubReg() const {
352  assert(isReg() && "Wrong CountValue accessor");
353  return Contents.R.Sub;
354  }
355 
356  unsigned getImm() const {
357  assert(isImm() && "Wrong CountValue accessor");
358  return Contents.ImmVal;
359  }
360 
361  void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
362  if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); }
363  if (isImm()) { OS << Contents.ImmVal; }
364  }
365  };
366 
367 } // end anonymous namespace
368 
369 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
370  "Hexagon Hardware Loops", false, false)
373 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
374  "Hexagon Hardware Loops", false, false)
375 
377  return new HexagonHardwareLoops();
378 }
379 
380 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
381  LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
382  if (skipFunction(MF.getFunction()))
383  return false;
384 
385  bool Changed = false;
386 
387  MLI = &getAnalysis<MachineLoopInfo>();
388  MRI = &MF.getRegInfo();
389  MDT = &getAnalysis<MachineDominatorTree>();
391  TII = HST.getInstrInfo();
392  TRI = HST.getRegisterInfo();
393 
394  for (auto &L : *MLI)
395  if (L->isOutermost()) {
396  bool L0Used = false;
397  bool L1Used = false;
398  Changed |= convertToHardwareLoop(L, L0Used, L1Used);
399  }
400 
401  return Changed;
402 }
403 
404 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
405  Register &Reg,
406  int64_t &IVBump,
407  MachineInstr *&IVOp
408  ) const {
409  MachineBasicBlock *Header = L->getHeader();
410  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
411  MachineBasicBlock *Latch = L->getLoopLatch();
412  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
413  if (!Header || !Preheader || !Latch || !ExitingBlock)
414  return false;
415 
416  // This pair represents an induction register together with an immediate
417  // value that will be added to it in each loop iteration.
418  using RegisterBump = std::pair<Register, int64_t>;
419 
420  // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
421  // from an induction operation
422  // R.next = R + bump
423  // where bump is an immediate value.
424  using InductionMap = std::map<Register, RegisterBump>;
425 
426  InductionMap IndMap;
427 
428  using instr_iterator = MachineBasicBlock::instr_iterator;
429 
430  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
431  I != E && I->isPHI(); ++I) {
432  MachineInstr *Phi = &*I;
433 
434  // Have a PHI instruction. Get the operand that corresponds to the
435  // latch block, and see if is a result of an addition of form "reg+imm",
436  // where the "reg" is defined by the PHI node we are looking at.
437  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
438  if (Phi->getOperand(i+1).getMBB() != Latch)
439  continue;
440 
441  Register PhiOpReg = Phi->getOperand(i).getReg();
442  MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
443 
444  if (DI->getDesc().isAdd()) {
445  // If the register operand to the add is the PHI we're looking at, this
446  // meets the induction pattern.
447  Register IndReg = DI->getOperand(1).getReg();
448  MachineOperand &Opnd2 = DI->getOperand(2);
449  int64_t V;
450  if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
451  Register UpdReg = DI->getOperand(0).getReg();
452  IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
453  }
454  }
455  } // for (i)
456  } // for (instr)
457 
459  MachineBasicBlock *TB = nullptr, *FB = nullptr;
460  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
461  if (NotAnalyzed)
462  return false;
463 
464  Register PredR;
465  unsigned PredPos, PredRegFlags;
466  if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
467  return false;
468 
469  MachineInstr *PredI = MRI->getVRegDef(PredR);
470  if (!PredI->isCompare())
471  return false;
472 
473  Register CmpReg1, CmpReg2;
474  int64_t CmpImm = 0, CmpMask = 0;
475  bool CmpAnalyzed =
476  TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
477  // Fail if the compare was not analyzed, or it's not comparing a register
478  // with an immediate value. Not checking the mask here, since we handle
479  // the individual compare opcodes (including A4_cmpb*) later on.
480  if (!CmpAnalyzed)
481  return false;
482 
483  // Exactly one of the input registers to the comparison should be among
484  // the induction registers.
485  InductionMap::iterator IndMapEnd = IndMap.end();
486  InductionMap::iterator F = IndMapEnd;
487  if (CmpReg1 != 0) {
488  InductionMap::iterator F1 = IndMap.find(CmpReg1);
489  if (F1 != IndMapEnd)
490  F = F1;
491  }
492  if (CmpReg2 != 0) {
493  InductionMap::iterator F2 = IndMap.find(CmpReg2);
494  if (F2 != IndMapEnd) {
495  if (F != IndMapEnd)
496  return false;
497  F = F2;
498  }
499  }
500  if (F == IndMapEnd)
501  return false;
502 
503  Reg = F->second.first;
504  IVBump = F->second.second;
505  IVOp = MRI->getVRegDef(F->first);
506  return true;
507 }
508 
509 // Return the comparison kind for the specified opcode.
510 HexagonHardwareLoops::Comparison::Kind
511 HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
512  MachineOperand *InitialValue,
513  const MachineOperand *EndValue,
514  int64_t IVBump) const {
516  switch (CondOpc) {
517  case Hexagon::C2_cmpeq:
518  case Hexagon::C2_cmpeqi:
519  case Hexagon::C2_cmpeqp:
521  break;
522  case Hexagon::C4_cmpneq:
523  case Hexagon::C4_cmpneqi:
525  break;
526  case Hexagon::C2_cmplt:
527  Cmp = Comparison::LTs;
528  break;
529  case Hexagon::C2_cmpltu:
530  Cmp = Comparison::LTu;
531  break;
532  case Hexagon::C4_cmplte:
533  case Hexagon::C4_cmpltei:
534  Cmp = Comparison::LEs;
535  break;
536  case Hexagon::C4_cmplteu:
537  case Hexagon::C4_cmplteui:
538  Cmp = Comparison::LEu;
539  break;
540  case Hexagon::C2_cmpgt:
541  case Hexagon::C2_cmpgti:
542  case Hexagon::C2_cmpgtp:
543  Cmp = Comparison::GTs;
544  break;
545  case Hexagon::C2_cmpgtu:
546  case Hexagon::C2_cmpgtui:
547  case Hexagon::C2_cmpgtup:
548  Cmp = Comparison::GTu;
549  break;
550  case Hexagon::C2_cmpgei:
551  Cmp = Comparison::GEs;
552  break;
553  case Hexagon::C2_cmpgeui:
554  Cmp = Comparison::GEs;
555  break;
556  default:
557  return (Comparison::Kind)0;
558  }
559  return Cmp;
560 }
561 
562 /// Analyze the statements in a loop to determine if the loop has
563 /// a computable trip count and, if so, return a value that represents
564 /// the trip count expression.
565 ///
566 /// This function iterates over the phi nodes in the loop to check for
567 /// induction variable patterns that are used in the calculation for
568 /// the number of time the loop is executed.
569 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
571  MachineBasicBlock *TopMBB = L->getTopBlock();
573  assert(PI != TopMBB->pred_end() &&
574  "Loop must have more than one incoming edge!");
575  MachineBasicBlock *Backedge = *PI++;
576  if (PI == TopMBB->pred_end()) // dead loop?
577  return nullptr;
578  MachineBasicBlock *Incoming = *PI++;
579  if (PI != TopMBB->pred_end()) // multiple backedges?
580  return nullptr;
581 
582  // Make sure there is one incoming and one backedge and determine which
583  // is which.
584  if (L->contains(Incoming)) {
585  if (L->contains(Backedge))
586  return nullptr;
587  std::swap(Incoming, Backedge);
588  } else if (!L->contains(Backedge))
589  return nullptr;
590 
591  // Look for the cmp instruction to determine if we can get a useful trip
592  // count. The trip count can be either a register or an immediate. The
593  // location of the value depends upon the type (reg or imm).
594  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
595  if (!ExitingBlock)
596  return nullptr;
597 
598  Register IVReg = 0;
599  int64_t IVBump = 0;
600  MachineInstr *IVOp;
601  bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
602  if (!FoundIV)
603  return nullptr;
604 
605  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
606 
607  MachineOperand *InitialValue = nullptr;
608  MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
609  MachineBasicBlock *Latch = L->getLoopLatch();
610  for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
611  MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
612  if (MBB == Preheader)
613  InitialValue = &IV_Phi->getOperand(i);
614  else if (MBB == Latch)
615  IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
616  }
617  if (!InitialValue)
618  return nullptr;
619 
621  MachineBasicBlock *TB = nullptr, *FB = nullptr;
622  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
623  if (NotAnalyzed)
624  return nullptr;
625 
626  MachineBasicBlock *Header = L->getHeader();
627  // TB must be non-null. If FB is also non-null, one of them must be
628  // the header. Otherwise, branch to TB could be exiting the loop, and
629  // the fall through can go to the header.
630  assert (TB && "Exit block without a branch?");
631  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
632  MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
634  bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
635  if (NotAnalyzed)
636  return nullptr;
637  if (TB == Latch)
638  TB = (LTB == Header) ? LTB : LFB;
639  else
640  FB = (LTB == Header) ? LTB: LFB;
641  }
642  assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
643  if (!TB || (FB && TB != Header && FB != Header))
644  return nullptr;
645 
646  // Branches of form "if (!P) ..." cause HexagonInstrInfo::analyzeBranch
647  // to put imm(0), followed by P in the vector Cond.
648  // If TB is not the header, it means that the "not-taken" path must lead
649  // to the header.
650  bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
651  Register PredReg;
652  unsigned PredPos, PredRegFlags;
653  if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
654  return nullptr;
655  MachineInstr *CondI = MRI->getVRegDef(PredReg);
656  unsigned CondOpc = CondI->getOpcode();
657 
658  Register CmpReg1, CmpReg2;
659  int64_t Mask = 0, ImmValue = 0;
660  bool AnalyzedCmp =
661  TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
662  if (!AnalyzedCmp)
663  return nullptr;
664 
665  // The comparison operator type determines how we compute the loop
666  // trip count.
667  OldInsts.push_back(CondI);
668  OldInsts.push_back(IVOp);
669 
670  // Sadly, the following code gets information based on the position
671  // of the operands in the compare instruction. This has to be done
672  // this way, because the comparisons check for a specific relationship
673  // between the operands (e.g. is-less-than), rather than to find out
674  // what relationship the operands are in (as on PPC).
676  bool isSwapped = false;
677  const MachineOperand &Op1 = CondI->getOperand(1);
678  const MachineOperand &Op2 = CondI->getOperand(2);
679  const MachineOperand *EndValue = nullptr;
680 
681  if (Op1.isReg()) {
682  if (Op2.isImm() || Op1.getReg() == IVReg)
683  EndValue = &Op2;
684  else {
685  EndValue = &Op1;
686  isSwapped = true;
687  }
688  }
689 
690  if (!EndValue)
691  return nullptr;
692 
693  Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
694  if (!Cmp)
695  return nullptr;
696  if (Negated)
697  Cmp = Comparison::getNegatedComparison(Cmp);
698  if (isSwapped)
699  Cmp = Comparison::getSwappedComparison(Cmp);
700 
701  if (InitialValue->isReg()) {
702  Register R = InitialValue->getReg();
703  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
704  if (!MDT->properlyDominates(DefBB, Header)) {
705  int64_t V;
706  if (!checkForImmediate(*InitialValue, V))
707  return nullptr;
708  }
709  OldInsts.push_back(MRI->getVRegDef(R));
710  }
711  if (EndValue->isReg()) {
712  Register R = EndValue->getReg();
713  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
714  if (!MDT->properlyDominates(DefBB, Header)) {
715  int64_t V;
716  if (!checkForImmediate(*EndValue, V))
717  return nullptr;
718  }
719  OldInsts.push_back(MRI->getVRegDef(R));
720  }
721 
722  return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
723 }
724 
725 /// Helper function that returns the expression that represents the
726 /// number of times a loop iterates. The function takes the operands that
727 /// represent the loop start value, loop end value, and induction value.
728 /// Based upon these operands, the function attempts to compute the trip count.
729 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
730  const MachineOperand *Start,
731  const MachineOperand *End,
732  Register IVReg,
733  int64_t IVBump,
734  Comparison::Kind Cmp) const {
735  // Cannot handle comparison EQ, i.e. while (A == B).
736  if (Cmp == Comparison::EQ)
737  return nullptr;
738 
739  // Check if either the start or end values are an assignment of an immediate.
740  // If so, use the immediate value rather than the register.
741  if (Start->isReg()) {
742  const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
743  if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
744  StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
745  Start = &StartValInstr->getOperand(1);
746  }
747  if (End->isReg()) {
748  const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
749  if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
750  EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
751  End = &EndValInstr->getOperand(1);
752  }
753 
754  if (!Start->isReg() && !Start->isImm())
755  return nullptr;
756  if (!End->isReg() && !End->isImm())
757  return nullptr;
758 
759  bool CmpLess = Cmp & Comparison::L;
760  bool CmpGreater = Cmp & Comparison::G;
761  bool CmpHasEqual = Cmp & Comparison::EQ;
762 
763  // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
764  if (CmpLess && IVBump < 0)
765  // Loop going while iv is "less" with the iv value going down. Must wrap.
766  return nullptr;
767 
768  if (CmpGreater && IVBump > 0)
769  // Loop going while iv is "greater" with the iv value going up. Must wrap.
770  return nullptr;
771 
772  // Phis that may feed into the loop.
773  LoopFeederMap LoopFeederPhi;
774 
775  // Check if the initial value may be zero and can be decremented in the first
776  // iteration. If the value is zero, the endloop instruction will not decrement
777  // the loop counter, so we shouldn't generate a hardware loop in this case.
778  if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
779  LoopFeederPhi))
780  return nullptr;
781 
782  if (Start->isImm() && End->isImm()) {
783  // Both, start and end are immediates.
784  int64_t StartV = Start->getImm();
785  int64_t EndV = End->getImm();
786  int64_t Dist = EndV - StartV;
787  if (Dist == 0)
788  return nullptr;
789 
790  bool Exact = (Dist % IVBump) == 0;
791 
792  if (Cmp == Comparison::NE) {
793  if (!Exact)
794  return nullptr;
795  if ((Dist < 0) ^ (IVBump < 0))
796  return nullptr;
797  }
798 
799  // For comparisons that include the final value (i.e. include equality
800  // with the final value), we need to increase the distance by 1.
801  if (CmpHasEqual)
802  Dist = Dist > 0 ? Dist+1 : Dist-1;
803 
804  // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
805  // CmpGreater should imply Dist < 0. These conditions could actually
806  // fail, for example, in unreachable code (which may still appear to be
807  // reachable in the CFG).
808  if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
809  return nullptr;
810 
811  // "Normalized" distance, i.e. with the bump set to +-1.
812  int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
813  : (-Dist + (-IVBump - 1)) / (-IVBump);
814  assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
815 
816  uint64_t Count = Dist1;
817 
818  if (Count > 0xFFFFFFFFULL)
819  return nullptr;
820 
821  return new CountValue(CountValue::CV_Immediate, Count);
822  }
823 
824  // A general case: Start and End are some values, but the actual
825  // iteration count may not be available. If it is not, insert
826  // a computation of it into the preheader.
827 
828  // If the induction variable bump is not a power of 2, quit.
829  // Othwerise we'd need a general integer division.
830  if (!isPowerOf2_64(std::abs(IVBump)))
831  return nullptr;
832 
833  MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
834  assert (PH && "Should have a preheader by now");
836  DebugLoc DL;
837  if (InsertPos != PH->end())
838  DL = InsertPos->getDebugLoc();
839 
840  // If Start is an immediate and End is a register, the trip count
841  // will be "reg - imm". Hexagon's "subtract immediate" instruction
842  // is actually "reg + -imm".
843 
844  // If the loop IV is going downwards, i.e. if the bump is negative,
845  // then the iteration count (computed as End-Start) will need to be
846  // negated. To avoid the negation, just swap Start and End.
847  if (IVBump < 0) {
848  std::swap(Start, End);
849  IVBump = -IVBump;
850  }
851  // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
852  // Signedness, and "including equality" are preserved.
853 
854  bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
855  bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
856 
857  int64_t StartV = 0, EndV = 0;
858  if (Start->isImm())
859  StartV = Start->getImm();
860  if (End->isImm())
861  EndV = End->getImm();
862 
863  int64_t AdjV = 0;
864  // To compute the iteration count, we would need this computation:
865  // Count = (End - Start + (IVBump-1)) / IVBump
866  // or, when CmpHasEqual:
867  // Count = (End - Start + (IVBump-1)+1) / IVBump
868  // The "IVBump-1" part is the adjustment (AdjV). We can avoid
869  // generating an instruction specifically to add it if we can adjust
870  // the immediate values for Start or End.
871 
872  if (CmpHasEqual) {
873  // Need to add 1 to the total iteration count.
874  if (Start->isImm())
875  StartV--;
876  else if (End->isImm())
877  EndV++;
878  else
879  AdjV += 1;
880  }
881 
882  if (Cmp != Comparison::NE) {
883  if (Start->isImm())
884  StartV -= (IVBump-1);
885  else if (End->isImm())
886  EndV += (IVBump-1);
887  else
888  AdjV += (IVBump-1);
889  }
890 
891  Register R = 0;
892  unsigned SR = 0;
893  if (Start->isReg()) {
894  R = Start->getReg();
895  SR = Start->getSubReg();
896  } else {
897  R = End->getReg();
898  SR = End->getSubReg();
899  }
900  const TargetRegisterClass *RC = MRI->getRegClass(R);
901  // Hardware loops cannot handle 64-bit registers. If it's a double
902  // register, it has to have a subregister.
903  if (!SR && RC == &Hexagon::DoubleRegsRegClass)
904  return nullptr;
905  const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
906 
907  // Compute DistR (register with the distance between Start and End).
908  Register DistR;
909  unsigned DistSR;
910 
911  // Avoid special case, where the start value is an imm(0).
912  if (Start->isImm() && StartV == 0) {
913  DistR = End->getReg();
914  DistSR = End->getSubReg();
915  } else {
916  const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
917  (RegToImm ? TII->get(Hexagon::A2_subri) :
918  TII->get(Hexagon::A2_addi));
919  if (RegToReg || RegToImm) {
920  Register SubR = MRI->createVirtualRegister(IntRC);
921  MachineInstrBuilder SubIB =
922  BuildMI(*PH, InsertPos, DL, SubD, SubR);
923 
924  if (RegToReg)
925  SubIB.addReg(End->getReg(), 0, End->getSubReg())
926  .addReg(Start->getReg(), 0, Start->getSubReg());
927  else
928  SubIB.addImm(EndV)
929  .addReg(Start->getReg(), 0, Start->getSubReg());
930  DistR = SubR;
931  } else {
932  // If the loop has been unrolled, we should use the original loop count
933  // instead of recalculating the value. This will avoid additional
934  // 'Add' instruction.
935  const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
936  if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
937  EndValInstr->getOperand(1).getSubReg() == 0 &&
938  EndValInstr->getOperand(2).getImm() == StartV) {
939  DistR = EndValInstr->getOperand(1).getReg();
940  } else {
941  Register SubR = MRI->createVirtualRegister(IntRC);
942  MachineInstrBuilder SubIB =
943  BuildMI(*PH, InsertPos, DL, SubD, SubR);
944  SubIB.addReg(End->getReg(), 0, End->getSubReg())
945  .addImm(-StartV);
946  DistR = SubR;
947  }
948  }
949  DistSR = 0;
950  }
951 
952  // From DistR, compute AdjR (register with the adjusted distance).
953  Register AdjR;
954  unsigned AdjSR;
955 
956  if (AdjV == 0) {
957  AdjR = DistR;
958  AdjSR = DistSR;
959  } else {
960  // Generate CountR = ADD DistR, AdjVal
961  Register AddR = MRI->createVirtualRegister(IntRC);
962  MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
963  BuildMI(*PH, InsertPos, DL, AddD, AddR)
964  .addReg(DistR, 0, DistSR)
965  .addImm(AdjV);
966 
967  AdjR = AddR;
968  AdjSR = 0;
969  }
970 
971  // From AdjR, compute CountR (register with the final count).
972  Register CountR;
973  unsigned CountSR;
974 
975  if (IVBump == 1) {
976  CountR = AdjR;
977  CountSR = AdjSR;
978  } else {
979  // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
980  unsigned Shift = Log2_32(IVBump);
981 
982  // Generate NormR = LSR DistR, Shift.
983  Register LsrR = MRI->createVirtualRegister(IntRC);
984  const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
985  BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
986  .addReg(AdjR, 0, AdjSR)
987  .addImm(Shift);
988 
989  CountR = LsrR;
990  CountSR = 0;
991  }
992 
993  return new CountValue(CountValue::CV_Register, CountR, CountSR);
994 }
995 
996 /// Return true if the operation is invalid within hardware loop.
997 bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
998  bool IsInnerHWLoop) const {
999  // Call is not allowed because the callee may use a hardware loop except for
1000  // the case when the call never returns.
1001  if (MI->getDesc().isCall())
1002  return !TII->doesNotReturn(*MI);
1003 
1004  // Check if the instruction defines a hardware loop register.
1005  using namespace Hexagon;
1006 
1007  static const Register Regs01[] = { LC0, SA0, LC1, SA1 };
1008  static const Register Regs1[] = { LC1, SA1 };
1009  auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, std::size(Regs01))
1010  : makeArrayRef(Regs1, std::size(Regs1));
1011  for (Register R : CheckRegs)
1012  if (MI->modifiesRegister(R, TRI))
1013  return true;
1014 
1015  return false;
1016 }
1017 
1018 /// Return true if the loop contains an instruction that inhibits
1019 /// the use of the hardware loop instruction.
1020 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
1021  bool IsInnerHWLoop) const {
1022  LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1023  << printMBBReference(**L->block_begin()));
1024  for (MachineBasicBlock *MBB : L->getBlocks()) {
1025  for (const MachineInstr &MI : *MBB) {
1026  if (isInvalidLoopOperation(&MI, IsInnerHWLoop)) {
1027  LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
1028  MI.dump(););
1029  return true;
1030  }
1031  }
1032  }
1033  return false;
1034 }
1035 
1036 /// Returns true if the instruction is dead. This was essentially
1037 /// copied from DeadMachineInstructionElim::isDead, but with special cases
1038 /// for inline asm, physical registers and instructions with side effects
1039 /// removed.
1040 bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1041  SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1042  // Examine each operand.
1043  for (const MachineOperand &MO : MI->operands()) {
1044  if (!MO.isReg() || !MO.isDef())
1045  continue;
1046 
1047  Register Reg = MO.getReg();
1048  if (MRI->use_nodbg_empty(Reg))
1049  continue;
1050 
1051  using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1052 
1053  // This instruction has users, but if the only user is the phi node for the
1054  // parent block, and the only use of that phi node is this instruction, then
1055  // this instruction is dead: both it (and the phi node) can be removed.
1056  use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1057  use_nodbg_iterator End = MRI->use_nodbg_end();
1058  if (std::next(I) != End || !I->getParent()->isPHI())
1059  return false;
1060 
1061  MachineInstr *OnePhi = I->getParent();
1062  for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
1063  const MachineOperand &OPO = OnePhi->getOperand(j);
1064  if (!OPO.isReg() || !OPO.isDef())
1065  continue;
1066 
1067  Register OPReg = OPO.getReg();
1068  use_nodbg_iterator nextJ;
1069  for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1070  J != End; J = nextJ) {
1071  nextJ = std::next(J);
1072  MachineOperand &Use = *J;
1073  MachineInstr *UseMI = Use.getParent();
1074 
1075  // If the phi node has a user that is not MI, bail.
1076  if (MI != UseMI)
1077  return false;
1078  }
1079  }
1080  DeadPhis.push_back(OnePhi);
1081  }
1082 
1083  // If there are no defs with uses, the instruction is dead.
1084  return true;
1085 }
1086 
1087 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1088  // This procedure was essentially copied from DeadMachineInstructionElim.
1089 
1091  if (isDead(MI, DeadPhis)) {
1092  LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI);
1093 
1094  // It is possible that some DBG_VALUE instructions refer to this
1095  // instruction. Examine each def operand for such references;
1096  // if found, mark the DBG_VALUE as undef (but don't delete it).
1097  for (const MachineOperand &MO : MI->operands()) {
1098  if (!MO.isReg() || !MO.isDef())
1099  continue;
1100  Register Reg = MO.getReg();
1101  // We use make_early_inc_range here because setReg below invalidates the
1102  // iterator.
1103  for (MachineOperand &MO :
1105  MachineInstr *UseMI = MO.getParent();
1106  if (UseMI == MI)
1107  continue;
1108  if (MO.isDebug())
1109  MO.setReg(0U);
1110  }
1111  }
1112 
1113  MI->eraseFromParent();
1114  for (unsigned i = 0; i < DeadPhis.size(); ++i)
1115  DeadPhis[i]->eraseFromParent();
1116  }
1117 }
1118 
1119 /// Check if the loop is a candidate for converting to a hardware
1120 /// loop. If so, then perform the transformation.
1121 ///
1122 /// This function works on innermost loops first. A loop can be converted
1123 /// if it is a counting loop; either a register value or an immediate.
1124 ///
1125 /// The code makes several assumptions about the representation of the loop
1126 /// in llvm.
1127 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1128  bool &RecL0used,
1129  bool &RecL1used) {
1130  // This is just to confirm basic correctness.
1131  assert(L->getHeader() && "Loop without a header?");
1132 
1133  bool Changed = false;
1134  bool L0Used = false;
1135  bool L1Used = false;
1136 
1137  // Process nested loops first.
1138  for (MachineLoop *I : *L) {
1139  Changed |= convertToHardwareLoop(I, RecL0used, RecL1used);
1140  L0Used |= RecL0used;
1141  L1Used |= RecL1used;
1142  }
1143 
1144  // If a nested loop has been converted, then we can't convert this loop.
1145  if (Changed && L0Used && L1Used)
1146  return Changed;
1147 
1148  unsigned LOOP_i;
1149  unsigned LOOP_r;
1150  unsigned ENDLOOP;
1151 
1152  // Flag used to track loopN instruction:
1153  // 1 - Hardware loop is being generated for the inner most loop.
1154  // 0 - Hardware loop is being generated for the outer loop.
1155  unsigned IsInnerHWLoop = 1;
1156 
1157  if (L0Used) {
1158  LOOP_i = Hexagon::J2_loop1i;
1159  LOOP_r = Hexagon::J2_loop1r;
1160  ENDLOOP = Hexagon::ENDLOOP1;
1161  IsInnerHWLoop = 0;
1162  } else {
1163  LOOP_i = Hexagon::J2_loop0i;
1164  LOOP_r = Hexagon::J2_loop0r;
1165  ENDLOOP = Hexagon::ENDLOOP0;
1166  }
1167 
1168 #ifndef NDEBUG
1169  // Stop trying after reaching the limit (if any).
1170  int Limit = HWLoopLimit;
1171  if (Limit >= 0) {
1172  if (Counter >= HWLoopLimit)
1173  return false;
1174  Counter++;
1175  }
1176 #endif
1177 
1178  // Does the loop contain any invalid instructions?
1179  if (containsInvalidInstruction(L, IsInnerHWLoop))
1180  return false;
1181 
1182  MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1183  // Don't generate hw loop if the loop has more than one exit.
1184  if (!LastMBB)
1185  return false;
1186 
1188  if (LastI == LastMBB->end())
1189  return false;
1190 
1191  // Is the induction variable bump feeding the latch condition?
1192  if (!fixupInductionVariable(L))
1193  return false;
1194 
1195  // Ensure the loop has a preheader: the loop instruction will be
1196  // placed there.
1197  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1198  if (!Preheader) {
1199  Preheader = createPreheaderForLoop(L);
1200  if (!Preheader)
1201  return false;
1202  }
1203 
1204  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1205 
1207  // Are we able to determine the trip count for the loop?
1208  CountValue *TripCount = getLoopTripCount(L, OldInsts);
1209  if (!TripCount)
1210  return false;
1211 
1212  // Is the trip count available in the preheader?
1213  if (TripCount->isReg()) {
1214  // There will be a use of the register inserted into the preheader,
1215  // so make sure that the register is actually defined at that point.
1216  MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1217  MachineBasicBlock *BBDef = TCDef->getParent();
1218  if (!MDT->dominates(BBDef, Preheader))
1219  return false;
1220  }
1221 
1222  // Determine the loop start.
1223  MachineBasicBlock *TopBlock = L->getTopBlock();
1224  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1225  MachineBasicBlock *LoopStart = nullptr;
1226  if (ExitingBlock != L->getLoopLatch()) {
1227  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1229 
1230  if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1231  return false;
1232 
1233  if (L->contains(TB))
1234  LoopStart = TB;
1235  else if (L->contains(FB))
1236  LoopStart = FB;
1237  else
1238  return false;
1239  }
1240  else
1241  LoopStart = TopBlock;
1242 
1243  // Convert the loop to a hardware loop.
1244  LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1245  DebugLoc DL;
1246  if (InsertPos != Preheader->end())
1247  DL = InsertPos->getDebugLoc();
1248 
1249  if (TripCount->isReg()) {
1250  // Create a copy of the loop count register.
1251  Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1252  BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1253  .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1254  // Add the Loop instruction to the beginning of the loop.
1255  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1256  .addReg(CountReg);
1257  } else {
1258  assert(TripCount->isImm() && "Expecting immediate value for trip count");
1259  // Add the Loop immediate instruction to the beginning of the loop,
1260  // if the immediate fits in the instructions. Otherwise, we need to
1261  // create a new virtual register.
1262  int64_t CountImm = TripCount->getImm();
1263  if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {
1264  Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1265  BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1266  .addImm(CountImm);
1267  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1268  .addMBB(LoopStart).addReg(CountReg);
1269  } else
1270  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1271  .addMBB(LoopStart).addImm(CountImm);
1272  }
1273 
1274  // Make sure the loop start always has a reference in the CFG.
1275  LoopStart->setMachineBlockAddressTaken();
1276 
1277  // Replace the loop branch with an endloop instruction.
1278  DebugLoc LastIDL = LastI->getDebugLoc();
1279  BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1280 
1281  // The loop ends with either:
1282  // - a conditional branch followed by an unconditional branch, or
1283  // - a conditional branch to the loop start.
1284  if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1285  LastI->getOpcode() == Hexagon::J2_jumpf) {
1286  // Delete one and change/add an uncond. branch to out of the loop.
1287  MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1288  LastI = LastMBB->erase(LastI);
1289  if (!L->contains(BranchTarget)) {
1290  if (LastI != LastMBB->end())
1291  LastI = LastMBB->erase(LastI);
1293  TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1294  }
1295  } else {
1296  // Conditional branch to loop start; just delete it.
1297  LastMBB->erase(LastI);
1298  }
1299  delete TripCount;
1300 
1301  // The induction operation and the comparison may now be
1302  // unneeded. If these are unneeded, then remove them.
1303  for (unsigned i = 0; i < OldInsts.size(); ++i)
1304  removeIfDead(OldInsts[i]);
1305 
1306  ++NumHWLoops;
1307 
1308  // Set RecL1used and RecL0used only after hardware loop has been
1309  // successfully generated. Doing it earlier can cause wrong loop instruction
1310  // to be used.
1311  if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1312  RecL1used = true;
1313  else
1314  RecL0used = true;
1315 
1316  return true;
1317 }
1318 
1319 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1320  MachineInstr *CmpI) {
1321  assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1322 
1323  MachineBasicBlock *BB = BumpI->getParent();
1324  if (CmpI->getParent() != BB)
1325  return false;
1326 
1327  using instr_iterator = MachineBasicBlock::instr_iterator;
1328 
1329  // Check if things are in order to begin with.
1330  for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1331  if (&*I == CmpI)
1332  return true;
1333 
1334  // Out of order.
1335  Register PredR = CmpI->getOperand(0).getReg();
1336  bool FoundBump = false;
1337  instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1338  for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1339  MachineInstr *In = &*I;
1340  for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1341  MachineOperand &MO = In->getOperand(i);
1342  if (MO.isReg() && MO.isUse()) {
1343  if (MO.getReg() == PredR) // Found an intervening use of PredR.
1344  return false;
1345  }
1346  }
1347 
1348  if (In == BumpI) {
1349  BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1350  FoundBump = true;
1351  break;
1352  }
1353  }
1354  assert (FoundBump && "Cannot determine instruction order");
1355  return FoundBump;
1356 }
1357 
1358 /// This function is required to break recursion. Visiting phis in a loop may
1359 /// result in recursion during compilation. We break the recursion by making
1360 /// sure that we visit a MachineOperand and its definition in a
1361 /// MachineInstruction only once. If we attempt to visit more than once, then
1362 /// there is recursion, and will return false.
1363 bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1364  MachineInstr *MI,
1365  const MachineOperand *MO,
1366  LoopFeederMap &LoopFeederPhi) const {
1367  if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1368  LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1369  << printMBBReference(**L->block_begin()));
1370  // Ignore all BBs that form Loop.
1371  if (llvm::is_contained(L->getBlocks(), A))
1372  return false;
1373  MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1374  LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1375  return true;
1376  } else
1377  // Already visited node.
1378  return false;
1379 }
1380 
1381 /// Return true if a Phi may generate a value that can underflow.
1382 /// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1383 bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1384  MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1385  MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1386  assert(Phi->isPHI() && "Expecting a Phi.");
1387  // Walk through each Phi, and its used operands. Make sure that
1388  // if there is recursion in Phi, we won't generate hardware loops.
1389  for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1390  if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1391  if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1392  Phi->getParent(), L, LoopFeederPhi))
1393  return true;
1394  return false;
1395 }
1396 
1397 /// Return true if the induction variable can underflow in the first iteration.
1398 /// An example, is an initial unsigned value that is 0 and is decrement in the
1399 /// first itertion of a do-while loop. In this case, we cannot generate a
1400 /// hardware loop because the endloop instruction does not decrement the loop
1401 /// counter if it is <= 1. We only need to perform this analysis if the
1402 /// initial value is a register.
1403 ///
1404 /// This function assumes the initial value may underfow unless proven
1405 /// otherwise. If the type is signed, then we don't care because signed
1406 /// underflow is undefined. We attempt to prove the initial value is not
1407 /// zero by perfoming a crude analysis of the loop counter. This function
1408 /// checks if the initial value is used in any comparison prior to the loop
1409 /// and, if so, assumes the comparison is a range check. This is inexact,
1410 /// but will catch the simple cases.
1411 bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1412  const MachineOperand *InitVal, const MachineOperand *EndVal,
1414  LoopFeederMap &LoopFeederPhi) const {
1415  // Only check register values since they are unknown.
1416  if (!InitVal->isReg())
1417  return false;
1418 
1419  if (!EndVal->isImm())
1420  return false;
1421 
1422  // A register value that is assigned an immediate is a known value, and it
1423  // won't underflow in the first iteration.
1424  int64_t Imm;
1425  if (checkForImmediate(*InitVal, Imm))
1426  return (EndVal->getImm() == Imm);
1427 
1428  Register Reg = InitVal->getReg();
1429 
1430  // We don't know the value of a physical register.
1431  if (!Reg.isVirtual())
1432  return true;
1433 
1435  if (!Def)
1436  return true;
1437 
1438  // If the initial value is a Phi or copy and the operands may not underflow,
1439  // then the definition cannot be underflow either.
1440  if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1441  L, LoopFeederPhi))
1442  return false;
1443  if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1444  EndVal, Def->getParent(),
1445  L, LoopFeederPhi))
1446  return false;
1447 
1448  // Iterate over the uses of the initial value. If the initial value is used
1449  // in a compare, then we assume this is a range check that ensures the loop
1450  // doesn't underflow. This is not an exact test and should be improved.
1452  E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1453  MachineInstr *MI = &*I;
1454  Register CmpReg1, CmpReg2;
1455  int64_t CmpMask = 0, CmpValue = 0;
1456 
1457  if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1458  continue;
1459 
1460  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1462  if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1463  continue;
1464 
1466  getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1467  if (Cmp == 0)
1468  continue;
1469  if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1470  Cmp = Comparison::getNegatedComparison(Cmp);
1471  if (CmpReg2 != 0 && CmpReg2 == Reg)
1472  Cmp = Comparison::getSwappedComparison(Cmp);
1473 
1474  // Signed underflow is undefined.
1475  if (Comparison::isSigned(Cmp))
1476  return false;
1477 
1478  // Check if there is a comparison of the initial value. If the initial value
1479  // is greater than or not equal to another value, then assume this is a
1480  // range check.
1481  if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1482  return false;
1483  }
1484 
1485  // OK - this is a hack that needs to be improved. We really need to analyze
1486  // the instructions performed on the initial value. This works on the simplest
1487  // cases only.
1488  if (!Def->isCopy() && !Def->isPHI())
1489  return false;
1490 
1491  return true;
1492 }
1493 
1494 bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1495  int64_t &Val) const {
1496  if (MO.isImm()) {
1497  Val = MO.getImm();
1498  return true;
1499  }
1500  if (!MO.isReg())
1501  return false;
1502 
1503  // MO is a register. Check whether it is defined as an immediate value,
1504  // and if so, get the value of it in TV. That value will then need to be
1505  // processed to handle potential subregisters in MO.
1506  int64_t TV;
1507 
1508  Register R = MO.getReg();
1509  if (!R.isVirtual())
1510  return false;
1511  MachineInstr *DI = MRI->getVRegDef(R);
1512  unsigned DOpc = DI->getOpcode();
1513  switch (DOpc) {
1514  case TargetOpcode::COPY:
1515  case Hexagon::A2_tfrsi:
1516  case Hexagon::A2_tfrpi:
1517  case Hexagon::CONST32:
1518  case Hexagon::CONST64:
1519  // Call recursively to avoid an extra check whether operand(1) is
1520  // indeed an immediate (it could be a global address, for example),
1521  // plus we can handle COPY at the same time.
1522  if (!checkForImmediate(DI->getOperand(1), TV))
1523  return false;
1524  break;
1525  case Hexagon::A2_combineii:
1526  case Hexagon::A4_combineir:
1527  case Hexagon::A4_combineii:
1528  case Hexagon::A4_combineri:
1529  case Hexagon::A2_combinew: {
1530  const MachineOperand &S1 = DI->getOperand(1);
1531  const MachineOperand &S2 = DI->getOperand(2);
1532  int64_t V1, V2;
1533  if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1534  return false;
1535  TV = V2 | (static_cast<uint64_t>(V1) << 32);
1536  break;
1537  }
1538  case TargetOpcode::REG_SEQUENCE: {
1539  const MachineOperand &S1 = DI->getOperand(1);
1540  const MachineOperand &S3 = DI->getOperand(3);
1541  int64_t V1, V3;
1542  if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1543  return false;
1544  unsigned Sub2 = DI->getOperand(2).getImm();
1545  unsigned Sub4 = DI->getOperand(4).getImm();
1546  if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1547  TV = V1 | (V3 << 32);
1548  else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1549  TV = V3 | (V1 << 32);
1550  else
1551  llvm_unreachable("Unexpected form of REG_SEQUENCE");
1552  break;
1553  }
1554 
1555  default:
1556  return false;
1557  }
1558 
1559  // By now, we should have successfully obtained the immediate value defining
1560  // the register referenced in MO. Handle a potential use of a subregister.
1561  switch (MO.getSubReg()) {
1562  case Hexagon::isub_lo:
1563  Val = TV & 0xFFFFFFFFULL;
1564  break;
1565  case Hexagon::isub_hi:
1566  Val = (TV >> 32) & 0xFFFFFFFFULL;
1567  break;
1568  default:
1569  Val = TV;
1570  break;
1571  }
1572  return true;
1573 }
1574 
1575 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1576  if (MO.isImm()) {
1577  MO.setImm(Val);
1578  return;
1579  }
1580 
1581  assert(MO.isReg());
1582  Register R = MO.getReg();
1583  MachineInstr *DI = MRI->getVRegDef(R);
1584 
1585  const TargetRegisterClass *RC = MRI->getRegClass(R);
1586  Register NewR = MRI->createVirtualRegister(RC);
1587  MachineBasicBlock &B = *DI->getParent();
1588  DebugLoc DL = DI->getDebugLoc();
1589  BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1590  MO.setReg(NewR);
1591 }
1592 
1593 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1594  MachineBasicBlock *Header = L->getHeader();
1595  MachineBasicBlock *Latch = L->getLoopLatch();
1596  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1597 
1598  if (!(Header && Latch && ExitingBlock))
1599  return false;
1600 
1601  // These data structures follow the same concept as the corresponding
1602  // ones in findInductionRegister (where some comments are).
1603  using RegisterBump = std::pair<Register, int64_t>;
1604  using RegisterInduction = std::pair<Register, RegisterBump>;
1605  using RegisterInductionSet = std::set<RegisterInduction>;
1606 
1607  // Register candidates for induction variables, with their associated bumps.
1608  RegisterInductionSet IndRegs;
1609 
1610  // Look for induction patterns:
1611  // %1 = PHI ..., [ latch, %2 ]
1612  // %2 = ADD %1, imm
1613  using instr_iterator = MachineBasicBlock::instr_iterator;
1614 
1615  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1616  I != E && I->isPHI(); ++I) {
1617  MachineInstr *Phi = &*I;
1618 
1619  // Have a PHI instruction.
1620  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1621  if (Phi->getOperand(i+1).getMBB() != Latch)
1622  continue;
1623 
1624  Register PhiReg = Phi->getOperand(i).getReg();
1625  MachineInstr *DI = MRI->getVRegDef(PhiReg);
1626 
1627  if (DI->getDesc().isAdd()) {
1628  // If the register operand to the add/sub is the PHI we are looking
1629  // at, this meets the induction pattern.
1630  Register IndReg = DI->getOperand(1).getReg();
1631  MachineOperand &Opnd2 = DI->getOperand(2);
1632  int64_t V;
1633  if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1634  Register UpdReg = DI->getOperand(0).getReg();
1635  IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1636  }
1637  }
1638  } // for (i)
1639  } // for (instr)
1640 
1641  if (IndRegs.empty())
1642  return false;
1643 
1644  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1646  // analyzeBranch returns true if it fails to analyze branch.
1647  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1648  if (NotAnalyzed || Cond.empty())
1649  return false;
1650 
1651  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1652  MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1654  bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1655  if (NotAnalyzed)
1656  return false;
1657 
1658  // Since latch is not the exiting block, the latch branch should be an
1659  // unconditional branch to the loop header.
1660  if (TB == Latch)
1661  TB = (LTB == Header) ? LTB : LFB;
1662  else
1663  FB = (LTB == Header) ? LTB : LFB;
1664  }
1665  if (TB != Header) {
1666  if (FB != Header) {
1667  // The latch/exit block does not go back to the header.
1668  return false;
1669  }
1670  // FB is the header (i.e., uncond. jump to branch header)
1671  // In this case, the LoopBody -> TB should not be a back edge otherwise
1672  // it could result in an infinite loop after conversion to hw_loop.
1673  // This case can happen when the Latch has two jumps like this:
1674  // Jmp_c OuterLoopHeader <-- TB
1675  // Jmp InnerLoopHeader <-- FB
1676  if (MDT->dominates(TB, FB))
1677  return false;
1678  }
1679 
1680  // Expecting a predicate register as a condition. It won't be a hardware
1681  // predicate register at this point yet, just a vreg.
1682  // HexagonInstrInfo::analyzeBranch for negated branches inserts imm(0)
1683  // into Cond, followed by the predicate register. For non-negated branches
1684  // it's just the register.
1685  unsigned CSz = Cond.size();
1686  if (CSz != 1 && CSz != 2)
1687  return false;
1688 
1689  if (!Cond[CSz-1].isReg())
1690  return false;
1691 
1692  Register P = Cond[CSz - 1].getReg();
1693  MachineInstr *PredDef = MRI->getVRegDef(P);
1694 
1695  if (!PredDef->isCompare())
1696  return false;
1697 
1698  SmallSet<Register,2> CmpRegs;
1699  MachineOperand *CmpImmOp = nullptr;
1700 
1701  // Go over all operands to the compare and look for immediate and register
1702  // operands. Assume that if the compare has a single register use and a
1703  // single immediate operand, then the register is being compared with the
1704  // immediate value.
1705  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1706  MachineOperand &MO = PredDef->getOperand(i);
1707  if (MO.isReg()) {
1708  // Skip all implicit references. In one case there was:
1709  // %140 = FCMPUGT32_rr %138, %139, implicit %usr
1710  if (MO.isImplicit())
1711  continue;
1712  if (MO.isUse()) {
1713  if (!isImmediate(MO)) {
1714  CmpRegs.insert(MO.getReg());
1715  continue;
1716  }
1717  // Consider the register to be the "immediate" operand.
1718  if (CmpImmOp)
1719  return false;
1720  CmpImmOp = &MO;
1721  }
1722  } else if (MO.isImm()) {
1723  if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1724  return false;
1725  CmpImmOp = &MO;
1726  }
1727  }
1728 
1729  if (CmpRegs.empty())
1730  return false;
1731 
1732  // Check if the compared register follows the order we want. Fix if needed.
1733  for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1734  I != E; ++I) {
1735  // This is a success. If the register used in the comparison is one that
1736  // we have identified as a bumped (updated) induction register, there is
1737  // nothing to do.
1738  if (CmpRegs.count(I->first))
1739  return true;
1740 
1741  // Otherwise, if the register being compared comes out of a PHI node,
1742  // and has been recognized as following the induction pattern, and is
1743  // compared against an immediate, we can fix it.
1744  const RegisterBump &RB = I->second;
1745  if (CmpRegs.count(RB.first)) {
1746  if (!CmpImmOp) {
1747  // If both operands to the compare instruction are registers, see if
1748  // it can be changed to use induction register as one of the operands.
1749  MachineInstr *IndI = nullptr;
1750  MachineInstr *nonIndI = nullptr;
1751  MachineOperand *IndMO = nullptr;
1752  MachineOperand *nonIndMO = nullptr;
1753 
1754  for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1755  MachineOperand &MO = PredDef->getOperand(i);
1756  if (MO.isReg() && MO.getReg() == RB.first) {
1757  LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1758  << ") = " << *(MRI->getVRegDef(I->first)));
1759  if (IndI)
1760  return false;
1761 
1762  IndI = MRI->getVRegDef(I->first);
1763  IndMO = &MO;
1764  } else if (MO.isReg()) {
1765  LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1766  << ") = " << *(MRI->getVRegDef(MO.getReg())));
1767  if (nonIndI)
1768  return false;
1769 
1770  nonIndI = MRI->getVRegDef(MO.getReg());
1771  nonIndMO = &MO;
1772  }
1773  }
1774  if (IndI && nonIndI &&
1775  nonIndI->getOpcode() == Hexagon::A2_addi &&
1776  nonIndI->getOperand(2).isImm() &&
1777  nonIndI->getOperand(2).getImm() == - RB.second) {
1778  bool Order = orderBumpCompare(IndI, PredDef);
1779  if (Order) {
1780  IndMO->setReg(I->first);
1781  nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1782  return true;
1783  }
1784  }
1785  return false;
1786  }
1787 
1788  // It is not valid to do this transformation on an unsigned comparison
1789  // because it may underflow.
1791  getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1792  if (!Cmp || Comparison::isUnsigned(Cmp))
1793  return false;
1794 
1795  // If the register is being compared against an immediate, try changing
1796  // the compare instruction to use induction register and adjust the
1797  // immediate operand.
1798  int64_t CmpImm = getImmediate(*CmpImmOp);
1799  int64_t V = RB.second;
1800  // Handle Overflow (64-bit).
1801  if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1802  ((V < 0) && (CmpImm < INT64_MIN - V)))
1803  return false;
1804  CmpImm += V;
1805  // Most comparisons of register against an immediate value allow
1806  // the immediate to be constant-extended. There are some exceptions
1807  // though. Make sure the new combination will work.
1808  if (CmpImmOp->isImm() && !TII->isExtendable(*PredDef) &&
1809  !TII->isValidOffset(PredDef->getOpcode(), CmpImm, TRI, false))
1810  return false;
1811 
1812  // Make sure that the compare happens after the bump. Otherwise,
1813  // after the fixup, the compare would use a yet-undefined register.
1814  MachineInstr *BumpI = MRI->getVRegDef(I->first);
1815  bool Order = orderBumpCompare(BumpI, PredDef);
1816  if (!Order)
1817  return false;
1818 
1819  // Finally, fix the compare instruction.
1820  setImmediate(*CmpImmOp, CmpImm);
1821  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1822  MachineOperand &MO = PredDef->getOperand(i);
1823  if (MO.isReg() && MO.getReg() == RB.first) {
1824  MO.setReg(I->first);
1825  return true;
1826  }
1827  }
1828  }
1829  }
1830 
1831  return false;
1832 }
1833 
1834 /// createPreheaderForLoop - Create a preheader for a given loop.
1835 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1836  MachineLoop *L) {
1837  if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1838  return TmpPH;
1839  if (!HWCreatePreheader)
1840  return nullptr;
1841 
1842  MachineBasicBlock *Header = L->getHeader();
1843  MachineBasicBlock *Latch = L->getLoopLatch();
1844  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1845  MachineFunction *MF = Header->getParent();
1846  DebugLoc DL;
1847 
1848 #ifndef NDEBUG
1849  if ((!PHFn.empty()) && (PHFn != MF->getName()))
1850  return nullptr;
1851 #endif
1852 
1853  if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1854  return nullptr;
1855 
1856  using instr_iterator = MachineBasicBlock::instr_iterator;
1857 
1858  // Verify that all existing predecessors have analyzable branches
1859  // (or no branches at all).
1860  using MBBVector = std::vector<MachineBasicBlock *>;
1861 
1862  MBBVector Preds(Header->pred_begin(), Header->pred_end());
1864  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1865 
1866  if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1867  return nullptr;
1868 
1869  for (MachineBasicBlock *PB : Preds) {
1870  bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1871  if (NotAnalyzed)
1872  return nullptr;
1873  }
1874 
1876  MF->insert(Header->getIterator(), NewPH);
1877 
1878  if (Header->pred_size() > 2) {
1879  // Ensure that the header has only two predecessors: the preheader and
1880  // the loop latch. Any additional predecessors of the header should
1881  // join at the newly created preheader. Inspect all PHI nodes from the
1882  // header and create appropriate corresponding PHI nodes in the preheader.
1883 
1884  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1885  I != E && I->isPHI(); ++I) {
1886  MachineInstr *PN = &*I;
1887 
1888  const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1889  MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1890  NewPH->insert(NewPH->end(), NewPN);
1891 
1892  Register PR = PN->getOperand(0).getReg();
1893  const TargetRegisterClass *RC = MRI->getRegClass(PR);
1894  Register NewPR = MRI->createVirtualRegister(RC);
1895  NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1896 
1897  // Copy all non-latch operands of a header's PHI node to the newly
1898  // created PHI node in the preheader.
1899  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1900  Register PredR = PN->getOperand(i).getReg();
1901  unsigned PredRSub = PN->getOperand(i).getSubReg();
1902  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1903  if (PredB == Latch)
1904  continue;
1905 
1906  MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1907  MO.setSubReg(PredRSub);
1908  NewPN->addOperand(MO);
1909  NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1910  }
1911 
1912  // Remove copied operands from the old PHI node and add the value
1913  // coming from the preheader's PHI.
1914  for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1915  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1916  if (PredB != Latch) {
1917  PN->removeOperand(i+1);
1918  PN->removeOperand(i);
1919  }
1920  }
1921  PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1923  }
1924  } else {
1925  assert(Header->pred_size() == 2);
1926 
1927  // The header has only two predecessors, but the non-latch predecessor
1928  // is not a preheader (e.g. it has other successors, etc.)
1929  // In such a case we don't need any extra PHI nodes in the new preheader,
1930  // all we need is to adjust existing PHIs in the header to now refer to
1931  // the new preheader.
1932  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1933  I != E && I->isPHI(); ++I) {
1934  MachineInstr *PN = &*I;
1935  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1936  MachineOperand &MO = PN->getOperand(i+1);
1937  if (MO.getMBB() != Latch)
1938  MO.setMBB(NewPH);
1939  }
1940  }
1941  }
1942 
1943  // "Reroute" the CFG edges to link in the new preheader.
1944  // If any of the predecessors falls through to the header, insert a branch
1945  // to the new preheader in that place.
1948 
1949  TB = FB = nullptr;
1950 
1951  for (MachineBasicBlock *PB : Preds) {
1952  if (PB != Latch) {
1953  Tmp2.clear();
1954  bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1955  (void)NotAnalyzed; // suppress compiler warning
1956  assert (!NotAnalyzed && "Should be analyzable!");
1957  if (TB != Header && (Tmp2.empty() || FB != Header))
1958  TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1959  PB->ReplaceUsesOfBlockWith(Header, NewPH);
1960  }
1961  }
1962 
1963  // It can happen that the latch block will fall through into the header.
1964  // Insert an unconditional branch to the header.
1965  TB = FB = nullptr;
1966  bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1967  (void)LatchNotAnalyzed; // suppress compiler warning
1968  assert (!LatchNotAnalyzed && "Should be analyzable!");
1969  if (!TB && !FB)
1970  TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1971 
1972  // Finally, the branch from the preheader to the header.
1973  TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1974  NewPH->addSuccessor(Header);
1975 
1976  MachineLoop *ParentLoop = L->getParentLoop();
1977  if (ParentLoop)
1978  ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1979 
1980  // Update the dominator information with the new preheader.
1981  if (MDT) {
1982  if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1983  if (MachineDomTreeNode *DHN = HN->getIDom()) {
1984  MDT->addNewBlock(NewPH, DHN->getBlock());
1985  MDT->changeImmediateDominator(Header, NewPH);
1986  }
1987  }
1988  }
1989 
1990  return NewPH;
1991 }
i
i
Definition: README.txt:29
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:353
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
MachineInstr.h
MathExtras.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::MachineRegisterInfo::use_instr_nodbg_end
static use_instr_nodbg_iterator use_instr_nodbg_end()
Definition: MachineRegisterInfo.h:546
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:107
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:256
llvm::HexagonInstrInfo::analyzeCompare
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
Definition: HexagonInstrInfo.cpp:1872
llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition: MachineOperand.h:800
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::MachineDominatorTree::properlyDominates
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Definition: MachineDominators.h:146
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
llvm::HexagonInstrInfo::analyzeBranch
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: HexagonInstrInfo.cpp:433
llvm::MachineInstr::isCompare
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
Definition: MachineInstr.h:941
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:374
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:117
HexagonSubtarget.h
llvm::HexagonInstrInfo::predOpcodeHasNot
bool predOpcodeHasNot(ArrayRef< MachineOperand > Cond) const
Definition: HexagonInstrInfo.cpp:3237
ErrorHandling.h
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:277
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::MachineRegisterInfo::use_instr_nodbg_begin
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:543
MachineBasicBlock.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:664
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237
Shift
bool Shift
Definition: README.txt:468
llvm::MachineFunction::CreateMachineInstr
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImplicit=false)
CreateMachineInstr - Allocate a new MachineInstr.
Definition: MachineFunction.cpp:373
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:513
llvm::MachineRegisterInfo::use_operands
iterator_range< use_iterator > use_operands(Register Reg) const
Definition: MachineRegisterInfo.h:477
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:873
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::MachineDominatorTree::dominates
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Definition: MachineDominators.h:114
STLExtras.h
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
isImm
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
Definition: SPIRVInstructionSelector.cpp:1218
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
llvm::MachineRegisterInfo::use_nodbg_begin
use_nodbg_iterator use_nodbg_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:526
MachineRegisterInfo.h
llvm::MachineBasicBlock::erase
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Definition: MachineBasicBlock.cpp:1314
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:762
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CommandLine.h
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:114
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:667
llvm::MachineOperand::isImplicit
bool isImplicit() const
Definition: MachineOperand.h:379
MachineLoopInfo.h
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
Constants.h
llvm::MachineDominatorTree::addNewBlock
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
Definition: MachineDominators.h:182
llvm::HexagonInstrInfo::getPredReg
bool getPredReg(ArrayRef< MachineOperand > Cond, Register &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const
Definition: HexagonInstrInfo.cpp:4513
getReg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Definition: MipsDisassembler.cpp:517
llvm::HexagonInstrInfo::doesNotReturn
bool doesNotReturn(const MachineInstr &CallMI) const
Definition: HexagonInstrInfo.cpp:3072
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:369
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
llvm::X86II::PD
@ PD
Definition: X86BaseInfo.h:792
EQ
#define EQ(a, b)
Definition: regexec.c:112
llvm::MachineOperand::setSubReg
void setSubReg(unsigned subReg)
Definition: MachineOperand.h:480
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
t
bitcast float %x to i32 %s=and i32 %t, 2147483647 %d=bitcast i32 %s to float ret float %d } declare float @fabsf(float %n) define float @bar(float %x) nounwind { %d=call float @fabsf(float %x) ret float %d } This IR(from PR6194):target datalayout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple="x86_64-apple-darwin10.0.0" %0=type { double, double } %struct.float3=type { float, float, float } define void @test(%0, %struct.float3 *nocapture %res) nounwind noinline ssp { entry:%tmp18=extractvalue %0 %0, 0 t
Definition: README-SSE.txt:788
TBB
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
Definition: RISCVRedundantCopyElimination.cpp:76
llvm::MachineDominatorTree::changeImmediateDominator
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: MachineDominators.h:191
llvm::MachineLoop::findLoopControlBlock
MachineBasicBlock * findLoopControlBlock()
Find the block that contains the loop control variable and the loop test.
Definition: MachineLoopInfo.cpp:91
false
Definition: StackSlotColoring.cpp:141
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:547
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:188
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::HexagonInstrInfo::insertBranch
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
Definition: HexagonInstrInfo.cpp:626
llvm::MCOI::BranchTarget
@ BranchTarget
Definition: MCInstrDesc.h:53
llvm::MachineRegisterInfo::use_nodbg_end
static use_nodbg_iterator use_nodbg_end()
Definition: MachineRegisterInfo.h:529
INT64_MAX
#define INT64_MAX
Definition: DataTypes.h:71
DebugLoc.h
HexagonInstrInfo.h
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:396
llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition: MachineOperand.h:237
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::LoopBase::block_begin
block_iterator block_begin() const
Definition: LoopInfo.h:193
PB
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
llvm::HexagonSubtarget::getInstrInfo
const HexagonInstrInfo * getInstrInfo() const override
Definition: HexagonSubtarget.h:124
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
hwloops
hwloops
Definition: HexagonHardwareLoops.cpp:373
llvm::cl::opt
Definition: CommandLine.h:1412
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
llvm::MachineLoop
Definition: MachineLoopInfo.h:44
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:165
llvm::MachineBasicBlock::pred_end
pred_iterator pred_end()
Definition: MachineBasicBlock.h:355
llvm::MCInstrDesc::isAdd
bool isAdd() const
Return true if the instruction is an add instruction.
Definition: MCInstrDesc.h:276
llvm::HexagonISD::CONST32
@ CONST32
Definition: HexagonISelLowering.h:37
PHFn
static cl::opt< std::string > PHFn("hexagon-hwloop-phfn", cl::Hidden, cl::init(""))
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
uint64_t
llvm::MachineOperand::CreateMBB
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
Definition: MachineOperand.h:825
llvm::MachineRegisterInfo::use_nodbg_iterator
defusechain_iterator< true, false, true, true, false, false > use_nodbg_iterator
use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the specified register,...
Definition: MachineRegisterInfo.h:525
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::MachineDominatorTree::getNode
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: MachineDominators.h:174
I
#define I(x, y, z)
Definition: MD5.cpp:58
SpecPreheader
static cl::opt< bool > SpecPreheader("hwloop-spec-preheader", cl::Hidden, cl::desc("Allow speculation of preheader " "instructions"))
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:716
size
i< reg-> size
Definition: README.txt:166
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1868
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:439
ArrayRef.h
MachineFunctionPass.h
llvm::MachineBasicBlock::pred_iterator
std::vector< MachineBasicBlock * >::iterator pred_iterator
Definition: MachineBasicBlock.h:341
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:567
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1300
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
isReg
static bool isReg(const MCInst &MI, unsigned OpNo)
Definition: MipsInstPrinter.cpp:31
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::HexagonInstrInfo
Definition: HexagonInstrInfo.h:38
llvm::print
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Definition: GCNRegPressure.cpp:138
llvm::MachineRegisterInfo::use_nodbg_empty
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
Definition: MachineRegisterInfo.h:574
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1715
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:239
llvm::MachineBasicBlock::setMachineBlockAddressTaken
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
Definition: MachineBasicBlock.h:247
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:561
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
llvm::X86II::TB
@ TB
Definition: X86BaseInfo.h:810
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::HexagonInstrInfo::isExtendable
bool isExtendable(const MachineInstr &MI) const
Definition: HexagonInstrInfo.cpp:2276
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:374
HWLoopLimit
static cl::opt< int > HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1))
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:136
llvm::DomTreeNodeBase
Base class for the actual dominator tree node.
Definition: LiveIntervalCalc.h:24
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:364
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
j
return j(j<< 16)
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
std
Definition: BitVector.h:851
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::HexagonSubtarget::getRegisterInfo
const HexagonRegisterInfo * getRegisterInfo() const override
Definition: HexagonSubtarget.h:125
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1327
llvm::MachineBasicBlock::instr_iterator
Instructions::iterator instr_iterator
Definition: MachineBasicBlock.h:264
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
LC0
into eax xorps xmm0 xmm0 eax xmm0 eax xmm0 ret esp eax movdqa LC0
Definition: README-SSE.txt:632
llvm::HexagonInstrInfo::isValidOffset
bool isValidOffset(unsigned Opcode, int Offset, const TargetRegisterInfo *TRI, bool Extend=true) const
Definition: HexagonInstrInfo.cpp:2738
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:178
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:322
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", "Hexagon Hardware Loops", false, false) INITIALIZE_PASS_END(HexagonHardwareLoops
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:475
llvm::MachineInstr::removeOperand
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
Definition: MachineInstr.cpp:284
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::MachineOperand::isDebug
bool isDebug() const
Definition: MachineOperand.h:445
HWCreatePreheader
static cl::opt< bool > HWCreatePreheader("hexagon-hwloop-preheader", cl::Hidden, cl::init(true), cl::desc("Add a preheader to a hardware loop if one doesn't exist"))
isSigned
static bool isSigned(unsigned int Opcode)
Definition: ExpandLargeDivRem.cpp:52
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
SmallVector.h
MachineInstrBuilder.h
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:258
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:56
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:519
llvm::MachineLoop::getTopBlock
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Definition: MachineLoopInfo.cpp:61
llvm::MachineInstr::addOperand
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
Definition: MachineInstr.cpp:192
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:43
llvm::MachineOperand::setMBB
void setMBB(MachineBasicBlock *MBB)
Definition: MachineOperand.h:698
llvm::SmallVectorImpl< MachineInstr * >
MachineOperand.h
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::HexagonRegisterInfo
Definition: HexagonRegisterInfo.h:29
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:413
raw_ostream.h
llvm::createHexagonHardwareLoops
FunctionPass * createHexagonHardwareLoops()
Definition: HexagonHardwareLoops.cpp:376
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
MachineFunction.h
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:111
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:463
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1297
llvm::initializeHexagonHardwareLoopsPass
void initializeHexagonHardwareLoopsPass(PassRegistry &)
InitializePasses.h
llvm::SmallSet::empty
bool empty() const
Definition: SmallSet.h:158
TargetRegisterInfo.h
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:307
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
MachineDominators.h
SmallSet.h
INT64_MIN
#define INT64_MIN
Definition: DataTypes.h:74