LCOV - code coverage report
Current view: top level - lib/CodeGen - PeepholeOptimizer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 633 687 92.1 %
Date: 2017-09-14 15:23:50 Functions: 47 57 82.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // Perform peephole optimizations on the machine code:
      11             : //
      12             : // - Optimize Extensions
      13             : //
      14             : //     Optimization of sign / zero extension instructions. It may be extended to
      15             : //     handle other instructions with similar properties.
      16             : //
      17             : //     On some targets, some instructions, e.g. X86 sign / zero extension, may
      18             : //     leave the source value in the lower part of the result. This optimization
      19             : //     will replace some uses of the pre-extension value with uses of the
      20             : //     sub-register of the results.
      21             : //
      22             : // - Optimize Comparisons
      23             : //
      24             : //     Optimization of comparison instructions. For instance, in this code:
      25             : //
      26             : //       sub r1, 1
      27             : //       cmp r1, 0
      28             : //       bz  L1
      29             : //
      30             : //     If the "sub" instruction all ready sets (or could be modified to set) the
      31             : //     same flag that the "cmp" instruction sets and that "bz" uses, then we can
      32             : //     eliminate the "cmp" instruction.
      33             : //
      34             : //     Another instance, in this code:
      35             : //
      36             : //       sub r1, r3 | sub r1, imm
      37             : //       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
      38             : //       bge L1
      39             : //
      40             : //     If the branch instruction can use flag from "sub", then we can replace
      41             : //     "sub" with "subs" and eliminate the "cmp" instruction.
      42             : //
      43             : // - Optimize Loads:
      44             : //
      45             : //     Loads that can be folded into a later instruction. A load is foldable
      46             : //     if it loads to virtual registers and the virtual register defined has
      47             : //     a single use.
      48             : //
      49             : // - Optimize Copies and Bitcast (more generally, target specific copies):
      50             : //
      51             : //     Rewrite copies and bitcasts to avoid cross register bank copies
      52             : //     when possible.
      53             : //     E.g., Consider the following example, where capital and lower
      54             : //     letters denote different register file:
      55             : //     b = copy A <-- cross-bank copy
      56             : //     C = copy b <-- cross-bank copy
      57             : //   =>
      58             : //     b = copy A <-- cross-bank copy
      59             : //     C = copy A <-- same-bank copy
      60             : //
      61             : //     E.g., for bitcast:
      62             : //     b = bitcast A <-- cross-bank copy
      63             : //     C = bitcast b <-- cross-bank copy
      64             : //   =>
      65             : //     b = bitcast A <-- cross-bank copy
      66             : //     C = copy A    <-- same-bank copy
      67             : //===----------------------------------------------------------------------===//
      68             : 
      69             : #include "llvm/ADT/DenseMap.h"
      70             : #include "llvm/ADT/Optional.h"
      71             : #include "llvm/ADT/SmallPtrSet.h"
      72             : #include "llvm/ADT/SmallSet.h"
      73             : #include "llvm/ADT/SmallVector.h"
      74             : #include "llvm/ADT/Statistic.h"
      75             : #include "llvm/CodeGen/MachineBasicBlock.h"
      76             : #include "llvm/CodeGen/MachineDominators.h"
      77             : #include "llvm/CodeGen/MachineFunction.h"
      78             : #include "llvm/CodeGen/MachineFunctionPass.h"
      79             : #include "llvm/CodeGen/MachineInstr.h"
      80             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      81             : #include "llvm/CodeGen/MachineLoopInfo.h"
      82             : #include "llvm/CodeGen/MachineOperand.h"
      83             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      84             : #include "llvm/MC/LaneBitmask.h"
      85             : #include "llvm/MC/MCInstrDesc.h"
      86             : #include "llvm/Pass.h"
      87             : #include "llvm/Support/CommandLine.h"
      88             : #include "llvm/Support/Debug.h"
      89             : #include "llvm/Support/ErrorHandling.h"
      90             : #include "llvm/Support/raw_ostream.h"
      91             : #include "llvm/Target/TargetInstrInfo.h"
      92             : #include "llvm/Target/TargetOpcodes.h"
      93             : #include "llvm/Target/TargetRegisterInfo.h"
      94             : #include "llvm/Target/TargetSubtargetInfo.h"
      95             : #include <cassert>
      96             : #include <cstdint>
      97             : #include <memory>
      98             : #include <utility>
      99             : 
     100             : using namespace llvm;
     101             : 
     102             : #define DEBUG_TYPE "peephole-opt"
     103             : 
     104             : // Optimize Extensions
     105             : static cl::opt<bool>
     106       72306 : Aggressive("aggressive-ext-opt", cl::Hidden,
     107      144612 :            cl::desc("Aggressive extension optimization"));
     108             : 
     109             : static cl::opt<bool>
     110      289224 : DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
     111      289224 :                 cl::desc("Disable the peephole optimizer"));
     112             : 
     113             : static cl::opt<bool>
     114      289224 : DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
     115      289224 :                   cl::desc("Disable advanced copy optimization"));
     116             : 
     117       72306 : static cl::opt<bool> DisableNAPhysCopyOpt(
     118      216918 :     "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
     119      289224 :     cl::desc("Disable non-allocatable physical register copy optimization"));
     120             : 
     121             : // Limit the number of PHI instructions to process
     122             : // in PeepholeOptimizer::getNextSource.
     123       72306 : static cl::opt<unsigned> RewritePHILimit(
     124      216918 :     "rewrite-phi-limit", cl::Hidden, cl::init(10),
     125      289224 :     cl::desc("Limit the length of PHI chains to lookup"));
     126             : 
     127             : // Limit the length of recurrence chain when evaluating the benefit of
     128             : // commuting operands.
     129       72306 : static cl::opt<unsigned> MaxRecurrenceChain(
     130      216918 :     "recurrence-chain-limit", cl::Hidden, cl::init(3),
     131      216918 :     cl::desc("Maximum length of recurrence chain when evaluating the benefit "
     132       72306 :              "of commuting operands"));
     133             : 
     134             : 
     135             : STATISTIC(NumReuse,      "Number of extension results reused");
     136             : STATISTIC(NumCmps,       "Number of compares eliminated");
     137             : STATISTIC(NumImmFold,    "Number of move immediate folded");
     138             : STATISTIC(NumLoadFold,   "Number of loads folded");
     139             : STATISTIC(NumSelects,    "Number of selects optimized");
     140             : STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
     141             : STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
     142             : STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
     143             : 
     144             : namespace {
     145             : 
     146             :   class ValueTrackerResult;
     147             :   class RecurrenceInstr;
     148             : 
     149       15528 :   class PeepholeOptimizer : public MachineFunctionPass {
     150             :     const TargetInstrInfo *TII;
     151             :     const TargetRegisterInfo *TRI;
     152             :     MachineRegisterInfo   *MRI;
     153             :     MachineDominatorTree  *DT;  // Machine dominator tree
     154             :     MachineLoopInfo       *MLI;
     155             : 
     156             :   public:
     157             :     static char ID; // Pass identification
     158             : 
     159       15627 :     PeepholeOptimizer() : MachineFunctionPass(ID) {
     160       15627 :       initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
     161       15627 :     }
     162             : 
     163             :     bool runOnMachineFunction(MachineFunction &MF) override;
     164             : 
     165       15574 :     void getAnalysisUsage(AnalysisUsage &AU) const override {
     166       15574 :       AU.setPreservesCFG();
     167       15574 :       MachineFunctionPass::getAnalysisUsage(AU);
     168       15574 :       AU.addRequired<MachineLoopInfo>();
     169       15574 :       AU.addPreserved<MachineLoopInfo>();
     170       15574 :       if (Aggressive) {
     171           0 :         AU.addRequired<MachineDominatorTree>();
     172             :         AU.addPreserved<MachineDominatorTree>();
     173             :       }
     174       15574 :     }
     175             : 
     176             :     /// \brief Track Def -> Use info used for rewriting copies.
     177             :     using RewriteMapTy =
     178             :         SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>;
     179             : 
     180             :     /// \brief Sequence of instructions that formulate recurrence cycle.
     181             :     using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
     182             : 
     183             :   private:
     184             :     bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     185             :     bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
     186             :                           SmallPtrSetImpl<MachineInstr*> &LocalMIs);
     187             :     bool optimizeSelect(MachineInstr *MI,
     188             :                         SmallPtrSetImpl<MachineInstr *> &LocalMIs);
     189             :     bool optimizeCondBranch(MachineInstr *MI);
     190             :     bool optimizeCoalescableCopy(MachineInstr *MI);
     191             :     bool optimizeUncoalescableCopy(MachineInstr *MI,
     192             :                                    SmallPtrSetImpl<MachineInstr *> &LocalMIs);
     193             :     bool optimizeRecurrence(MachineInstr &PHI);
     194             :     bool findNextSource(unsigned Reg, unsigned SubReg,
     195             :                         RewriteMapTy &RewriteMap);
     196             :     bool isMoveImmediate(MachineInstr *MI,
     197             :                          SmallSet<unsigned, 4> &ImmDefRegs,
     198             :                          DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
     199             :     bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
     200             :                        SmallSet<unsigned, 4> &ImmDefRegs,
     201             :                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
     202             : 
     203             :     /// \brief Finds recurrence cycles, but only ones that formulated around
     204             :     /// a def operand and a use operand that are tied. If there is a use
     205             :     /// operand commutable with the tied use operand, find recurrence cycle
     206             :     /// along that operand as well.
     207             :     bool findTargetRecurrence(unsigned Reg,
     208             :                               const SmallSet<unsigned, 2> &TargetReg,
     209             :                               RecurrenceCycle &RC);
     210             : 
     211             :     /// \brief If copy instruction \p MI is a virtual register copy, track it in
     212             :     /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
     213             :     /// previously seen as a copy, replace the uses of this copy with the
     214             :     /// previously seen copy's destination register.
     215             :     bool foldRedundantCopy(MachineInstr *MI,
     216             :                            SmallSet<unsigned, 4> &CopySrcRegs,
     217             :                            DenseMap<unsigned, MachineInstr *> &CopyMIs);
     218             : 
     219             :     /// \brief Is the register \p Reg a non-allocatable physical register?
     220             :     bool isNAPhysCopy(unsigned Reg);
     221             : 
     222             :     /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical
     223             :     /// register copy, track it in the \p NAPhysToVirtMIs map. If this
     224             :     /// non-allocatable physical register was previously copied to a virtual
     225             :     /// registered and hasn't been clobbered, the virt->phys copy can be
     226             :     /// deleted.
     227             :     bool foldRedundantNAPhysCopy(
     228             :         MachineInstr *MI,
     229             :         DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs);
     230             : 
     231             :     bool isLoadFoldable(MachineInstr *MI,
     232             :                         SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
     233             : 
     234             :     /// \brief Check whether \p MI is understood by the register coalescer
     235             :     /// but may require some rewriting.
     236             :     bool isCoalescableCopy(const MachineInstr &MI) {
     237             :       // SubregToRegs are not interesting, because they are already register
     238             :       // coalescer friendly.
     239     9244230 :       return MI.isCopy() || (!DisableAdvCopyOpt &&
     240     8112310 :                              (MI.isRegSequence() || MI.isInsertSubreg() ||
     241     2676230 :                               MI.isExtractSubreg()));
     242             :     }
     243             : 
     244             :     /// \brief Check whether \p MI is a copy like instruction that is
     245             :     /// not recognized by the register coalescer.
     246     3759048 :     bool isUncoalescableCopy(const MachineInstr &MI) {
     247     7515336 :       return MI.isBitcast() ||
     248     7512306 :              (!DisableAdvCopyOpt &&
     249    11265498 :               (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
     250     7513700 :                MI.isExtractSubregLike()));
     251             :     }
     252             :   };
     253             : 
     254             :   /// \brief Helper class to hold instructions that are inside recurrence
     255             :   /// cycles. The recurrence cycle is formulated around 1) a def operand and its
     256             :   /// tied use operand, or 2) a def operand and a use operand that is commutable
     257             :   /// with another use operand which is tied to the def operand. In the latter
     258             :   /// case, index of the tied use operand and the commutable use operand are
     259             :   /// maintained with CommutePair.
     260        4452 :   class RecurrenceInstr {
     261             :   public:
     262             :     using IndexPair = std::pair<unsigned, unsigned>;
     263             : 
     264        2744 :     RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
     265             :     RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
     266         336 :       : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
     267             : 
     268             :     MachineInstr *getMI() const { return MI; }
     269        2436 :     Optional<IndexPair> getCommutePair() const { return CommutePair; }
     270             : 
     271             :   private:
     272             :     MachineInstr *MI;
     273             :     Optional<IndexPair> CommutePair;
     274             :   };
     275             : 
     276             :   /// \brief Helper class to hold a reply for ValueTracker queries. Contains the
     277             :   /// returned sources for a given search and the instructions where the sources
     278             :   /// were tracked from.
     279    17755094 :   class ValueTrackerResult {
     280             :   private:
     281             :     /// Track all sources found by one ValueTracker query.
     282             :     SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs;
     283             : 
     284             :     /// Instruction using the sources in 'RegSrcs'.
     285             :     const MachineInstr *Inst = nullptr;
     286             : 
     287             :   public:
     288     3796764 :     ValueTrackerResult() = default;
     289             : 
     290     1598124 :     ValueTrackerResult(unsigned Reg, unsigned SubReg) {
     291      799062 :       addSource(Reg, SubReg);
     292             :     }
     293             : 
     294     3595640 :     bool isValid() const { return getNumSources() > 0; }
     295             : 
     296      799183 :     void setInst(const MachineInstr *I) { Inst = I; }
     297             :     const MachineInstr *getInst() const { return Inst; }
     298             : 
     299             :     void clear() {
     300             :       RegSrcs.clear();
     301             :       Inst = nullptr;
     302             :     }
     303             : 
     304             :     void addSource(unsigned SrcReg, unsigned SrcSubReg) {
     305     1598624 :       RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg));
     306             :     }
     307             : 
     308             :     void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) {
     309             :       assert(Idx < getNumSources() && "Reg pair source out of index");
     310             :       RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg);
     311             :     }
     312             : 
     313    10737410 :     int getNumSources() const { return RegSrcs.size(); }
     314             : 
     315             :     unsigned getSrcReg(int Idx) const {
     316             :       assert(Idx < getNumSources() && "Reg source out of index");
     317     3546146 :       return RegSrcs[Idx].Reg;
     318             :     }
     319             : 
     320             :     unsigned getSrcSubReg(int Idx) const {
     321             :       assert(Idx < getNumSources() && "SubReg source out of index");
     322     2912820 :       return RegSrcs[Idx].SubReg;
     323             :     }
     324             : 
     325             :     bool operator==(const ValueTrackerResult &Other) {
     326             :       if (Other.getInst() != getInst())
     327             :         return false;
     328             : 
     329             :       if (Other.getNumSources() != getNumSources())
     330             :         return false;
     331             : 
     332             :       for (int i = 0, e = Other.getNumSources(); i != e; ++i)
     333             :         if (Other.getSrcReg(i) != getSrcReg(i) ||
     334             :             Other.getSrcSubReg(i) != getSrcSubReg(i))
     335             :           return false;
     336             :       return true;
     337             :     }
     338             :   };
     339             : 
     340             :   /// \brief Helper class to track the possible sources of a value defined by
     341             :   /// a (chain of) copy related instructions.
     342             :   /// Given a definition (instruction and definition index), this class
     343             :   /// follows the use-def chain to find successive suitable sources.
     344             :   /// The given source can be used to rewrite the definition into
     345             :   /// def = COPY src.
     346             :   ///
     347             :   /// For instance, let us consider the following snippet:
     348             :   /// v0 =
     349             :   /// v2 = INSERT_SUBREG v1, v0, sub0
     350             :   /// def = COPY v2.sub0
     351             :   ///
     352             :   /// Using a ValueTracker for def = COPY v2.sub0 will give the following
     353             :   /// suitable sources:
     354             :   /// v2.sub0 and v0.
     355             :   /// Then, def can be rewritten into def = COPY v0.
     356             :   class ValueTracker {
     357             :   private:
     358             :     /// The current point into the use-def chain.
     359             :     const MachineInstr *Def = nullptr;
     360             : 
     361             :     /// The index of the definition in Def.
     362             :     unsigned DefIdx = 0;
     363             : 
     364             :     /// The sub register index of the definition.
     365             :     unsigned DefSubReg;
     366             : 
     367             :     /// The register where the value can be found.
     368             :     unsigned Reg;
     369             : 
     370             :     /// Specifiy whether or not the value tracking looks through
     371             :     /// complex instructions. When this is false, the value tracker
     372             :     /// bails on everything that is not a copy or a bitcast.
     373             :     ///
     374             :     /// Note: This could have been implemented as a specialized version of
     375             :     /// the ValueTracker class but that would have complicated the code of
     376             :     /// the users of this class.
     377             :     bool UseAdvancedTracking;
     378             : 
     379             :     /// MachineRegisterInfo used to perform tracking.
     380             :     const MachineRegisterInfo &MRI;
     381             : 
     382             :     /// Optional TargetInstrInfo used to perform some complex
     383             :     /// tracking.
     384             :     const TargetInstrInfo *TII;
     385             : 
     386             :     /// \brief Dispatcher to the right underlying implementation of
     387             :     /// getNextSource.
     388             :     ValueTrackerResult getNextSourceImpl();
     389             : 
     390             :     /// \brief Specialized version of getNextSource for Copy instructions.
     391             :     ValueTrackerResult getNextSourceFromCopy();
     392             : 
     393             :     /// \brief Specialized version of getNextSource for Bitcast instructions.
     394             :     ValueTrackerResult getNextSourceFromBitcast();
     395             : 
     396             :     /// \brief Specialized version of getNextSource for RegSequence
     397             :     /// instructions.
     398             :     ValueTrackerResult getNextSourceFromRegSequence();
     399             : 
     400             :     /// \brief Specialized version of getNextSource for InsertSubreg
     401             :     /// instructions.
     402             :     ValueTrackerResult getNextSourceFromInsertSubreg();
     403             : 
     404             :     /// \brief Specialized version of getNextSource for ExtractSubreg
     405             :     /// instructions.
     406             :     ValueTrackerResult getNextSourceFromExtractSubreg();
     407             : 
     408             :     /// \brief Specialized version of getNextSource for SubregToReg
     409             :     /// instructions.
     410             :     ValueTrackerResult getNextSourceFromSubregToReg();
     411             : 
     412             :     /// \brief Specialized version of getNextSource for PHI instructions.
     413             :     ValueTrackerResult getNextSourceFromPHI();
     414             : 
     415             :   public:
     416             :     /// \brief Create a ValueTracker instance for the value defined by \p Reg.
     417             :     /// \p DefSubReg represents the sub register index the value tracker will
     418             :     /// track. It does not need to match the sub register index used in the
     419             :     /// definition of \p Reg.
     420             :     /// \p UseAdvancedTracking specifies whether or not the value tracker looks
     421             :     /// through complex instructions. By default (false), it handles only copy
     422             :     /// and bitcast instructions.
     423             :     /// If \p Reg is a physical register, a value tracker constructed with
     424             :     /// this constructor will not find any alternative source.
     425             :     /// Indeed, when \p Reg is a physical register that constructor does not
     426             :     /// know which definition of \p Reg it should track.
     427             :     /// Use the next constructor to track a physical register.
     428      707874 :     ValueTracker(unsigned Reg, unsigned DefSubReg,
     429             :                  const MachineRegisterInfo &MRI,
     430             :                  bool UseAdvancedTracking = false,
     431             :                  const TargetInstrInfo *TII = nullptr)
     432      707874 :         : DefSubReg(DefSubReg), Reg(Reg),
     433      707874 :           UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
     434      707874 :       if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
     435      707874 :         Def = MRI.getVRegDef(Reg);
     436     1415748 :         DefIdx = MRI.def_begin(Reg).getOperandNo();
     437             :       }
     438      707874 :     }
     439             : 
     440             :     /// \brief Create a ValueTracker instance for the value defined by
     441             :     /// the pair \p MI, \p DefIdx.
     442             :     /// Unlike the other constructor, the value tracker produced by this one
     443             :     /// may be able to find a new source when the definition is a physical
     444             :     /// register.
     445             :     /// This could be useful to rewrite target specific instructions into
     446             :     /// generic copy instructions.
     447             :     ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
     448             :                  const MachineRegisterInfo &MRI,
     449             :                  bool UseAdvancedTracking = false,
     450             :                  const TargetInstrInfo *TII = nullptr)
     451             :         : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
     452             :           UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
     453             :       assert(DefIdx < Def->getDesc().getNumDefs() &&
     454             :              Def->getOperand(DefIdx).isReg() && "Invalid definition");
     455             :       Reg = Def->getOperand(DefIdx).getReg();
     456             :     }
     457             : 
     458             :     /// \brief Following the use-def chain, get the next available source
     459             :     /// for the tracked value.
     460             :     /// \return A ValueTrackerResult containing a set of registers
     461             :     /// and sub registers with tracked values. A ValueTrackerResult with
     462             :     /// an empty set of registers means no source was found.
     463             :     ValueTrackerResult getNextSource();
     464             : 
     465             :     /// \brief Get the last register where the initial value can be found.
     466             :     /// Initially this is the register of the definition.
     467             :     /// Then, after each successful call to getNextSource, this is the
     468             :     /// register of the last source.
     469             :     unsigned getReg() const { return Reg; }
     470             :   };
     471             : 
     472             : } // end anonymous namespace
     473             : 
     474             : char PeepholeOptimizer::ID = 0;
     475             : 
     476             : char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
     477             : 
     478       20212 : INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
     479             :                 "Peephole Optimizations", false, false)
     480       20212 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     481       20212 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     482      198080 : INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
     483             :                 "Peephole Optimizations", false, false)
     484             : 
     485             : /// If instruction is a copy-like instruction, i.e. it reads a single register
     486             : /// and writes a single register and it does not modify the source, and if the
     487             : /// source value is preserved as a sub-register of the result, then replace all
     488             : /// reachable uses of the source with the subreg of the result.
     489             : ///
     490             : /// Do not generate an EXTRACT that is used only in a debug use, as this changes
     491             : /// the code. Since this code does not currently share EXTRACTs, just ignore all
     492             : /// debug uses.
     493     3667600 : bool PeepholeOptimizer::
     494             : optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
     495             :                  SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
     496             :   unsigned SrcReg, DstReg, SubIdx;
     497     3667600 :   if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
     498             :     return false;
     499             : 
     500       34600 :   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
     501       23060 :       TargetRegisterInfo::isPhysicalRegister(SrcReg))
     502             :     return false;
     503             : 
     504       11530 :   if (MRI->hasOneNonDBGUse(SrcReg))
     505             :     // No other uses.
     506             :     return false;
     507             : 
     508             :   // Ensure DstReg can get a register class that actually supports
     509             :   // sub-registers. Don't change the class until we commit.
     510        1204 :   const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
     511         602 :   DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
     512         602 :   if (!DstRC)
     513             :     return false;
     514             : 
     515             :   // The ext instr may be operating on a sub-register of SrcReg as well.
     516             :   // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
     517             :   // register.
     518             :   // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
     519             :   // SrcReg:SubIdx should be replaced.
     520             :   bool UseSrcSubIdx =
     521        1204 :       TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
     522             : 
     523             :   // The source has other uses. See if we can replace the other uses with use of
     524             :   // the result of the extension.
     525         602 :   SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
     526        3906 :   for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
     527         700 :     ReachedBBs.insert(UI.getParent());
     528             : 
     529             :   // Uses that are in the same BB of uses of the result of the instruction.
     530        1204 :   SmallVector<MachineOperand*, 8> Uses;
     531             : 
     532             :   // Uses that the result of the instruction can reach.
     533        1204 :   SmallVector<MachineOperand*, 8> ExtendedUses;
     534             : 
     535         602 :   bool ExtendLife = true;
     536        3063 :   for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
     537        1391 :     MachineInstr *UseMI = UseMO.getParent();
     538        1391 :     if (UseMI == MI)
     539         519 :       continue;
     540             : 
     541         872 :     if (UseMI->isPHI()) {
     542          25 :       ExtendLife = false;
     543          25 :       continue;
     544             :     }
     545             : 
     546             :     // Only accept uses of SrcReg:SubIdx.
     547         857 :     if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
     548           3 :       continue;
     549             : 
     550             :     // It's an error to translate this:
     551             :     //
     552             :     //    %reg1025 = <sext> %reg1024
     553             :     //     ...
     554             :     //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
     555             :     //
     556             :     // into this:
     557             :     //
     558             :     //    %reg1025 = <sext> %reg1024
     559             :     //     ...
     560             :     //    %reg1027 = COPY %reg1025:4
     561             :     //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
     562             :     //
     563             :     // The problem here is that SUBREG_TO_REG is there to assert that an
     564             :     // implicit zext occurs. It doesn't insert a zext instruction. If we allow
     565             :     // the COPY here, it will give us the value after the <sext>, not the
     566             :     // original value of %reg1024 before <sext>.
     567         847 :     if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
     568           3 :       continue;
     569             : 
     570         841 :     MachineBasicBlock *UseMBB = UseMI->getParent();
     571         841 :     if (UseMBB == MBB) {
     572             :       // Local uses that come after the extension.
     573         698 :       if (!LocalMIs.count(UseMI))
     574         580 :         Uses.push_back(&UseMO);
     575         143 :     } else if (ReachedBBs.count(UseMBB)) {
     576             :       // Non-local uses where the result of the extension is used. Always
     577             :       // replace these unless it's a PHI.
     578           9 :       Uses.push_back(&UseMO);
     579         134 :     } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
     580             :       // We may want to extend the live range of the extension result in order
     581             :       // to replace these uses.
     582           0 :       ExtendedUses.push_back(&UseMO);
     583             :     } else {
     584             :       // Both will be live out of the def MBB anyway. Don't extend live range of
     585             :       // the extension result.
     586             :       ExtendLife = false;
     587             :       break;
     588             :     }
     589             :   }
     590             : 
     591         602 :   if (ExtendLife && !ExtendedUses.empty())
     592             :     // Extend the liveness of the extension result.
     593           0 :     Uses.append(ExtendedUses.begin(), ExtendedUses.end());
     594             : 
     595             :   // Now replace all uses.
     596         602 :   bool Changed = false;
     597         602 :   if (!Uses.empty()) {
     598         720 :     SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
     599             : 
     600             :     // Look for PHI uses of the extended result, we don't want to extend the
     601             :     // liveness of a PHI input. It breaks all kinds of assumptions down
     602             :     // stream. A PHI use is expected to be the kill of its source values.
     603        2253 :     for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
     604             :       if (UI.isPHI())
     605           2 :         PHIBBs.insert(UI.getParent());
     606             : 
     607         720 :     const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
     608        1309 :     for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
     609        1178 :       MachineOperand *UseMO = Uses[i];
     610         589 :       MachineInstr *UseMI = UseMO->getParent();
     611         589 :       MachineBasicBlock *UseMBB = UseMI->getParent();
     612         589 :       if (PHIBBs.count(UseMBB))
     613           0 :         continue;
     614             : 
     615             :       // About to add uses of DstReg, clear DstReg's kill flags.
     616         589 :       if (!Changed) {
     617         360 :         MRI->clearKillFlags(DstReg);
     618         360 :         MRI->constrainRegClass(DstReg, DstRC);
     619             :       }
     620             : 
     621         589 :       unsigned NewVR = MRI->createVirtualRegister(RC);
     622        1178 :       MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
     623        1767 :                                    TII->get(TargetOpcode::COPY), NewVR)
     624         589 :         .addReg(DstReg, 0, SubIdx);
     625             :       // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
     626         589 :       if (UseSrcSubIdx) {
     627           2 :         Copy->getOperand(0).setSubReg(SubIdx);
     628           1 :         Copy->getOperand(0).setIsUndef();
     629             :       }
     630         589 :       UseMO->setReg(NewVR);
     631         589 :       ++NumReuse;
     632         589 :       Changed = true;
     633             :     }
     634             :   }
     635             : 
     636         602 :   return Changed;
     637             : }
     638             : 
     639             : /// If the instruction is a compare and the previous instruction it's comparing
     640             : /// against already sets (or could be modified to set) the same flag as the
     641             : /// compare, then we can remove the comparison and use the flag from the
     642             : /// previous instruction.
     643       69184 : bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
     644             :                                          MachineBasicBlock *MBB) {
     645             :   // If this instruction is a comparison against zero and isn't comparing a
     646             :   // physical register, we can try to optimize it.
     647             :   unsigned SrcReg, SrcReg2;
     648             :   int CmpMask, CmpValue;
     649      130629 :   if (!TII->analyzeCompare(*MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
     650      192066 :       TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
     651       89569 :       (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
     652             :     return false;
     653             : 
     654             :   // Attempt to optimize the comparison instruction.
     655       61437 :   if (TII->optimizeCompareInstr(*MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
     656             :     ++NumCmps;
     657             :     return true;
     658             :   }
     659             : 
     660             :   return false;
     661             : }
     662             : 
     663             : /// Optimize a select instruction.
     664         737 : bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI,
     665             :                             SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
     666         737 :   unsigned TrueOp = 0;
     667         737 :   unsigned FalseOp = 0;
     668         737 :   bool Optimizable = false;
     669        1474 :   SmallVector<MachineOperand, 4> Cond;
     670         737 :   if (TII->analyzeSelect(*MI, Cond, TrueOp, FalseOp, Optimizable))
     671             :     return false;
     672         403 :   if (!Optimizable)
     673             :     return false;
     674         403 :   if (!TII->optimizeSelect(*MI, LocalMIs))
     675             :     return false;
     676         131 :   MI->eraseFromParent();
     677         131 :   ++NumSelects;
     678             :   return true;
     679             : }
     680             : 
     681             : /// \brief Check if a simpler conditional branch can be
     682             : /// generated
     683             : bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
     684       42404 :   return TII->optimizeCondBranch(*MI);
     685             : }
     686             : 
     687             : /// \brief Try to find the next source that share the same register file
     688             : /// for the value defined by \p Reg and \p SubReg.
     689             : /// When true is returned, the \p RewriteMap can be used by the client to
     690             : /// retrieve all Def -> Use along the way up to the next source. Any found
     691             : /// Use that is not itself a key for another entry, is the next source to
     692             : /// use. During the search for the next source, multiple sources can be found
     693             : /// given multiple incoming sources of a PHI instruction. In this case, we
     694             : /// look in each PHI source for the next source; all found next sources must
     695             : /// share the same register file as \p Reg and \p SubReg. The client should
     696             : /// then be capable to rewrite all intermediate PHIs to get the next source.
     697             : /// \return False if no alternative sources are available. True otherwise.
     698      707736 : bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
     699             :                                        RewriteMapTy &RewriteMap) {
     700             :   // Do not try to find a new source for a physical register.
     701             :   // So far we do not have any motivating example for doing that.
     702             :   // Thus, instead of maintaining untested code, we will revisit that if
     703             :   // that changes at some point.
     704      707736 :   if (TargetRegisterInfo::isPhysicalRegister(Reg))
     705             :     return false;
     706     1415472 :   const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
     707             : 
     708      707736 :   SmallVector<TargetInstrInfo::RegSubRegPair, 4> SrcToLook;
     709      707736 :   TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg);
     710      707736 :   SrcToLook.push_back(CurSrcPair);
     711             : 
     712      707736 :   unsigned PHICount = 0;
     713     1565714 :   while (!SrcToLook.empty() && PHICount < RewritePHILimit) {
     714      707874 :     TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val();
     715             :     // As explained above, do not handle physical registers
     716     1415748 :     if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg))
     717      557770 :       return false;
     718             : 
     719      707874 :     CurSrcPair = Pair;
     720      707874 :     ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
     721     2123622 :                             !DisableAdvCopyOpt, TII);
     722      707874 :     ValueTrackerResult Res;
     723      707874 :     bool ShouldRewrite = false;
     724             : 
     725             :     do {
     726             :       // Follow the chain of copies until we reach the top of the use-def chain
     727             :       // or find a more suitable source.
     728     3120870 :       Res = ValTracker.getNextSource();
     729     1040290 :       if (!Res.isValid())
     730             :         break;
     731             : 
     732             :       // Insert the Def -> Use entry for the recently found source.
     733     1281577 :       ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
     734      799183 :       if (CurSrcRes.isValid()) {
     735             :         assert(CurSrcRes == Res && "ValueTrackerResult found must match");
     736             :         // An existent entry with multiple sources is a PHI cycle we must avoid.
     737             :         // Otherwise it's an entry with a valid next source we already found.
     738           5 :         if (CurSrcRes.getNumSources() > 1) {
     739             :           DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n");
     740      316663 :           return false;
     741             :         }
     742             :         break;
     743             :       }
     744     2397534 :       RewriteMap.insert(std::make_pair(CurSrcPair, Res));
     745             : 
     746             :       // ValueTrackerResult usually have one source unless it's the result from
     747             :       // a PHI instruction. Add the found PHI edges to be looked up further.
     748      799178 :       unsigned NumSrcs = Res.getNumSources();
     749      799178 :       if (NumSrcs > 1) {
     750         121 :         PHICount++;
     751         371 :         for (unsigned i = 0; i < NumSrcs; ++i)
     752        1000 :           SrcToLook.push_back(TargetInstrInfo::RegSubRegPair(
     753             :               Res.getSrcReg(i), Res.getSrcSubReg(i)));
     754             :         break;
     755             :       }
     756             : 
     757      799057 :       CurSrcPair.Reg = Res.getSrcReg(0);
     758      799057 :       CurSrcPair.SubReg = Res.getSrcSubReg(0);
     759             :       // Do not extend the live-ranges of physical registers as they add
     760             :       // constraints to the register allocator. Moreover, if we want to extend
     761             :       // the live-range of a physical register, unlike SSA virtual register,
     762             :       // we will have to check that they aren't redefine before the related use.
     763     1598114 :       if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
     764             :         return false;
     765             : 
     766      964788 :       const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
     767      964788 :       ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC,
     768      482394 :                                                 CurSrcPair.SubReg);
     769      482394 :     } while (!ShouldRewrite);
     770             : 
     771             :     // Continue looking for new sources...
     772      391211 :     if (Res.isValid())
     773      150104 :       continue;
     774             : 
     775             :     // Do not continue searching for a new source if the there's at least
     776             :     // one use-def which cannot be rewritten.
     777      241107 :     if (!ShouldRewrite)
     778             :       return false;
     779             :   }
     780             : 
     781      149966 :   if (PHICount >= RewritePHILimit) {
     782             :     DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
     783             :     return false;
     784             :   }
     785             : 
     786             :   // If we did not find a more suitable source, there is nothing to optimize.
     787      149966 :   return CurSrcPair.Reg != Reg;
     788             : }
     789             : 
     790             : /// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are
     791             : /// guaranteed to have the same register class. This is necessary whenever we
     792             : /// successfully traverse a PHI instruction and find suitable sources coming
     793             : /// from its edges. By inserting a new PHI, we provide a rewritten PHI def
     794             : /// suitable to be used in a new COPY instruction.
     795             : static MachineInstr *
     796           5 : insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
     797             :           const SmallVectorImpl<TargetInstrInfo::RegSubRegPair> &SrcRegs,
     798             :           MachineInstr *OrigPHI) {
     799             :   assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
     800             : 
     801          15 :   const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg);
     802           5 :   unsigned NewVR = MRI->createVirtualRegister(NewRC);
     803           5 :   MachineBasicBlock *MBB = OrigPHI->getParent();
     804             :   MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(),
     805          15 :                                     TII->get(TargetOpcode::PHI), NewVR);
     806             : 
     807           5 :   unsigned MBBOpIdx = 2;
     808          25 :   for (auto RegPair : SrcRegs) {
     809          10 :     MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
     810          30 :     MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB());
     811             :     // Since we're extended the lifetime of RegPair.Reg, clear the
     812             :     // kill flags to account for that and make RegPair.Reg reaches
     813             :     // the new PHI.
     814          10 :     MRI->clearKillFlags(RegPair.Reg);
     815          10 :     MBBOpIdx += 2;
     816             :   }
     817             : 
     818           5 :   return MIB;
     819             : }
     820             : 
     821             : namespace {
     822             : 
     823             : /// \brief Helper class to rewrite the arguments of a copy-like instruction.
     824             : class CopyRewriter {
     825             : protected:
     826             :   /// The copy-like instruction.
     827             :   MachineInstr &CopyLike;
     828             : 
     829             :   /// The index of the source being rewritten.
     830             :   unsigned CurrentSrcIdx = 0;
     831             : 
     832             : public:
     833      615815 :   CopyRewriter(MachineInstr &MI) : CopyLike(MI) {}
     834      541936 :   virtual ~CopyRewriter() = default;
     835             : 
     836             :   /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and
     837             :   /// the related value that it affects (TrackReg, TrackSubReg).
     838             :   /// A source is considered rewritable if its register class and the
     839             :   /// register class of the related TrackReg may not be register
     840             :   /// coalescer friendly. In other words, given a copy-like instruction
     841             :   /// not all the arguments may be returned at rewritable source, since
     842             :   /// some arguments are none to be register coalescer friendly.
     843             :   ///
     844             :   /// Each call of this method moves the current source to the next
     845             :   /// rewritable source.
     846             :   /// For instance, let CopyLike be the instruction to rewrite.
     847             :   /// CopyLike has one definition and one source:
     848             :   /// dst.dstSubIdx = CopyLike src.srcSubIdx.
     849             :   ///
     850             :   /// The first call will give the first rewritable source, i.e.,
     851             :   /// the only source this instruction has:
     852             :   /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
     853             :   /// This source defines the whole definition, i.e.,
     854             :   /// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
     855             :   ///
     856             :   /// The second and subsequent calls will return false, as there is only one
     857             :   /// rewritable source.
     858             :   ///
     859             :   /// \return True if a rewritable source has been found, false otherwise.
     860             :   /// The output arguments are valid if and only if true is returned.
     861     1083872 :   virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
     862             :                                        unsigned &TrackReg,
     863             :                                        unsigned &TrackSubReg) {
     864             :     // If CurrentSrcIdx == 1, this means this function has already been called
     865             :     // once. CopyLike has one definition and one argument, thus, there is
     866             :     // nothing else to rewrite.
     867     2167744 :     if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
     868             :       return false;
     869             :     // This is the first call to getNextRewritableSource.
     870             :     // Move the CurrentSrcIdx to remember that we made that call.
     871      541936 :     CurrentSrcIdx = 1;
     872             :     // The rewritable source is the argument.
     873      541936 :     const MachineOperand &MOSrc = CopyLike.getOperand(1);
     874      541936 :     SrcReg = MOSrc.getReg();
     875      541936 :     SrcSubReg = MOSrc.getSubReg();
     876             :     // What we track are the alternative sources of the definition.
     877      541936 :     const MachineOperand &MODef = CopyLike.getOperand(0);
     878      541936 :     TrackReg = MODef.getReg();
     879      541936 :     TrackSubReg = MODef.getSubReg();
     880      541936 :     return true;
     881             :   }
     882             : 
     883             :   /// \brief Rewrite the current source with \p NewReg and \p NewSubReg
     884             :   /// if possible.
     885             :   /// \return True if the rewriting was possible, false otherwise.
     886       16964 :   virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) {
     887       33928 :     if (!CopyLike.isCopy() || CurrentSrcIdx != 1)
     888             :       return false;
     889       33928 :     MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
     890       16964 :     MOSrc.setReg(NewReg);
     891       16964 :     MOSrc.setSubReg(NewSubReg);
     892       16964 :     return true;
     893             :   }
     894             : 
     895             :   /// \brief Given a \p Def.Reg and Def.SubReg  pair, use \p RewriteMap to find
     896             :   /// the new source to use for rewrite. If \p HandleMultipleSources is true and
     897             :   /// multiple sources for a given \p Def are found along the way, we found a
     898             :   /// PHI instructions that needs to be rewritten.
     899             :   /// TODO: HandleMultipleSources should be removed once we test PHI handling
     900             :   /// with coalescable copies.
     901             :   TargetInstrInfo::RegSubRegPair
     902      149972 :   getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
     903             :                TargetInstrInfo::RegSubRegPair Def,
     904             :                PeepholeOptimizer::RewriteMapTy &RewriteMap,
     905             :                bool HandleMultipleSources = true) {
     906      149972 :     TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
     907             :     do {
     908      324666 :       ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
     909             :       // If there are no entries on the map, LookupSrc is the new source.
     910      324666 :       if (!Res.isValid())
     911      299939 :         return LookupSrc;
     912             : 
     913             :       // There's only one source for this definition, keep searching...
     914      174699 :       unsigned NumSrcs = Res.getNumSources();
     915      349393 :       if (NumSrcs == 1) {
     916      174694 :         LookupSrc.Reg = Res.getSrcReg(0);
     917      174694 :         LookupSrc.SubReg = Res.getSrcSubReg(0);
     918      174694 :         continue;
     919             :       }
     920             : 
     921             :       // TODO: Remove once multiple srcs w/ coalescable copies are supported.
     922           5 :       if (!HandleMultipleSources)
     923             :         break;
     924             : 
     925             :       // Multiple sources, recurse into each source to find a new source
     926             :       // for it. Then, rewrite the PHI accordingly to its new edges.
     927           5 :       SmallVector<TargetInstrInfo::RegSubRegPair, 4> NewPHISrcs;
     928          15 :       for (unsigned i = 0; i < NumSrcs; ++i) {
     929             :         TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i),
     930          40 :                                               Res.getSrcSubReg(i));
     931          10 :         NewPHISrcs.push_back(
     932          20 :             getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
     933             :       }
     934             : 
     935             :       // Build the new PHI node and return its def register as the new source.
     936           5 :       MachineInstr *OrigPHI = const_cast<MachineInstr *>(Res.getInst());
     937           5 :       MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI);
     938             :       DEBUG(dbgs() << "-- getNewSource\n");
     939             :       DEBUG(dbgs() << "   Replacing: " << *OrigPHI);
     940             :       DEBUG(dbgs() << "        With: " << *NewPHI);
     941           5 :       const MachineOperand &MODef = NewPHI->getOperand(0);
     942           5 :       return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg());
     943             : 
     944             :     } while (true);
     945             : 
     946           0 :     return TargetInstrInfo::RegSubRegPair(0, 0);
     947             :   }
     948             : 
     949             :   /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap
     950             :   /// and create a new COPY instruction. More info about RewriteMap in
     951             :   /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
     952             :   /// Uncoalescable copies, since they are copy like instructions that aren't
     953             :   /// recognized by the register allocator.
     954             :   virtual MachineInstr *
     955           0 :   RewriteSource(TargetInstrInfo::RegSubRegPair Def,
     956             :                 PeepholeOptimizer::RewriteMapTy &RewriteMap) {
     957           0 :     return nullptr;
     958             :   }
     959             : };
     960             : 
     961             : /// \brief Helper class to rewrite uncoalescable copy like instructions
     962             : /// into new COPY (coalescable friendly) instructions.
     963        6958 : class UncoalescableRewriter : public CopyRewriter {
     964             : protected:
     965             :   const TargetInstrInfo &TII;
     966             :   MachineRegisterInfo   &MRI;
     967             : 
     968             :   /// The number of defs in the bitcast
     969             :   unsigned NumDefs;
     970             : 
     971             : public:
     972             :   UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII,
     973             :                          MachineRegisterInfo &MRI)
     974       13916 :       : CopyRewriter(MI), TII(TII), MRI(MRI) {
     975       13916 :     NumDefs = MI.getDesc().getNumDefs();
     976             :   }
     977             : 
     978             :   /// \brief Get the next rewritable def source (TrackReg, TrackSubReg)
     979             :   /// All such sources need to be considered rewritable in order to
     980             :   /// rewrite a uncoalescable copy-like instruction. This method return
     981             :   /// each definition that must be checked if rewritable.
     982        7066 :   bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
     983             :                                unsigned &TrackReg,
     984             :                                unsigned &TrackSubReg) override {
     985             :     // Find the next non-dead definition and continue from there.
     986        7066 :     if (CurrentSrcIdx == NumDefs)
     987             :       return false;
     988             : 
     989       21015 :     while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
     990           0 :       ++CurrentSrcIdx;
     991           0 :       if (CurrentSrcIdx == NumDefs)
     992             :         return false;
     993             :     }
     994             : 
     995             :     // What we track are the alternative sources of the definition.
     996        7005 :     const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
     997        7005 :     TrackReg = MODef.getReg();
     998        7005 :     TrackSubReg = MODef.getSubReg();
     999             : 
    1000        7005 :     CurrentSrcIdx++;
    1001        7005 :     return true;
    1002             :   }
    1003             : 
    1004             :   /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap
    1005             :   /// and create a new COPY instruction. More info about RewriteMap in
    1006             :   /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
    1007             :   /// Uncoalescable copies, since they are copy like instructions that aren't
    1008             :   /// recognized by the register allocator.
    1009             :   MachineInstr *
    1010         104 :   RewriteSource(TargetInstrInfo::RegSubRegPair Def,
    1011             :                 PeepholeOptimizer::RewriteMapTy &RewriteMap) override {
    1012             :     assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
    1013             :            "We do not rewrite physical registers");
    1014             : 
    1015             :     // Find the new source to use in the COPY rewrite.
    1016             :     TargetInstrInfo::RegSubRegPair NewSrc =
    1017         104 :         getNewSource(&MRI, &TII, Def, RewriteMap);
    1018             : 
    1019             :     // Insert the COPY.
    1020         208 :     const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg);
    1021         104 :     unsigned NewVR = MRI.createVirtualRegister(DefRC);
    1022             : 
    1023             :     MachineInstr *NewCopy =
    1024         416 :         BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
    1025         312 :                 TII.get(TargetOpcode::COPY), NewVR)
    1026         104 :             .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
    1027             : 
    1028         208 :     NewCopy->getOperand(0).setSubReg(Def.SubReg);
    1029         104 :     if (Def.SubReg)
    1030           0 :       NewCopy->getOperand(0).setIsUndef();
    1031             : 
    1032             :     DEBUG(dbgs() << "-- RewriteSource\n");
    1033             :     DEBUG(dbgs() << "   Replacing: " << CopyLike);
    1034             :     DEBUG(dbgs() << "        With: " << *NewCopy);
    1035         104 :     MRI.replaceRegWith(Def.Reg, NewVR);
    1036         104 :     MRI.clearKillFlags(NewVR);
    1037             : 
    1038             :     // We extended the lifetime of NewSrc.Reg, clear the kill flags to
    1039             :     // account for that.
    1040         104 :     MRI.clearKillFlags(NewSrc.Reg);
    1041             : 
    1042         104 :     return NewCopy;
    1043             :   }
    1044             : };
    1045             : 
    1046             : /// \brief Specialized rewriter for INSERT_SUBREG instruction.
    1047       16699 : class InsertSubregRewriter : public CopyRewriter {
    1048             : public:
    1049       33398 :   InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) {
    1050             :     assert(MI.isInsertSubreg() && "Invalid instruction");
    1051             :   }
    1052             : 
    1053             :   /// \brief See CopyRewriter::getNextRewritableSource.
    1054             :   /// Here CopyLike has the following form:
    1055             :   /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
    1056             :   /// Src1 has the same register class has dst, hence, there is
    1057             :   /// nothing to rewrite.
    1058             :   /// Src2.src2SubIdx, may not be register coalescer friendly.
    1059             :   /// Therefore, the first call to this method returns:
    1060             :   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
    1061             :   /// (TrackReg, TrackSubReg) = (dst, subIdx).
    1062             :   ///
    1063             :   /// Subsequence calls will return false.
    1064       33398 :   bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
    1065             :                                unsigned &TrackReg,
    1066             :                                unsigned &TrackSubReg) override {
    1067             :     // If we already get the only source we can rewrite, return false.
    1068       33398 :     if (CurrentSrcIdx == 2)
    1069             :       return false;
    1070             :     // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
    1071       16699 :     CurrentSrcIdx = 2;
    1072       16699 :     const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
    1073       16699 :     SrcReg = MOInsertedReg.getReg();
    1074       16699 :     SrcSubReg = MOInsertedReg.getSubReg();
    1075       16699 :     const MachineOperand &MODef = CopyLike.getOperand(0);
    1076             : 
    1077             :     // We want to track something that is compatible with the
    1078             :     // partial definition.
    1079       16699 :     TrackReg = MODef.getReg();
    1080       16699 :     if (MODef.getSubReg())
    1081             :       // Bail if we have to compose sub-register indices.
    1082             :       return false;
    1083       16699 :     TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
    1084       16699 :     return true;
    1085             :   }
    1086             : 
    1087           5 :   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    1088           5 :     if (CurrentSrcIdx != 2)
    1089             :       return false;
    1090             :     // We are rewriting the inserted reg.
    1091          10 :     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    1092           5 :     MO.setReg(NewReg);
    1093           5 :     MO.setSubReg(NewSubReg);
    1094           5 :     return true;
    1095             :   }
    1096             : };
    1097             : 
    1098             : /// \brief Specialized rewriter for EXTRACT_SUBREG instruction.
    1099           0 : class ExtractSubregRewriter : public CopyRewriter {
    1100             :   const TargetInstrInfo &TII;
    1101             : 
    1102             : public:
    1103             :   ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
    1104           0 :       : CopyRewriter(MI), TII(TII) {
    1105             :     assert(MI.isExtractSubreg() && "Invalid instruction");
    1106             :   }
    1107             : 
    1108             :   /// \brief See CopyRewriter::getNextRewritableSource.
    1109             :   /// Here CopyLike has the following form:
    1110             :   /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
    1111             :   /// There is only one rewritable source: Src.subIdx,
    1112             :   /// which defines dst.dstSubIdx.
    1113           0 :   bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
    1114             :                                unsigned &TrackReg,
    1115             :                                unsigned &TrackSubReg) override {
    1116             :     // If we already get the only source we can rewrite, return false.
    1117           0 :     if (CurrentSrcIdx == 1)
    1118             :       return false;
    1119             :     // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
    1120           0 :     CurrentSrcIdx = 1;
    1121           0 :     const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
    1122           0 :     SrcReg = MOExtractedReg.getReg();
    1123             :     // If we have to compose sub-register indices, bail out.
    1124           0 :     if (MOExtractedReg.getSubReg())
    1125             :       return false;
    1126             : 
    1127           0 :     SrcSubReg = CopyLike.getOperand(2).getImm();
    1128             : 
    1129             :     // We want to track something that is compatible with the definition.
    1130           0 :     const MachineOperand &MODef = CopyLike.getOperand(0);
    1131           0 :     TrackReg = MODef.getReg();
    1132           0 :     TrackSubReg = MODef.getSubReg();
    1133           0 :     return true;
    1134             :   }
    1135             : 
    1136           0 :   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    1137             :     // The only source we can rewrite is the input register.
    1138           0 :     if (CurrentSrcIdx != 1)
    1139             :       return false;
    1140             : 
    1141           0 :     CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
    1142             : 
    1143             :     // If we find a source that does not require to extract something,
    1144             :     // rewrite the operation with a copy.
    1145           0 :     if (!NewSubReg) {
    1146             :       // Move the current index to an invalid position.
    1147             :       // We do not want another call to this method to be able
    1148             :       // to do any change.
    1149           0 :       CurrentSrcIdx = -1;
    1150             :       // Rewrite the operation as a COPY.
    1151             :       // Get rid of the sub-register index.
    1152           0 :       CopyLike.RemoveOperand(2);
    1153             :       // Morph the operation into a COPY.
    1154           0 :       CopyLike.setDesc(TII.get(TargetOpcode::COPY));
    1155           0 :       return true;
    1156             :     }
    1157           0 :     CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
    1158           0 :     return true;
    1159             :   }
    1160             : };
    1161             : 
    1162             : /// \brief Specialized rewriter for REG_SEQUENCE instruction.
    1163       50222 : class RegSequenceRewriter : public CopyRewriter {
    1164             : public:
    1165      100444 :   RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) {
    1166             :     assert(MI.isRegSequence() && "Invalid instruction");
    1167             :   }
    1168             : 
    1169             :   /// \brief See CopyRewriter::getNextRewritableSource.
    1170             :   /// Here CopyLike has the following form:
    1171             :   /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
    1172             :   /// Each call will return a different source, walking all the available
    1173             :   /// source.
    1174             :   ///
    1175             :   /// The first call returns:
    1176             :   /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
    1177             :   /// (TrackReg, TrackSubReg) = (dst, subIdx1).
    1178             :   ///
    1179             :   /// The second call returns:
    1180             :   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
    1181             :   /// (TrackReg, TrackSubReg) = (dst, subIdx2).
    1182             :   ///
    1183             :   /// And so on, until all the sources have been traversed, then
    1184             :   /// it returns false.
    1185      192318 :   bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
    1186             :                                unsigned &TrackReg,
    1187             :                                unsigned &TrackSubReg) override {
    1188             :     // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
    1189             : 
    1190             :     // If this is the first call, move to the first argument.
    1191      192318 :     if (CurrentSrcIdx == 0) {
    1192       50222 :       CurrentSrcIdx = 1;
    1193             :     } else {
    1194             :       // Otherwise, move to the next argument and check that it is valid.
    1195      142096 :       CurrentSrcIdx += 2;
    1196      142096 :       if (CurrentSrcIdx >= CopyLike.getNumOperands())
    1197             :         return false;
    1198             :     }
    1199      284304 :     const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
    1200      142152 :     SrcReg = MOInsertedReg.getReg();
    1201             :     // If we have to compose sub-register indices, bail out.
    1202      142152 :     if ((SrcSubReg = MOInsertedReg.getSubReg()))
    1203             :       return false;
    1204             : 
    1205             :     // We want to track something that is compatible with the related
    1206             :     // partial definition.
    1207      284192 :     TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
    1208             : 
    1209      142096 :     const MachineOperand &MODef = CopyLike.getOperand(0);
    1210      142096 :     TrackReg = MODef.getReg();
    1211             :     // If we have to compose sub-registers, bail.
    1212      142096 :     return MODef.getSubReg() == 0;
    1213             :   }
    1214             : 
    1215        7426 :   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    1216             :     // We cannot rewrite out of bound operands.
    1217             :     // Moreover, rewritable sources are at odd positions.
    1218        7426 :     if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
    1219             :       return false;
    1220             : 
    1221       14852 :     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    1222        7426 :     MO.setReg(NewReg);
    1223        7426 :     MO.setSubReg(NewSubReg);
    1224        7426 :     return true;
    1225             :   }
    1226             : };
    1227             : 
    1228             : } // end anonymous namespace
    1229             : 
    1230             : /// \brief Get the appropriated CopyRewriter for \p MI.
    1231             : /// \return A pointer to a dynamically allocated CopyRewriter or nullptr
    1232             : /// if no rewriter works for \p MI.
    1233      615815 : static CopyRewriter *getCopyRewriter(MachineInstr &MI,
    1234             :                                      const TargetInstrInfo &TII,
    1235             :                                      MachineRegisterInfo &MRI) {
    1236             :   // Handle uncoalescable copy-like instructions.
    1237     2452424 :   if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
    1238      611689 :                          MI.isExtractSubregLike()))
    1239       13916 :     return new UncoalescableRewriter(MI, TII, MRI);
    1240             : 
    1241     1217714 :   switch (MI.getOpcode()) {
    1242             :   default:
    1243             :     return nullptr;
    1244      541936 :   case TargetOpcode::COPY:
    1245     1083872 :     return new CopyRewriter(MI);
    1246       16699 :   case TargetOpcode::INSERT_SUBREG:
    1247       33398 :     return new InsertSubregRewriter(MI);
    1248           0 :   case TargetOpcode::EXTRACT_SUBREG:
    1249           0 :     return new ExtractSubregRewriter(MI, TII);
    1250       50222 :   case TargetOpcode::REG_SEQUENCE:
    1251      100444 :     return new RegSequenceRewriter(MI);
    1252             :   }
    1253             :   llvm_unreachable(nullptr);
    1254             : }
    1255             : 
    1256             : /// \brief Optimize generic copy instructions to avoid cross
    1257             : /// register bank copy. The optimization looks through a chain of
    1258             : /// copies and tries to find a source that has a compatible register
    1259             : /// class.
    1260             : /// Two register classes are considered to be compatible if they share
    1261             : /// the same register bank.
    1262             : /// New copies issued by this optimization are register allocator
    1263             : /// friendly. This optimization does not remove any copy as it may
    1264             : /// overconstrain the register allocator, but replaces some operands
    1265             : /// when possible.
    1266             : /// \pre isCoalescableCopy(*MI) is true.
    1267             : /// \return True, when \p MI has been rewritten. False otherwise.
    1268     1081526 : bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
    1269             :   assert(MI && isCoalescableCopy(*MI) && "Invalid argument");
    1270             :   assert(MI->getDesc().getNumDefs() == 1 &&
    1271             :          "Coalescer can understand multiple defs?!");
    1272     1081526 :   const MachineOperand &MODef = MI->getOperand(0);
    1273             :   // Do not rewrite physical definitions.
    1274     2163052 :   if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
    1275             :     return false;
    1276             : 
    1277      608857 :   bool Changed = false;
    1278             :   // Get the right rewriter for the current copy.
    1279     1217714 :   std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
    1280             :   // If none exists, bail out.
    1281      608857 :   if (!CpyRewriter)
    1282             :     return false;
    1283             :   // Rewrite each rewritable source.
    1284             :   unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
    1285     2619176 :   while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
    1286     1309588 :                                               TrackSubReg)) {
    1287             :     // Keep track of PHI nodes and its incoming edges when looking for sources.
    1288      725126 :     RewriteMapTy RewriteMap;
    1289             :     // Try to find a more suitable source. If we failed to do so, or get the
    1290             :     // actual source, move to the next source.
    1291      700731 :     if (!findNextSource(TrackReg, TrackSubReg, RewriteMap))
    1292     1227209 :       continue;
    1293             : 
    1294             :     // Get the new source to rewrite. TODO: Only enable handling of multiple
    1295             :     // sources (PHIs) once we have a motivating example and testcases for it.
    1296      299716 :     TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg);
    1297             :     TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource(
    1298      149858 :         MRI, TII, TrackPair, RewriteMap, false /* multiple sources */);
    1299      149858 :     if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0)
    1300      125463 :       continue;
    1301             : 
    1302             :     // Rewrite source.
    1303       24395 :     if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
    1304             :       // We may have extended the live-range of NewSrc, account for that.
    1305       24395 :       MRI->clearKillFlags(NewSrc.Reg);
    1306       24395 :       Changed = true;
    1307             :     }
    1308             :   }
    1309             :   // TODO: We could have a clean-up method to tidy the instruction.
    1310             :   // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
    1311             :   // => v0 = COPY v1
    1312             :   // Currently we haven't seen motivating example for that and we
    1313             :   // want to avoid untested code.
    1314      608857 :   NumRewrittenCopies += Changed;
    1315      608857 :   return Changed;
    1316             : }
    1317             : 
    1318             : /// \brief Optimize copy-like instructions to create
    1319             : /// register coalescer friendly instruction.
    1320             : /// The optimization tries to kill-off the \p MI by looking
    1321             : /// through a chain of copies to find a source that has a compatible
    1322             : /// register class.
    1323             : /// If such a source is found, it replace \p MI by a generic COPY
    1324             : /// operation.
    1325             : /// \pre isUncoalescableCopy(*MI) is true.
    1326             : /// \return True, when \p MI has been optimized. In that case, \p MI has
    1327             : /// been removed from its parent.
    1328             : /// All COPY instructions created, are inserted in \p LocalMIs.
    1329        6958 : bool PeepholeOptimizer::optimizeUncoalescableCopy(
    1330             :     MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
    1331             :   assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
    1332             : 
    1333             :   // Check if we can rewrite all the values defined by this instruction.
    1334       13916 :   SmallVector<TargetInstrInfo::RegSubRegPair, 4> RewritePairs;
    1335             :   // Get the right rewriter for the current copy.
    1336       20874 :   std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
    1337             :   // If none exists, bail out.
    1338        6958 :   if (!CpyRewriter)
    1339             :     return false;
    1340             : 
    1341             :   // Rewrite each rewritable source by generating new COPYs. This works
    1342             :   // differently from optimizeCoalescableCopy since it first makes sure that all
    1343             :   // definitions can be rewritten.
    1344        6958 :   RewriteMapTy RewriteMap;
    1345             :   unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg;
    1346       14240 :   while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg,
    1347        7066 :                                               CopyDefSubReg)) {
    1348             :     // If a physical register is here, this is probably for a good reason.
    1349             :     // Do not rewrite that.
    1350       14010 :     if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg))
    1351        6897 :       return false;
    1352             : 
    1353             :     // If we do not know how to rewrite this definition, there is no point
    1354             :     // in trying to kill this instruction.
    1355       14010 :     TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg);
    1356        7005 :     if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap))
    1357             :       return false;
    1358             : 
    1359         108 :     RewritePairs.push_back(Def);
    1360             :   }
    1361             : 
    1362             :   // The change is possible for all defs, do it.
    1363         287 :   for (const auto &Def : RewritePairs) {
    1364             :     // Rewrite the "copy" in a way the register coalescer understands.
    1365         104 :     MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap);
    1366             :     assert(NewCopy && "Should be able to always generate a new copy");
    1367         104 :     LocalMIs.insert(NewCopy);
    1368             :   }
    1369             : 
    1370             :   // MI is now dead.
    1371          61 :   MI->eraseFromParent();
    1372          61 :   ++NumUncoalescableCopies;
    1373          61 :   return true;
    1374             : }
    1375             : 
    1376             : /// Check whether MI is a candidate for folding into a later instruction.
    1377             : /// We only fold loads to virtual registers and the virtual register defined
    1378             : /// has a single use.
    1379     3720648 : bool PeepholeOptimizer::isLoadFoldable(
    1380             :     MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
    1381     3720648 :   if (!MI->canFoldAsLoad() || !MI->mayLoad())
    1382             :     return false;
    1383      159775 :   const MCInstrDesc &MCID = MI->getDesc();
    1384      159775 :   if (MCID.getNumDefs() != 1)
    1385             :     return false;
    1386             : 
    1387      159775 :   unsigned Reg = MI->getOperand(0).getReg();
    1388             :   // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
    1389             :   // loads. It should be checked when processing uses of the load, since
    1390             :   // uses can be removed during peephole.
    1391      479325 :   if (!MI->getOperand(0).getSubReg() &&
    1392      479280 :       TargetRegisterInfo::isVirtualRegister(Reg) &&
    1393      159730 :       MRI->hasOneNonDBGUse(Reg)) {
    1394      126342 :     FoldAsLoadDefCandidates.insert(Reg);
    1395             :     return true;
    1396             :   }
    1397             :   return false;
    1398             : }
    1399             : 
    1400     3720648 : bool PeepholeOptimizer::isMoveImmediate(
    1401             :     MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs,
    1402             :     DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
    1403     3720648 :   const MCInstrDesc &MCID = MI->getDesc();
    1404     7441296 :   if (!MI->isMoveImmediate())
    1405             :     return false;
    1406       55171 :   if (MCID.getNumDefs() != 1)
    1407             :     return false;
    1408       54560 :   unsigned Reg = MI->getOperand(0).getReg();
    1409      109120 :   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    1410      159144 :     ImmDefMIs.insert(std::make_pair(Reg, MI));
    1411       53048 :     ImmDefRegs.insert(Reg);
    1412             :     return true;
    1413             :   }
    1414             : 
    1415             :   return false;
    1416             : }
    1417             : 
    1418             : /// Try folding register operands that are defined by move immediate
    1419             : /// instructions, i.e. a trivial constant folding optimization, if
    1420             : /// and only if the def and use are in the same BB.
    1421      305569 : bool PeepholeOptimizer::foldImmediate(
    1422             :     MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs,
    1423             :     DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
    1424     1547396 :   for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    1425     1874974 :     MachineOperand &MO = MI->getOperand(i);
    1426     1598775 :     if (!MO.isReg() || MO.isDef())
    1427      884198 :       continue;
    1428             :     // Ignore dead implicit defs.
    1429      412535 :     if (MO.isImplicit() && MO.isDead())
    1430             :       continue;
    1431      412535 :     unsigned Reg = MO.getReg();
    1432      853686 :     if (!TargetRegisterInfo::isVirtualRegister(Reg))
    1433             :       continue;
    1434      383919 :     if (ImmDefRegs.count(Reg) == 0)
    1435             :       continue;
    1436       53289 :     DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
    1437             :     assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
    1438       53289 :     if (TII->FoldImmediate(*MI, *II->second, Reg, MRI)) {
    1439        1229 :       ++NumImmFold;
    1440        1229 :       return true;
    1441             :     }
    1442             :   }
    1443             :   return false;
    1444             : }
    1445             : 
    1446             : // FIXME: This is very simple and misses some cases which should be handled when
    1447             : // motivating examples are found.
    1448             : //
    1449             : // The copy rewriting logic should look at uses as well as defs and be able to
    1450             : // eliminate copies across blocks.
    1451             : //
    1452             : // Later copies that are subregister extracts will also not be eliminated since
    1453             : // only the first copy is considered.
    1454             : //
    1455             : // e.g.
    1456             : // %vreg1 = COPY %vreg0
    1457             : // %vreg2 = COPY %vreg0:sub1
    1458             : //
    1459             : // Should replace %vreg2 uses with %vreg1:sub1
    1460      997641 : bool PeepholeOptimizer::foldRedundantCopy(
    1461             :     MachineInstr *MI, SmallSet<unsigned, 4> &CopySrcRegs,
    1462             :     DenseMap<unsigned, MachineInstr *> &CopyMIs) {
    1463             :   assert(MI->isCopy() && "expected a COPY machine instruction");
    1464             : 
    1465      997641 :   unsigned SrcReg = MI->getOperand(1).getReg();
    1466     1995282 :   if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
    1467             :     return false;
    1468             : 
    1469      686678 :   unsigned DstReg = MI->getOperand(0).getReg();
    1470      686678 :   if (!TargetRegisterInfo::isVirtualRegister(DstReg))
    1471             :     return false;
    1472             : 
    1473      214017 :   if (CopySrcRegs.insert(SrcReg).second) {
    1474             :     // First copy of this reg seen.
    1475      476283 :     CopyMIs.insert(std::make_pair(SrcReg, MI));
    1476             :     return false;
    1477             :   }
    1478             : 
    1479       55256 :   MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second;
    1480             : 
    1481      110512 :   unsigned SrcSubReg = MI->getOperand(1).getSubReg();
    1482      110512 :   unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg();
    1483             : 
    1484             :   // Can't replace different subregister extracts.
    1485       55256 :   if (SrcSubReg != PrevSrcSubReg)
    1486             :     return false;
    1487             : 
    1488       15708 :   unsigned PrevDstReg = PrevCopy->getOperand(0).getReg();
    1489             : 
    1490             :   // Only replace if the copy register class is the same.
    1491             :   //
    1492             :   // TODO: If we have multiple copies to different register classes, we may want
    1493             :   // to track multiple copies of the same source register.
    1494       47124 :   if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
    1495             :     return false;
    1496             : 
    1497       15691 :   MRI->replaceRegWith(DstReg, PrevDstReg);
    1498             : 
    1499             :   // Lifetime of the previous copy has been extended.
    1500       15691 :   MRI->clearKillFlags(PrevDstReg);
    1501             :   return true;
    1502             : }
    1503             : 
    1504             : bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) {
    1505     6394549 :   return TargetRegisterInfo::isPhysicalRegister(Reg) &&
    1506     2100348 :          !MRI->isAllocatable(Reg);
    1507             : }
    1508             : 
    1509      981950 : bool PeepholeOptimizer::foldRedundantNAPhysCopy(
    1510             :     MachineInstr *MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) {
    1511             :   assert(MI->isCopy() && "expected a COPY machine instruction");
    1512             : 
    1513      981950 :   if (DisableNAPhysCopyOpt)
    1514             :     return false;
    1515             : 
    1516      981950 :   unsigned DstReg = MI->getOperand(0).getReg();
    1517      981950 :   unsigned SrcReg = MI->getOperand(1).getReg();
    1518     1106890 :   if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
    1519             :     // %vreg = COPY %PHYSREG
    1520             :     // Avoid using a datastructure which can track multiple live non-allocatable
    1521             :     // phys->virt copies since LLVM doesn't seem to do this.
    1522      124924 :     NAPhysToVirtMIs.insert({SrcReg, MI});
    1523       62462 :     return false;
    1524             :   }
    1525             : 
    1526     1592019 :   if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
    1527             :     return false;
    1528             : 
    1529             :   // %PHYSREG = COPY %vreg
    1530        1544 :   auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
    1531        3088 :   if (PrevCopy == NAPhysToVirtMIs.end()) {
    1532             :     // We can't remove the copy: there was an intervening clobber of the
    1533             :     // non-allocatable physical register after the copy to virtual.
    1534             :     DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI
    1535             :                  << '\n');
    1536             :     return false;
    1537             :   }
    1538             : 
    1539        1214 :   unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg();
    1540        1214 :   if (PrevDstReg == SrcReg) {
    1541             :     // Remove the virt->phys copy: we saw the virtual register definition, and
    1542             :     // the non-allocatable physical register's state hasn't changed since then.
    1543             :     DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n');
    1544             :     ++NumNAPhysCopies;
    1545             :     return true;
    1546             :   }
    1547             : 
    1548             :   // Potential missed optimization opportunity: we saw a different virtual
    1549             :   // register get a copy of the non-allocatable physical register, and we only
    1550             :   // track one such copy. Avoid getting confused by this new non-allocatable
    1551             :   // physical register definition, and remove it from the tracked copies.
    1552             :   DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n');
    1553         586 :   NAPhysToVirtMIs.erase(PrevCopy);
    1554         293 :   return false;
    1555             : }
    1556             : 
    1557             : /// \bried Returns true if \p MO is a virtual register operand.
    1558             : static bool isVirtualRegisterOperand(MachineOperand &MO) {
    1559        2753 :   if (!MO.isReg())
    1560             :     return false;
    1561        5506 :   return TargetRegisterInfo::isVirtualRegister(MO.getReg());
    1562             : }
    1563             : 
    1564       10848 : bool PeepholeOptimizer::findTargetRecurrence(
    1565             :     unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
    1566             :     RecurrenceCycle &RC) {
    1567             :   // Recurrence found if Reg is in TargetRegs.
    1568       10848 :   if (TargetRegs.count(Reg))
    1569             :     return true;
    1570             : 
    1571             :   // TODO: Curerntly, we only allow the last instruction of the recurrence
    1572             :   // cycle (the instruction that feeds the PHI instruction) to have more than
    1573             :   // one uses to guarantee that commuting operands does not tie registers
    1574             :   // with overlapping live range. Once we have actual live range info of
    1575             :   // each register, this constraint can be relaxed.
    1576        9619 :   if (!MRI->hasOneNonDBGUse(Reg))
    1577             :     return false;
    1578             : 
    1579             :   // Give up if the reccurrence chain length is longer than the limit.
    1580       10182 :   if (RC.size() >= MaxRecurrenceChain)
    1581             :     return false;
    1582             : 
    1583        6748 :   MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
    1584        3374 :   unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
    1585             : 
    1586             :   // Only interested in recurrences whose instructions have only one def, which
    1587             :   // is a virtual register.
    1588        3374 :   if (MI.getDesc().getNumDefs() != 1)
    1589             :     return false;
    1590             : 
    1591        2753 :   MachineOperand &DefOp = MI.getOperand(0);
    1592        2753 :   if (!isVirtualRegisterOperand(DefOp))
    1593             :     return false;
    1594             : 
    1595             :   // Check if def operand of MI is tied to any use operand. We are only
    1596             :   // interested in the case that all the instructions in the recurrence chain
    1597             :   // have there def operand tied with one of the use operand.
    1598             :   unsigned TiedUseIdx;
    1599        2621 :   if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
    1600             :     return false;
    1601             : 
    1602        1523 :   if (Idx == TiedUseIdx) {
    1603        4116 :     RC.push_back(RecurrenceInstr(&MI));
    1604        1372 :     return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
    1605             :   } else {
    1606             :     // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
    1607         151 :     unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
    1608         151 :     if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
    1609         336 :       RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
    1610         112 :       return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
    1611             :     }
    1612             :   }
    1613             : 
    1614          39 :   return false;
    1615             : }
    1616             : 
    1617             : /// \brief Phi instructions will eventually be lowered to copy instructions. If
    1618             : /// phi is in a loop header, a recurrence may formulated around the source and
    1619             : /// destination of the phi. For such case commuting operands of the instructions
    1620             : /// in the recurrence may enable coalescing of the copy instruction generated
    1621             : /// from the phi. For example, if there is a recurrence of
    1622             : ///
    1623             : /// LoopHeader:
    1624             : ///   %vreg1 = phi(%vreg0, %vreg100)
    1625             : /// LoopLatch:
    1626             : ///   %vreg0<def, tied1> = ADD %vreg2<def, tied0>, %vreg1
    1627             : ///
    1628             : /// , the fact that vreg0 and vreg2 are in the same tied operands set makes
    1629             : /// the coalescing of copy instruction generated from the phi in
    1630             : /// LoopHeader(i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and
    1631             : /// %vreg2 have overlapping live range. This introduces additional move
    1632             : /// instruction to the final assembly. However, if we commute %vreg2 and
    1633             : /// %vreg1 of ADD instruction, the redundant move instruction can be
    1634             : /// avoided.
    1635        9364 : bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
    1636       18728 :   SmallSet<unsigned, 2> TargetRegs;
    1637       28510 :   for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
    1638       38292 :     MachineOperand &MO = PHI.getOperand(Idx);
    1639             :     assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
    1640       19146 :     TargetRegs.insert(MO.getReg());
    1641             :   }
    1642             : 
    1643        9364 :   bool Changed = false;
    1644       18728 :   RecurrenceCycle RC;
    1645        9364 :   if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
    1646             :     // Commutes operands of instructions in RC if necessary so that the copy to
    1647             :     // be generated from PHI can be coalesced.
    1648             :     DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
    1649        4905 :     for (auto &RI : RC) {
    1650             :       DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
    1651        2436 :       auto CP = RI.getCommutePair();
    1652        1218 :       if (CP) {
    1653          83 :         Changed = true;
    1654         249 :         TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
    1655          83 :                                 (*CP).second);
    1656             :         DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
    1657             :       }
    1658             :     }
    1659             :   }
    1660             : 
    1661       18728 :   return Changed;
    1662             : }
    1663             : 
    1664      135825 : bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
    1665      135825 :   if (skipFunction(*MF.getFunction()))
    1666             :     return false;
    1667             : 
    1668             :   DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
    1669             :   DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
    1670             : 
    1671      135768 :   if (DisablePeephole)
    1672             :     return false;
    1673             : 
    1674      133651 :   TII = MF.getSubtarget().getInstrInfo();
    1675      133651 :   TRI = MF.getSubtarget().getRegisterInfo();
    1676      133651 :   MRI = &MF.getRegInfo();
    1677      133651 :   DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
    1678      133651 :   MLI = &getAnalysis<MachineLoopInfo>();
    1679             : 
    1680      133651 :   bool Changed = false;
    1681             : 
    1682      681240 :   for (MachineBasicBlock &MBB : MF) {
    1683      280287 :     bool SeenMoveImm = false;
    1684             : 
    1685             :     // During this forward scan, at some point it needs to answer the question
    1686             :     // "given a pointer to an MI in the current BB, is it located before or
    1687             :     // after the current instruction".
    1688             :     // To perform this, the following set keeps track of the MIs already seen
    1689             :     // during the scan, if a MI is not in the set, it is assumed to be located
    1690             :     // after. Newly created MIs have to be inserted in the set as well.
    1691      560574 :     SmallPtrSet<MachineInstr*, 16> LocalMIs;
    1692      560574 :     SmallSet<unsigned, 4> ImmDefRegs;
    1693      560574 :     DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    1694      560574 :     SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
    1695             : 
    1696             :     // Track when a non-allocatable physical register is copied to a virtual
    1697             :     // register so that useless moves can be removed.
    1698             :     //
    1699             :     // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG`
    1700             :     // without any intervening re-definition of %PHYSREG.
    1701      560574 :     DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs;
    1702             : 
    1703             :     // Set of virtual registers that are copied from.
    1704      560574 :     SmallSet<unsigned, 4> CopySrcRegs;
    1705      560574 :     DenseMap<unsigned, MachineInstr *> CopySrcMIs;
    1706             : 
    1707      560574 :     bool IsLoopHeader = MLI->isLoopHeader(&MBB);
    1708             : 
    1709      560574 :     for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
    1710     4159602 :          MII != MIE; ) {
    1711     3879315 :       MachineInstr *MI = &*MII;
    1712             :       // We may be erasing MI below, increment MII now.
    1713     3879315 :       ++MII;
    1714     3879315 :       LocalMIs.insert(MI);
    1715             : 
    1716             :       // Skip debug values. They should not affect this peephole optimization.
    1717     3879315 :       if (MI->isDebugValue())
    1718       17900 :           continue;
    1719             : 
    1720     3861415 :       if (MI->isPosition())
    1721       74587 :         continue;
    1722             : 
    1723     3786828 :       if (IsLoopHeader && MI->isPHI()) {
    1724        9447 :         if (optimizeRecurrence(*MI)) {
    1725          82 :           Changed = true;
    1726          82 :           continue;
    1727             :         }
    1728             :       }
    1729             : 
    1730     3786745 :       if (!MI->isCopy()) {
    1731    16446029 :         for (const auto &Op : MI->operands()) {
    1732             :           // Visit all operands: definitions can be implicit or explicit.
    1733    13673888 :           if (Op.isReg()) {
    1734     8321198 :             unsigned Reg = Op.getReg();
    1735     9566696 :             if (Op.isDef() && isNAPhysCopy(Reg)) {
    1736     1245498 :               const auto &Def = NAPhysToVirtMIs.find(Reg);
    1737     2490996 :               if (Def != NAPhysToVirtMIs.end()) {
    1738             :                 // A new definition of the non-allocatable physical register
    1739             :                 // invalidates previous copies.
    1740             :                 DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI
    1741             :                              << '\n');
    1742         281 :                 NAPhysToVirtMIs.erase(Def);
    1743             :               }
    1744             :             }
    1745     5352690 :           } else if (Op.isRegMask()) {
    1746      186698 :             const uint32_t *RegMask = Op.getRegMask();
    1747      620197 :             for (auto &RegMI : NAPhysToVirtMIs) {
    1748       60103 :               unsigned Def = RegMI.first;
    1749      120206 :               if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
    1750             :                 DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI
    1751             :                              << '\n');
    1752       59617 :                 NAPhysToVirtMIs.erase(Def);
    1753             :               }
    1754             :             }
    1755             :           }
    1756             :         }
    1757             :       }
    1758             : 
    1759     7573490 :       if (MI->isImplicitDef() || MI->isKill())
    1760       27698 :         continue;
    1761             : 
    1762     7513950 :       if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
    1763             :         // Blow away all non-allocatable physical registers knowledge since we
    1764             :         // don't know what's correct anymore.
    1765             :         //
    1766             :         // FIXME: handle explicit asm clobbers.
    1767             :         DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI
    1768             :                      << '\n');
    1769       79935 :         NAPhysToVirtMIs.clear();
    1770             :       }
    1771             : 
    1772     3767206 :       if ((isUncoalescableCopy(*MI) &&
    1773     3765945 :            optimizeUncoalescableCopy(MI, LocalMIs)) ||
    1774    11345198 :           (MI->isCompare() && optimizeCmpInstr(MI, &MBB)) ||
    1775     3758716 :           (MI->isSelect() && optimizeSelect(MI, LocalMIs))) {
    1776             :         // MI is deleted.
    1777        1200 :         LocalMIs.erase(MI);
    1778        1200 :         Changed = true;
    1779        1200 :         continue;
    1780             :       }
    1781             : 
    1782     3800258 :       if (MI->isConditionalBranch() && optimizeCondBranch(MI)) {
    1783           6 :         Changed = true;
    1784           6 :         continue;
    1785             :       }
    1786             : 
    1787     4859950 :       if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) {
    1788             :         // MI is just rewritten.
    1789       20582 :         Changed = true;
    1790       20582 :         continue;
    1791             :       }
    1792             : 
    1793     4751513 :       if (MI->isCopy() &&
    1794     1979591 :           (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) ||
    1795      981950 :            foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) {
    1796       16612 :         LocalMIs.erase(MI);
    1797       16612 :         MI->eraseFromParent();
    1798       16612 :         Changed = true;
    1799       16612 :         continue;
    1800             :       }
    1801             : 
    1802     3720648 :       if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
    1803             :         SeenMoveImm = true;
    1804             :       } else {
    1805     3667600 :         Changed |= optimizeExtInstr(MI, &MBB, LocalMIs);
    1806             :         // optimizeExtInstr might have created new instructions after MI
    1807             :         // and before the already incremented MII. Adjust MII so that the
    1808             :         // next iteration sees the new instructions.
    1809     3667600 :         MII = MI;
    1810     3667600 :         ++MII;
    1811     3667600 :         if (SeenMoveImm)
    1812      305569 :           Changed |= foldImmediate(MI, &MBB, ImmDefRegs, ImmDefMIs);
    1813             :       }
    1814             : 
    1815             :       // Check whether MI is a load candidate for folding into a later
    1816             :       // instruction. If MI is not a candidate, check whether we can fold an
    1817             :       // earlier load into MI.
    1818     3720648 :       if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
    1819             :           !FoldAsLoadDefCandidates.empty()) {
    1820             : 
    1821             :         // We visit each operand even after successfully folding a previous
    1822             :         // one.  This allows us to fold multiple loads into a single
    1823             :         // instruction.  We do assume that optimizeLoadInstr doesn't insert
    1824             :         // foldable uses earlier in the argument list.  Since we don't restart
    1825             :         // iteration, we'd miss such cases.
    1826      276154 :         const MCInstrDesc &MIDesc = MI->getDesc();
    1827     1219837 :         for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
    1828             :              ++i) {
    1829     1887366 :           const MachineOperand &MOp = MI->getOperand(i);
    1830      943683 :           if (!MOp.isReg())
    1831      277360 :             continue;
    1832      666323 :           unsigned FoldAsLoadDefReg = MOp.getReg();
    1833      666323 :           if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
    1834             :             // We need to fold load after optimizeCmpInstr, since
    1835             :             // optimizeCmpInstr can enable folding by converting SUB to CMP.
    1836             :             // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
    1837             :             // we need it for markUsesInDebugValueAsUndef().
    1838      105665 :             unsigned FoldedReg = FoldAsLoadDefReg;
    1839      105665 :             MachineInstr *DefMI = nullptr;
    1840      105665 :             if (MachineInstr *FoldMI =
    1841      105665 :                     TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
    1842             :               // Update LocalMIs since we replaced MI with FoldMI and deleted
    1843             :               // DefMI.
    1844             :               DEBUG(dbgs() << "Replacing: " << *MI);
    1845             :               DEBUG(dbgs() << "     With: " << *FoldMI);
    1846        2204 :               LocalMIs.erase(MI);
    1847        4408 :               LocalMIs.erase(DefMI);
    1848        2204 :               LocalMIs.insert(FoldMI);
    1849        2204 :               MI->eraseFromParent();
    1850        2204 :               DefMI->eraseFromParent();
    1851        2204 :               MRI->markUsesInDebugValueAsUndef(FoldedReg);
    1852        2204 :               FoldAsLoadDefCandidates.erase(FoldedReg);
    1853        2204 :               ++NumLoadFold;
    1854             : 
    1855             :               // MI is replaced with FoldMI so we can continue trying to fold
    1856        2204 :               Changed = true;
    1857        2204 :               MI = FoldMI;
    1858             :             }
    1859             :           }
    1860             :         }
    1861             :       }
    1862             : 
    1863             :       // If we run into an instruction we can't fold across, discard
    1864             :       // the load candidates.  Note: We might be able to fold *into* this
    1865             :       // instruction, so this needs to be after the folding logic.
    1866     3720648 :       if (MI->isLoadFoldBarrier()) {
    1867             :         DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n");
    1868             :         FoldAsLoadDefCandidates.clear();
    1869             :       }
    1870             :     }
    1871             :   }
    1872             : 
    1873             :   return Changed;
    1874             : }
    1875             : 
    1876      625669 : ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
    1877             :   assert(Def->isCopy() && "Invalid definition");
    1878             :   // Copy instruction are supposed to be: Def = Src.
    1879             :   // If someone breaks this assumption, bad things will happen everywhere.
    1880             :   assert(Def->getNumOperands() == 2 && "Invalid number of operands");
    1881             : 
    1882     1877007 :   if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
    1883             :     // If we look for a different subreg, it means we want a subreg of src.
    1884             :     // Bails as we do not support composing subregs yet.
    1885        8199 :     return ValueTrackerResult();
    1886             :   // Otherwise, we want the whole source.
    1887      617470 :   const MachineOperand &Src = Def->getOperand(1);
    1888      617470 :   return ValueTrackerResult(Src.getReg(), Src.getSubReg());
    1889             : }
    1890             : 
    1891        2803 : ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
    1892             :   assert(Def->isBitcast() && "Invalid definition");
    1893             : 
    1894             :   // Bail if there are effects that a plain copy will not expose.
    1895        2803 :   if (Def->hasUnmodeledSideEffects())
    1896           0 :     return ValueTrackerResult();
    1897             : 
    1898             :   // Bitcasts with more than one def are not supported.
    1899        2803 :   if (Def->getDesc().getNumDefs() != 1)
    1900           0 :     return ValueTrackerResult();
    1901        5606 :   const MachineOperand DefOp = Def->getOperand(DefIdx);
    1902        2803 :   if (DefOp.getSubReg() != DefSubReg)
    1903             :     // If we look for a different subreg, it means we want a subreg of the src.
    1904             :     // Bails as we do not support composing subregs yet.
    1905           0 :     return ValueTrackerResult();
    1906             : 
    1907        2803 :   unsigned SrcIdx = Def->getNumOperands();
    1908        7794 :   for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
    1909             :        ++OpIdx) {
    1910        9982 :     const MachineOperand &MO = Def->getOperand(OpIdx);
    1911        4991 :     if (!MO.isReg() || !MO.getReg())
    1912        2188 :       continue;
    1913             :     // Ignore dead implicit defs.
    1914        2803 :     if (MO.isImplicit() && MO.isDead())
    1915           0 :       continue;
    1916             :     assert(!MO.isDef() && "We should have skipped all the definitions by now");
    1917        2803 :     if (SrcIdx != EndOpIdx)
    1918             :       // Multiple sources?
    1919           0 :       return ValueTrackerResult();
    1920             :     SrcIdx = OpIdx;
    1921             :   }
    1922             : 
    1923             :   // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
    1924             :   // will break the assumed guarantees for the upper bits.
    1925       17183 :   for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
    1926        2928 :     if (UseMI.isSubregToReg())
    1927          10 :       return ValueTrackerResult();
    1928             :   }
    1929             : 
    1930        5596 :   const MachineOperand &Src = Def->getOperand(SrcIdx);
    1931        2798 :   return ValueTrackerResult(Src.getReg(), Src.getSubReg());
    1932             : }
    1933             : 
    1934      165171 : ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
    1935             :   assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
    1936             :          "Invalid definition");
    1937             : 
    1938      495513 :   if (Def->getOperand(DefIdx).getSubReg())
    1939             :     // If we are composing subregs, bail out.
    1940             :     // The case we are checking is Def.<subreg> = REG_SEQUENCE.
    1941             :     // This should almost never happen as the SSA property is tracked at
    1942             :     // the register level (as opposed to the subreg level).
    1943             :     // I.e.,
    1944             :     // Def.sub0 =
    1945             :     // Def.sub1 =
    1946             :     // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
    1947             :     // Def. Thus, it must not be generated.
    1948             :     // However, some code could theoretically generates a single
    1949             :     // Def.sub0 (i.e, not defining the other subregs) and we would
    1950             :     // have this case.
    1951             :     // If we can ascertain (or force) that this never happens, we could
    1952             :     // turn that into an assertion.
    1953           0 :     return ValueTrackerResult();
    1954             : 
    1955      165171 :   if (!TII)
    1956             :     // We could handle the REG_SEQUENCE here, but we do not want to
    1957             :     // duplicate the code from the generic TII.
    1958           0 :     return ValueTrackerResult();
    1959             : 
    1960      165171 :   SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
    1961      165171 :   if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
    1962           5 :     return ValueTrackerResult();
    1963             : 
    1964             :   // We are looking at:
    1965             :   // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
    1966             :   // Check if one of the operand defines the subreg we are interested in.
    1967      675005 :   for (auto &RegSeqInput : RegSeqInputRegs) {
    1968      340545 :     if (RegSeqInput.SubIdx == DefSubReg) {
    1969      161038 :       if (RegSeqInput.SubReg)
    1970             :         // Bail if we have to compose sub registers.
    1971        1913 :         return ValueTrackerResult();
    1972             : 
    1973      159125 :       return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
    1974             :     }
    1975             :   }
    1976             : 
    1977             :   // If the subreg we are tracking is super-defined by another subreg,
    1978             :   // we could follow this value. However, this would require to compose
    1979             :   // the subreg and we do not do that for now.
    1980        4128 :   return ValueTrackerResult();
    1981             : }
    1982             : 
    1983       22580 : ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
    1984             :   assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
    1985             :          "Invalid definition");
    1986             : 
    1987       67740 :   if (Def->getOperand(DefIdx).getSubReg())
    1988             :     // If we are composing subreg, bail out.
    1989             :     // Same remark as getNextSourceFromRegSequence.
    1990             :     // I.e., this may be turned into an assert.
    1991           0 :     return ValueTrackerResult();
    1992             : 
    1993       22580 :   if (!TII)
    1994             :     // We could handle the REG_SEQUENCE here, but we do not want to
    1995             :     // duplicate the code from the generic TII.
    1996           0 :     return ValueTrackerResult();
    1997             : 
    1998       22580 :   TargetInstrInfo::RegSubRegPair BaseReg;
    1999       22580 :   TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
    2000       22580 :   if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
    2001           0 :     return ValueTrackerResult();
    2002             : 
    2003             :   // We are looking at:
    2004             :   // Def = INSERT_SUBREG v0, v1, sub1
    2005             :   // There are two cases:
    2006             :   // 1. DefSubReg == sub1, get v1.
    2007             :   // 2. DefSubReg != sub1, the value may be available through v0.
    2008             : 
    2009             :   // #1 Check if the inserted register matches the required sub index.
    2010       22580 :   if (InsertedReg.SubIdx == DefSubReg) {
    2011       16783 :     return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
    2012             :   }
    2013             :   // #2 Otherwise, if the sub register we are looking for is not partial
    2014             :   // defined by the inserted element, we can look through the main
    2015             :   // register (v0).
    2016       11594 :   const MachineOperand &MODef = Def->getOperand(DefIdx);
    2017             :   // If the result register (Def) and the base register (v0) do not
    2018             :   // have the same register class or if we have to compose
    2019             :   // subregisters, bail out.
    2020       22971 :   if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
    2021        5580 :       BaseReg.SubReg)
    2022         217 :     return ValueTrackerResult();
    2023             : 
    2024             :   // Get the TRI and check if the inserted sub-register overlaps with the
    2025             :   // sub-register we are tracking.
    2026       11160 :   const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
    2027       11160 :   if (!TRI ||
    2028       16740 :       !(TRI->getSubRegIndexLaneMask(DefSubReg) &
    2029       16740 :         TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none())
    2030        5573 :     return ValueTrackerResult();
    2031             :   // At this point, the value is available in v0 via the same subreg
    2032             :   // we used for Def.
    2033           7 :   return ValueTrackerResult(BaseReg.Reg, DefSubReg);
    2034             : }
    2035             : 
    2036        2879 : ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
    2037             :   assert((Def->isExtractSubreg() ||
    2038             :           Def->isExtractSubregLike()) && "Invalid definition");
    2039             :   // We are looking at:
    2040             :   // Def = EXTRACT_SUBREG v0, sub0
    2041             : 
    2042             :   // Bail if we have to compose sub registers.
    2043             :   // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
    2044        2879 :   if (DefSubReg)
    2045           0 :     return ValueTrackerResult();
    2046             : 
    2047        2879 :   if (!TII)
    2048             :     // We could handle the EXTRACT_SUBREG here, but we do not want to
    2049             :     // duplicate the code from the generic TII.
    2050           0 :     return ValueTrackerResult();
    2051             : 
    2052        2879 :   TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg;
    2053        2879 :   if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
    2054           0 :     return ValueTrackerResult();
    2055             : 
    2056             :   // Bail if we have to compose sub registers.
    2057             :   // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
    2058        2879 :   if (ExtractSubregInputReg.SubReg)
    2059           0 :     return ValueTrackerResult();
    2060             :   // Otherwise, the value is available in the v0.sub0.
    2061             :   return ValueTrackerResult(ExtractSubregInputReg.Reg,
    2062        2879 :                             ExtractSubregInputReg.SubIdx);
    2063             : }
    2064             : 
    2065          28 : ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
    2066             :   assert(Def->isSubregToReg() && "Invalid definition");
    2067             :   // We are looking at:
    2068             :   // Def = SUBREG_TO_REG Imm, v0, sub0
    2069             : 
    2070             :   // Bail if we have to compose sub registers.
    2071             :   // If DefSubReg != sub0, we would have to check that all the bits
    2072             :   // we track are included in sub0 and if yes, we would have to
    2073             :   // determine the right subreg in v0.
    2074          28 :   if (DefSubReg != Def->getOperand(3).getImm())
    2075          28 :     return ValueTrackerResult();
    2076             :   // Bail if we have to compose sub registers.
    2077             :   // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
    2078           0 :   if (Def->getOperand(2).getSubReg())
    2079           0 :     return ValueTrackerResult();
    2080             : 
    2081           0 :   return ValueTrackerResult(Def->getOperand(2).getReg(),
    2082           0 :                             Def->getOperand(3).getImm());
    2083             : }
    2084             : 
    2085             : /// \brief Explore each PHI incoming operand and return its sources
    2086         256 : ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
    2087             :   assert(Def->isPHI() && "Invalid definition");
    2088         512 :   ValueTrackerResult Res;
    2089             : 
    2090             :   // If we look for a different subreg, bail as we do not support composing
    2091             :   // subregs yet.
    2092         512 :   if (Def->getOperand(0).getSubReg() != DefSubReg)
    2093         135 :     return ValueTrackerResult();
    2094             : 
    2095             :   // Return all register sources for PHI instructions.
    2096         371 :   for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
    2097         500 :     auto &MO = Def->getOperand(i);
    2098             :     assert(MO.isReg() && "Invalid PHI instruction");
    2099         500 :     Res.addSource(MO.getReg(), MO.getSubReg());
    2100             :   }
    2101             : 
    2102             :   return Res;
    2103             : }
    2104             : 
    2105     1040290 : ValueTrackerResult ValueTracker::getNextSourceImpl() {
    2106             :   assert(Def && "This method needs a valid definition");
    2107             : 
    2108             :   assert(((Def->getOperand(DefIdx).isDef() &&
    2109             :            (DefIdx < Def->getDesc().getNumDefs() ||
    2110             :             Def->getDesc().isVariadic())) ||
    2111             :           Def->getOperand(DefIdx).isImplicit()) &&
    2112             :          "Invalid DefIdx");
    2113     2080580 :   if (Def->isCopy())
    2114      625669 :     return getNextSourceFromCopy();
    2115      829242 :   if (Def->isBitcast())
    2116        2803 :     return getNextSourceFromBitcast();
    2117             :   // All the remaining cases involve "complex" instructions.
    2118             :   // Bail if we did not ask for the advanced tracking.
    2119      411818 :   if (!UseAdvancedTracking)
    2120          38 :     return ValueTrackerResult();
    2121     1071443 :   if (Def->isRegSequence() || Def->isRegSequenceLike())
    2122      165171 :     return getNextSourceFromRegSequence();
    2123      717442 :   if (Def->isInsertSubreg() || Def->isInsertSubregLike())
    2124       22580 :     return getNextSourceFromInsertSubreg();
    2125      896116 :   if (Def->isExtractSubreg() || Def->isExtractSubregLike())
    2126        2879 :     return getNextSourceFromExtractSubreg();
    2127      442300 :   if (Def->isSubregToReg())
    2128          28 :     return getNextSourceFromSubregToReg();
    2129      441988 :   if (Def->isPHI())
    2130         256 :     return getNextSourceFromPHI();
    2131      220866 :   return ValueTrackerResult();
    2132             : }
    2133             : 
    2134     1040290 : ValueTrackerResult ValueTracker::getNextSource() {
    2135             :   // If we reach a point where we cannot move up in the use-def chain,
    2136             :   // there is nothing we can get.
    2137     1040290 :   if (!Def)
    2138           0 :     return ValueTrackerResult();
    2139             : 
    2140     1040290 :   ValueTrackerResult Res = getNextSourceImpl();
    2141     1040290 :   if (Res.isValid()) {
    2142             :     // Update definition, definition index, and subregister for the
    2143             :     // next call of getNextSource.
    2144             :     // Update the current register.
    2145      799183 :     bool OneRegSrc = Res.getNumSources() == 1;
    2146      799183 :     if (OneRegSrc)
    2147      799062 :       Reg = Res.getSrcReg(0);
    2148             :     // Update the result before moving up in the use-def chain
    2149             :     // with the instruction containing the last found sources.
    2150     1598366 :     Res.setInst(Def);
    2151             : 
    2152             :     // If we can still move up in the use-def chain, move to the next
    2153             :     // definition.
    2154     1598366 :     if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
    2155      482399 :       Def = MRI.getVRegDef(Reg);
    2156      964798 :       DefIdx = MRI.def_begin(Reg).getOperandNo();
    2157      482399 :       DefSubReg = Res.getSrcSubReg(0);
    2158             :       return Res;
    2159             :     }
    2160             :   }
    2161             :   // If we end up here, this means we will not be able to find another source
    2162             :   // for the next iteration. Make sure any new call to getNextSource bails out
    2163             :   // early by cutting the use-def chain.
    2164      557891 :   Def = nullptr;
    2165             :   return Res;
    2166      216918 : }

Generated by: LCOV version 1.13