LCOV - code coverage report
Current view: top level - lib/CodeGen - PeepholeOptimizer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 548 604 90.7 %
Date: 2018-06-17 00:07:59 Functions: 46 57 80.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // Perform peephole optimizations on the machine code:
      11             : //
      12             : // - Optimize Extensions
      13             : //
      14             : //     Optimization of sign / zero extension instructions. It may be extended to
      15             : //     handle other instructions with similar properties.
      16             : //
      17             : //     On some targets, some instructions, e.g. X86 sign / zero extension, may
      18             : //     leave the source value in the lower part of the result. This optimization
      19             : //     will replace some uses of the pre-extension value with uses of the
      20             : //     sub-register of the results.
      21             : //
      22             : // - Optimize Comparisons
      23             : //
      24             : //     Optimization of comparison instructions. For instance, in this code:
      25             : //
      26             : //       sub r1, 1
      27             : //       cmp r1, 0
      28             : //       bz  L1
      29             : //
      30             : //     If the "sub" instruction all ready sets (or could be modified to set) the
      31             : //     same flag that the "cmp" instruction sets and that "bz" uses, then we can
      32             : //     eliminate the "cmp" instruction.
      33             : //
      34             : //     Another instance, in this code:
      35             : //
      36             : //       sub r1, r3 | sub r1, imm
      37             : //       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
      38             : //       bge L1
      39             : //
      40             : //     If the branch instruction can use flag from "sub", then we can replace
      41             : //     "sub" with "subs" and eliminate the "cmp" instruction.
      42             : //
      43             : // - Optimize Loads:
      44             : //
      45             : //     Loads that can be folded into a later instruction. A load is foldable
      46             : //     if it loads to virtual registers and the virtual register defined has
      47             : //     a single use.
      48             : //
      49             : // - Optimize Copies and Bitcast (more generally, target specific copies):
      50             : //
      51             : //     Rewrite copies and bitcasts to avoid cross register bank copies
      52             : //     when possible.
      53             : //     E.g., Consider the following example, where capital and lower
      54             : //     letters denote different register file:
      55             : //     b = copy A <-- cross-bank copy
      56             : //     C = copy b <-- cross-bank copy
      57             : //   =>
      58             : //     b = copy A <-- cross-bank copy
      59             : //     C = copy A <-- same-bank copy
      60             : //
      61             : //     E.g., for bitcast:
      62             : //     b = bitcast A <-- cross-bank copy
      63             : //     C = bitcast b <-- cross-bank copy
      64             : //   =>
      65             : //     b = bitcast A <-- cross-bank copy
      66             : //     C = copy A    <-- same-bank copy
      67             : //===----------------------------------------------------------------------===//
      68             : 
      69             : #include "llvm/ADT/DenseMap.h"
      70             : #include "llvm/ADT/Optional.h"
      71             : #include "llvm/ADT/SmallPtrSet.h"
      72             : #include "llvm/ADT/SmallSet.h"
      73             : #include "llvm/ADT/SmallVector.h"
      74             : #include "llvm/ADT/Statistic.h"
      75             : #include "llvm/CodeGen/MachineBasicBlock.h"
      76             : #include "llvm/CodeGen/MachineDominators.h"
      77             : #include "llvm/CodeGen/MachineFunction.h"
      78             : #include "llvm/CodeGen/MachineFunctionPass.h"
      79             : #include "llvm/CodeGen/MachineInstr.h"
      80             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      81             : #include "llvm/CodeGen/MachineLoopInfo.h"
      82             : #include "llvm/CodeGen/MachineOperand.h"
      83             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      84             : #include "llvm/CodeGen/TargetInstrInfo.h"
      85             : #include "llvm/CodeGen/TargetOpcodes.h"
      86             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      87             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      88             : #include "llvm/MC/LaneBitmask.h"
      89             : #include "llvm/MC/MCInstrDesc.h"
      90             : #include "llvm/Pass.h"
      91             : #include "llvm/Support/CommandLine.h"
      92             : #include "llvm/Support/Debug.h"
      93             : #include "llvm/Support/ErrorHandling.h"
      94             : #include "llvm/Support/raw_ostream.h"
      95             : #include <cassert>
      96             : #include <cstdint>
      97             : #include <memory>
      98             : #include <utility>
      99             : 
     100             : using namespace llvm;
     101             : using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
     102             : using RegSubRegPairAndIdx = TargetInstrInfo::RegSubRegPairAndIdx;
     103             : 
     104             : #define DEBUG_TYPE "peephole-opt"
     105             : 
     106             : // Optimize Extensions
     107             : static cl::opt<bool>
     108      101169 : Aggressive("aggressive-ext-opt", cl::Hidden,
     109      101169 :            cl::desc("Aggressive extension optimization"));
     110             : 
     111             : static cl::opt<bool>
     112      303507 : DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
     113      202338 :                 cl::desc("Disable the peephole optimizer"));
     114             : 
     115             : /// Specifiy whether or not the value tracking looks through
     116             : /// complex instructions. When this is true, the value tracker
     117             : /// bails on everything that is not a copy or a bitcast.
     118             : static cl::opt<bool>
     119      303507 : DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
     120      202338 :                   cl::desc("Disable advanced copy optimization"));
     121             : 
     122      101169 : static cl::opt<bool> DisableNAPhysCopyOpt(
     123      202338 :     "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
     124      202338 :     cl::desc("Disable non-allocatable physical register copy optimization"));
     125             : 
     126             : // Limit the number of PHI instructions to process
     127             : // in PeepholeOptimizer::getNextSource.
     128      101169 : static cl::opt<unsigned> RewritePHILimit(
     129      202338 :     "rewrite-phi-limit", cl::Hidden, cl::init(10),
     130      202338 :     cl::desc("Limit the length of PHI chains to lookup"));
     131             : 
     132             : // Limit the length of recurrence chain when evaluating the benefit of
     133             : // commuting operands.
     134      101169 : static cl::opt<unsigned> MaxRecurrenceChain(
     135      202338 :     "recurrence-chain-limit", cl::Hidden, cl::init(3),
     136      101169 :     cl::desc("Maximum length of recurrence chain when evaluating the benefit "
     137      101169 :              "of commuting operands"));
     138             : 
     139             : 
     140             : STATISTIC(NumReuse, "Number of extension results reused");
     141             : STATISTIC(NumCmps, "Number of compares eliminated");
     142             : STATISTIC(NumImmFold, "Number of move immediate folded");
     143             : STATISTIC(NumLoadFold, "Number of loads folded");
     144             : STATISTIC(NumSelects, "Number of selects optimized");
     145             : STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
     146             : STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
     147             : STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
     148             : 
     149             : namespace {
     150             : 
     151             :   class ValueTrackerResult;
     152             :   class RecurrenceInstr;
     153             : 
     154       19034 :   class PeepholeOptimizer : public MachineFunctionPass {
     155             :     const TargetInstrInfo *TII;
     156             :     const TargetRegisterInfo *TRI;
     157             :     MachineRegisterInfo *MRI;
     158             :     MachineDominatorTree *DT;  // Machine dominator tree
     159             :     MachineLoopInfo *MLI;
     160             : 
     161             :   public:
     162             :     static char ID; // Pass identification
     163             : 
     164       19137 :     PeepholeOptimizer() : MachineFunctionPass(ID) {
     165       19137 :       initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
     166       19137 :     }
     167             : 
     168             :     bool runOnMachineFunction(MachineFunction &MF) override;
     169             : 
     170       18984 :     void getAnalysisUsage(AnalysisUsage &AU) const override {
     171       18984 :       AU.setPreservesCFG();
     172       18984 :       MachineFunctionPass::getAnalysisUsage(AU);
     173             :       AU.addRequired<MachineLoopInfo>();
     174             :       AU.addPreserved<MachineLoopInfo>();
     175       18984 :       if (Aggressive) {
     176             :         AU.addRequired<MachineDominatorTree>();
     177             :         AU.addPreserved<MachineDominatorTree>();
     178             :       }
     179       18984 :     }
     180             : 
     181             :     /// Track Def -> Use info used for rewriting copies.
     182             :     using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
     183             : 
     184             :     /// Sequence of instructions that formulate recurrence cycle.
     185             :     using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
     186             : 
     187             :   private:
     188             :     bool optimizeCmpInstr(MachineInstr &MI);
     189             :     bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
     190             :                           SmallPtrSetImpl<MachineInstr*> &LocalMIs);
     191             :     bool optimizeSelect(MachineInstr &MI,
     192             :                         SmallPtrSetImpl<MachineInstr *> &LocalMIs);
     193             :     bool optimizeCondBranch(MachineInstr &MI);
     194             :     bool optimizeCoalescableCopy(MachineInstr &MI);
     195             :     bool optimizeUncoalescableCopy(MachineInstr &MI,
     196             :                                    SmallPtrSetImpl<MachineInstr *> &LocalMIs);
     197             :     bool optimizeRecurrence(MachineInstr &PHI);
     198             :     bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
     199             :     bool isMoveImmediate(MachineInstr &MI,
     200             :                          SmallSet<unsigned, 4> &ImmDefRegs,
     201             :                          DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
     202             :     bool foldImmediate(MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs,
     203             :                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
     204             : 
     205             :     /// Finds recurrence cycles, but only ones that formulated around
     206             :     /// a def operand and a use operand that are tied. If there is a use
     207             :     /// operand commutable with the tied use operand, find recurrence cycle
     208             :     /// along that operand as well.
     209             :     bool findTargetRecurrence(unsigned Reg,
     210             :                               const SmallSet<unsigned, 2> &TargetReg,
     211             :                               RecurrenceCycle &RC);
     212             : 
     213             :     /// If copy instruction \p MI is a virtual register copy, track it in
     214             :     /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
     215             :     /// previously seen as a copy, replace the uses of this copy with the
     216             :     /// previously seen copy's destination register.
     217             :     bool foldRedundantCopy(MachineInstr &MI,
     218             :                            SmallSet<unsigned, 4> &CopySrcRegs,
     219             :                            DenseMap<unsigned, MachineInstr *> &CopyMIs);
     220             : 
     221             :     /// Is the register \p Reg a non-allocatable physical register?
     222             :     bool isNAPhysCopy(unsigned Reg);
     223             : 
     224             :     /// If copy instruction \p MI is a non-allocatable virtual<->physical
     225             :     /// register copy, track it in the \p NAPhysToVirtMIs map. If this
     226             :     /// non-allocatable physical register was previously copied to a virtual
     227             :     /// registered and hasn't been clobbered, the virt->phys copy can be
     228             :     /// deleted.
     229             :     bool foldRedundantNAPhysCopy(MachineInstr &MI,
     230             :         DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs);
     231             : 
     232             :     bool isLoadFoldable(MachineInstr &MI,
     233             :                         SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
     234             : 
     235             :     /// Check whether \p MI is understood by the register coalescer
     236             :     /// but may require some rewriting.
     237             :     bool isCoalescableCopy(const MachineInstr &MI) {
     238             :       // SubregToRegs are not interesting, because they are already register
     239             :       // coalescer friendly.
     240     6629220 :       return MI.isCopy() || (!DisableAdvCopyOpt &&
     241     2727258 :                              (MI.isRegSequence() || MI.isInsertSubreg() ||
     242             :                               MI.isExtractSubreg()));
     243             :     }
     244             : 
     245             :     /// Check whether \p MI is a copy like instruction that is
     246             :     /// not recognized by the register coalescer.
     247     3849106 :     bool isUncoalescableCopy(const MachineInstr &MI) {
     248     7693965 :       return MI.isBitcast() ||
     249     3844589 :              (!DisableAdvCopyOpt &&
     250     7686520 :               (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
     251     3849106 :                MI.isExtractSubregLike()));
     252             :     }
     253             : 
     254             :     MachineInstr &rewriteSource(MachineInstr &CopyLike,
     255             :                                 RegSubRegPair Def, RewriteMapTy &RewriteMap);
     256             :   };
     257             : 
     258             :   /// Helper class to hold instructions that are inside recurrence cycles.
     259             :   /// The recurrence cycle is formulated around 1) a def operand and its
     260             :   /// tied use operand, or 2) a def operand and a use operand that is commutable
     261             :   /// with another use operand which is tied to the def operand. In the latter
     262             :   /// case, index of the tied use operand and the commutable use operand are
     263             :   /// maintained with CommutePair.
     264        1811 :   class RecurrenceInstr {
     265             :   public:
     266             :     using IndexPair = std::pair<unsigned, unsigned>;
     267             : 
     268        1669 :     RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
     269             :     RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
     270         142 :       : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
     271             : 
     272             :     MachineInstr *getMI() const { return MI; }
     273             :     Optional<IndexPair> getCommutePair() const { return CommutePair; }
     274             : 
     275             :   private:
     276             :     MachineInstr *MI;
     277             :     Optional<IndexPair> CommutePair;
     278             :   };
     279             : 
     280             :   /// Helper class to hold a reply for ValueTracker queries.
     281             :   /// Contains the returned sources for a given search and the instructions
     282             :   /// where the sources were tracked from.
     283     6066330 :   class ValueTrackerResult {
     284             :   private:
     285             :     /// Track all sources found by one ValueTracker query.
     286             :     SmallVector<RegSubRegPair, 2> RegSrcs;
     287             : 
     288             :     /// Instruction using the sources in 'RegSrcs'.
     289             :     const MachineInstr *Inst = nullptr;
     290             : 
     291             :   public:
     292         273 :     ValueTrackerResult() = default;
     293             : 
     294      923802 :     ValueTrackerResult(unsigned Reg, unsigned SubReg) {
     295             :       addSource(Reg, SubReg);
     296             :     }
     297             : 
     298             :     bool isValid() const { return getNumSources() > 0; }
     299             : 
     300      923934 :     void setInst(const MachineInstr *I) { Inst = I; }
     301             :     const MachineInstr *getInst() const { return Inst; }
     302             : 
     303             :     void clear() {
     304             :       RegSrcs.clear();
     305             :       Inst = nullptr;
     306             :     }
     307             : 
     308             :     void addSource(unsigned SrcReg, unsigned SrcSubReg) {
     309     1847876 :       RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
     310             :     }
     311             : 
     312             :     void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) {
     313             :       assert(Idx < getNumSources() && "Reg pair source out of index");
     314             :       RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
     315             :     }
     316             : 
     317     3675478 :     int getNumSources() const { return RegSrcs.size(); }
     318             : 
     319             :     RegSubRegPair getSrc(int Idx) const {
     320      924341 :       return RegSrcs[Idx];
     321             :     }
     322             : 
     323             :     unsigned getSrcReg(int Idx) const {
     324             :       assert(Idx < getNumSources() && "Reg source out of index");
     325     1134495 :       return RegSrcs[Idx].Reg;
     326             :     }
     327             : 
     328             :     unsigned getSrcSubReg(int Idx) const {
     329             :       assert(Idx < getNumSources() && "SubReg source out of index");
     330      748696 :       return RegSrcs[Idx].SubReg;
     331             :     }
     332             : 
     333             :     bool operator==(const ValueTrackerResult &Other) {
     334             :       if (Other.getInst() != getInst())
     335             :         return false;
     336             : 
     337             :       if (Other.getNumSources() != getNumSources())
     338             :         return false;
     339             : 
     340             :       for (int i = 0, e = Other.getNumSources(); i != e; ++i)
     341             :         if (Other.getSrcReg(i) != getSrcReg(i) ||
     342             :             Other.getSrcSubReg(i) != getSrcSubReg(i))
     343             :           return false;
     344             :       return true;
     345             :     }
     346             :   };
     347             : 
     348             :   /// Helper class to track the possible sources of a value defined by
     349             :   /// a (chain of) copy related instructions.
     350             :   /// Given a definition (instruction and definition index), this class
     351             :   /// follows the use-def chain to find successive suitable sources.
     352             :   /// The given source can be used to rewrite the definition into
     353             :   /// def = COPY src.
     354             :   ///
     355             :   /// For instance, let us consider the following snippet:
     356             :   /// v0 =
     357             :   /// v2 = INSERT_SUBREG v1, v0, sub0
     358             :   /// def = COPY v2.sub0
     359             :   ///
     360             :   /// Using a ValueTracker for def = COPY v2.sub0 will give the following
     361             :   /// suitable sources:
     362             :   /// v2.sub0 and v0.
     363             :   /// Then, def can be rewritten into def = COPY v0.
     364             :   class ValueTracker {
     365             :   private:
     366             :     /// The current point into the use-def chain.
     367             :     const MachineInstr *Def = nullptr;
     368             : 
     369             :     /// The index of the definition in Def.
     370             :     unsigned DefIdx = 0;
     371             : 
     372             :     /// The sub register index of the definition.
     373             :     unsigned DefSubReg;
     374             : 
     375             :     /// The register where the value can be found.
     376             :     unsigned Reg;
     377             : 
     378             :     /// MachineRegisterInfo used to perform tracking.
     379             :     const MachineRegisterInfo &MRI;
     380             : 
     381             :     /// Optional TargetInstrInfo used to perform some complex tracking.
     382             :     const TargetInstrInfo *TII;
     383             : 
     384             :     /// Dispatcher to the right underlying implementation of getNextSource.
     385             :     ValueTrackerResult getNextSourceImpl();
     386             : 
     387             :     /// Specialized version of getNextSource for Copy instructions.
     388             :     ValueTrackerResult getNextSourceFromCopy();
     389             : 
     390             :     /// Specialized version of getNextSource for Bitcast instructions.
     391             :     ValueTrackerResult getNextSourceFromBitcast();
     392             : 
     393             :     /// Specialized version of getNextSource for RegSequence instructions.
     394             :     ValueTrackerResult getNextSourceFromRegSequence();
     395             : 
     396             :     /// Specialized version of getNextSource for InsertSubreg instructions.
     397             :     ValueTrackerResult getNextSourceFromInsertSubreg();
     398             : 
     399             :     /// Specialized version of getNextSource for ExtractSubreg instructions.
     400             :     ValueTrackerResult getNextSourceFromExtractSubreg();
     401             : 
     402             :     /// Specialized version of getNextSource for SubregToReg instructions.
     403             :     ValueTrackerResult getNextSourceFromSubregToReg();
     404             : 
     405             :     /// Specialized version of getNextSource for PHI instructions.
     406             :     ValueTrackerResult getNextSourceFromPHI();
     407             : 
     408             :   public:
     409             :     /// Create a ValueTracker instance for the value defined by \p Reg.
     410             :     /// \p DefSubReg represents the sub register index the value tracker will
     411             :     /// track. It does not need to match the sub register index used in the
     412             :     /// definition of \p Reg.
     413             :     /// If \p Reg is a physical register, a value tracker constructed with
     414             :     /// this constructor will not find any alternative source.
     415             :     /// Indeed, when \p Reg is a physical register that constructor does not
     416             :     /// know which definition of \p Reg it should track.
     417             :     /// Use the next constructor to track a physical register.
     418      822707 :     ValueTracker(unsigned Reg, unsigned DefSubReg,
     419             :                  const MachineRegisterInfo &MRI,
     420             :                  const TargetInstrInfo *TII = nullptr)
     421      822707 :         : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
     422      822707 :       if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
     423      822707 :         Def = MRI.getVRegDef(Reg);
     424     1645414 :         DefIdx = MRI.def_begin(Reg).getOperandNo();
     425             :       }
     426      822707 :     }
     427             : 
     428             :     /// Following the use-def chain, get the next available source
     429             :     /// for the tracked value.
     430             :     /// \return A ValueTrackerResult containing a set of registers
     431             :     /// and sub registers with tracked values. A ValueTrackerResult with
     432             :     /// an empty set of registers means no source was found.
     433             :     ValueTrackerResult getNextSource();
     434             :   };
     435             : 
     436             : } // end anonymous namespace
     437             : 
     438             : char PeepholeOptimizer::ID = 0;
     439             : 
     440             : char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
     441             : 
     442       26770 : INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
     443             :                       "Peephole Optimizations", false, false)
     444       26770 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     445       26770 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     446      186386 : INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
     447             :                     "Peephole Optimizations", false, false)
     448             : 
     449             : /// If instruction is a copy-like instruction, i.e. it reads a single register
     450             : /// and writes a single register and it does not modify the source, and if the
     451             : /// source value is preserved as a sub-register of the result, then replace all
     452             : /// reachable uses of the source with the subreg of the result.
     453             : ///
     454             : /// Do not generate an EXTRACT that is used only in a debug use, as this changes
     455             : /// the code. Since this code does not currently share EXTRACTs, just ignore all
     456             : /// debug uses.
     457     3738229 : bool PeepholeOptimizer::
     458             : optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
     459             :                  SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
     460             :   unsigned SrcReg, DstReg, SubIdx;
     461     3738229 :   if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
     462             :     return false;
     463             : 
     464       39427 :   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
     465       13139 :       TargetRegisterInfo::isPhysicalRegister(SrcReg))
     466             :     return false;
     467             : 
     468       13139 :   if (MRI->hasOneNonDBGUse(SrcReg))
     469             :     // No other uses.
     470             :     return false;
     471             : 
     472             :   // Ensure DstReg can get a register class that actually supports
     473             :   // sub-registers. Don't change the class until we commit.
     474         492 :   const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
     475         492 :   DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
     476         492 :   if (!DstRC)
     477             :     return false;
     478             : 
     479             :   // The ext instr may be operating on a sub-register of SrcReg as well.
     480             :   // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
     481             :   // register.
     482             :   // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
     483             :   // SrcReg:SubIdx should be replaced.
     484             :   bool UseSrcSubIdx =
     485         984 :       TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
     486             : 
     487             :   // The source has other uses. See if we can replace the other uses with use of
     488             :   // the result of the extension.
     489             :   SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
     490        2678 :   for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
     491         601 :     ReachedBBs.insert(UI.getParent());
     492             : 
     493             :   // Uses that are in the same BB of uses of the result of the instruction.
     494             :   SmallVector<MachineOperand*, 8> Uses;
     495             : 
     496             :   // Uses that the result of the instruction can reach.
     497             :   SmallVector<MachineOperand*, 8> ExtendedUses;
     498             : 
     499             :   bool ExtendLife = true;
     500        1874 :   for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
     501        1057 :     MachineInstr *UseMI = UseMO.getParent();
     502        1057 :     if (UseMI == &MI)
     503         385 :       continue;
     504             : 
     505          32 :     if (UseMI->isPHI()) {
     506             :       ExtendLife = false;
     507          32 :       continue;
     508             :     }
     509             : 
     510             :     // Only accept uses of SrcReg:SubIdx.
     511         650 :     if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
     512           3 :       continue;
     513             : 
     514             :     // It's an error to translate this:
     515             :     //
     516             :     //    %reg1025 = <sext> %reg1024
     517             :     //     ...
     518             :     //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
     519             :     //
     520             :     // into this:
     521             :     //
     522             :     //    %reg1025 = <sext> %reg1024
     523             :     //     ...
     524             :     //    %reg1027 = COPY %reg1025:4
     525             :     //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
     526             :     //
     527             :     // The problem here is that SUBREG_TO_REG is there to assert that an
     528             :     // implicit zext occurs. It doesn't insert a zext instruction. If we allow
     529             :     // the COPY here, it will give us the value after the <sext>, not the
     530             :     // original value of %reg1024 before <sext>.
     531         639 :     if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
     532           2 :       continue;
     533             : 
     534         635 :     MachineBasicBlock *UseMBB = UseMI->getParent();
     535         635 :     if (UseMBB == &MBB) {
     536             :       // Local uses that come after the extension.
     537         459 :       if (!LocalMIs.count(UseMI))
     538         324 :         Uses.push_back(&UseMO);
     539         176 :     } else if (ReachedBBs.count(UseMBB)) {
     540             :       // Non-local uses where the result of the extension is used. Always
     541             :       // replace these unless it's a PHI.
     542           9 :       Uses.push_back(&UseMO);
     543         167 :     } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
     544             :       // We may want to extend the live range of the extension result in order
     545             :       // to replace these uses.
     546           0 :       ExtendedUses.push_back(&UseMO);
     547             :     } else {
     548             :       // Both will be live out of the def MBB anyway. Don't extend live range of
     549             :       // the extension result.
     550             :       ExtendLife = false;
     551             :       break;
     552             :     }
     553             :   }
     554             : 
     555         492 :   if (ExtendLife && !ExtendedUses.empty())
     556             :     // Extend the liveness of the extension result.
     557           0 :     Uses.append(ExtendedUses.begin(), ExtendedUses.end());
     558             : 
     559             :   // Now replace all uses.
     560             :   bool Changed = false;
     561         492 :   if (!Uses.empty()) {
     562             :     SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
     563             : 
     564             :     // Look for PHI uses of the extended result, we don't want to extend the
     565             :     // liveness of a PHI input. It breaks all kinds of assumptions down
     566             :     // stream. A PHI use is expected to be the kill of its source values.
     567        1033 :     for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
     568             :       if (UI.isPHI())
     569           2 :         PHIBBs.insert(UI.getParent());
     570             : 
     571         193 :     const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
     572         526 :     for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
     573         666 :       MachineOperand *UseMO = Uses[i];
     574         333 :       MachineInstr *UseMI = UseMO->getParent();
     575         333 :       MachineBasicBlock *UseMBB = UseMI->getParent();
     576         333 :       if (PHIBBs.count(UseMBB))
     577           0 :         continue;
     578             : 
     579             :       // About to add uses of DstReg, clear DstReg's kill flags.
     580         333 :       if (!Changed) {
     581         193 :         MRI->clearKillFlags(DstReg);
     582         193 :         MRI->constrainRegClass(DstReg, DstRC);
     583             :       }
     584             : 
     585         666 :       unsigned NewVR = MRI->createVirtualRegister(RC);
     586         666 :       MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
     587         333 :                                    TII->get(TargetOpcode::COPY), NewVR)
     588         333 :         .addReg(DstReg, 0, SubIdx);
     589             :       // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
     590         333 :       if (UseSrcSubIdx) {
     591           1 :         Copy->getOperand(0).setSubReg(SubIdx);
     592           1 :         Copy->getOperand(0).setIsUndef();
     593             :       }
     594         333 :       UseMO->setReg(NewVR);
     595             :       ++NumReuse;
     596             :       Changed = true;
     597             :     }
     598             :   }
     599             : 
     600             :   return Changed;
     601             : }
     602             : 
     603             : /// If the instruction is a compare and the previous instruction it's comparing
     604             : /// against already sets (or could be modified to set) the same flag as the
     605             : /// compare, then we can remove the comparison and use the flag from the
     606             : /// previous instruction.
     607       73567 : bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
     608             :   // If this instruction is a comparison against zero and isn't comparing a
     609             :   // physical register, we can try to optimize it.
     610             :   unsigned SrcReg, SrcReg2;
     611             :   int CmpMask, CmpValue;
     612      139724 :   if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
     613      139716 :       TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
     614       81486 :       (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
     615             :     return false;
     616             : 
     617             :   // Attempt to optimize the comparison instruction.
     618       66149 :   if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
     619             :     ++NumCmps;
     620             :     return true;
     621             :   }
     622             : 
     623             :   return false;
     624             : }
     625             : 
     626             : /// Optimize a select instruction.
     627         766 : bool PeepholeOptimizer::optimizeSelect(MachineInstr &MI,
     628             :                             SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
     629         766 :   unsigned TrueOp = 0;
     630         766 :   unsigned FalseOp = 0;
     631         766 :   bool Optimizable = false;
     632             :   SmallVector<MachineOperand, 4> Cond;
     633         766 :   if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
     634             :     return false;
     635         428 :   if (!Optimizable)
     636             :     return false;
     637         428 :   if (!TII->optimizeSelect(MI, LocalMIs))
     638             :     return false;
     639         135 :   MI.eraseFromParent();
     640             :   ++NumSelects;
     641             :   return true;
     642             : }
     643             : 
     644             : /// Check if a simpler conditional branch can be generated.
     645             : bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
     646       46924 :   return TII->optimizeCondBranch(MI);
     647             : }
     648             : 
     649             : /// Try to find the next source that share the same register file
     650             : /// for the value defined by \p Reg and \p SubReg.
     651             : /// When true is returned, the \p RewriteMap can be used by the client to
     652             : /// retrieve all Def -> Use along the way up to the next source. Any found
     653             : /// Use that is not itself a key for another entry, is the next source to
     654             : /// use. During the search for the next source, multiple sources can be found
     655             : /// given multiple incoming sources of a PHI instruction. In this case, we
     656             : /// look in each PHI source for the next source; all found next sources must
     657             : /// share the same register file as \p Reg and \p SubReg. The client should
     658             : /// then be capable to rewrite all intermediate PHIs to get the next source.
     659             : /// \return False if no alternative sources are available. True otherwise.
     660      822556 : bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
     661             :                                        RewriteMapTy &RewriteMap) {
     662             :   // Do not try to find a new source for a physical register.
     663             :   // So far we do not have any motivating example for doing that.
     664             :   // Thus, instead of maintaining untested code, we will revisit that if
     665             :   // that changes at some point.
     666      822556 :   unsigned Reg = RegSubReg.Reg;
     667      822556 :   if (TargetRegisterInfo::isPhysicalRegister(Reg))
     668             :     return false;
     669      822556 :   const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
     670             : 
     671             :   SmallVector<RegSubRegPair, 4> SrcToLook;
     672      822556 :   RegSubRegPair CurSrcPair = RegSubReg;
     673      822556 :   SrcToLook.push_back(CurSrcPair);
     674             : 
     675             :   unsigned PHICount = 0;
     676             :   do {
     677      822707 :     CurSrcPair = SrcToLook.pop_back_val();
     678             :     // As explained above, do not handle physical registers
     679      822707 :     if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
     680      642053 :       return false;
     681             : 
     682      822707 :     ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
     683             : 
     684             :     // Follow the chain of copies until we find a more suitable source, a phi
     685             :     // or have to abort.
     686             :     while (true) {
     687     1180176 :       ValueTrackerResult Res = ValTracker.getNextSource();
     688             :       // Abort at the end of a chain (without finding a suitable source).
     689     1180176 :       if (!Res.isValid())
     690             :         return false;
     691             : 
     692             :       // Insert the Def -> Use entry for the recently found source.
     693      923934 :       ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
     694      923934 :       if (CurSrcRes.isValid()) {
     695             :         assert(CurSrcRes == Res && "ValueTrackerResult found must match");
     696             :         // An existent entry with multiple sources is a PHI cycle we must avoid.
     697             :         // Otherwise it's an entry with a valid next source we already found.
     698           5 :         if (CurSrcRes.getNumSources() > 1) {
     699             :           LLVM_DEBUG(dbgs()
     700             :                      << "findNextSource: found PHI cycle, aborting...\n");
     701             :           return false;
     702             :         }
     703             :         break;
     704             :       }
     705     1847858 :       RewriteMap.insert(std::make_pair(CurSrcPair, Res));
     706             : 
     707             :       // ValueTrackerResult usually have one source unless it's the result from
     708             :       // a PHI instruction. Add the found PHI edges to be looked up further.
     709      923929 :       unsigned NumSrcs = Res.getNumSources();
     710      923929 :       if (NumSrcs > 1) {
     711         132 :         PHICount++;
     712         132 :         if (PHICount >= RewritePHILimit) {
     713             :           LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
     714             :           return false;
     715             :         }
     716             : 
     717         676 :         for (unsigned i = 0; i < NumSrcs; ++i)
     718         544 :           SrcToLook.push_back(Res.getSrc(i));
     719             :         break;
     720             :       }
     721             : 
     722      923797 :       CurSrcPair = Res.getSrc(0);
     723             :       // Do not extend the live-ranges of physical registers as they add
     724             :       // constraints to the register allocator. Moreover, if we want to extend
     725             :       // the live-range of a physical register, unlike SSA virtual register,
     726             :       // we will have to check that they aren't redefine before the related use.
     727      923797 :       if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
     728             :         return false;
     729             : 
     730             :       // Keep following the chain if the value isn't any better yet.
     731      537986 :       const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
     732     1075972 :       if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC,
     733      537986 :                                      CurSrcPair.SubReg))
     734      357468 :         continue;
     735             : 
     736             :       // We currently cannot deal with subreg operands on PHI instructions
     737             :       // (see insertPHI()).
     738      180518 :       if (PHICount > 0 && CurSrcPair.SubReg != 0)
     739           1 :         continue;
     740             : 
     741             :       // We found a suitable source, and are done with this chain.
     742             :       break;
     743             :     }
     744      180654 :   } while (!SrcToLook.empty());
     745             : 
     746             :   // If we did not find a more suitable source, there is nothing to optimize.
     747      180503 :   return CurSrcPair.Reg != Reg;
     748             : }
     749             : 
     750             : /// Insert a PHI instruction with incoming edges \p SrcRegs that are
     751             : /// guaranteed to have the same register class. This is necessary whenever we
     752             : /// successfully traverse a PHI instruction and find suitable sources coming
     753             : /// from its edges. By inserting a new PHI, we provide a rewritten PHI def
     754             : /// suitable to be used in a new COPY instruction.
     755             : static MachineInstr &
     756           6 : insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
     757             :           const SmallVectorImpl<RegSubRegPair> &SrcRegs,
     758             :           MachineInstr &OrigPHI) {
     759             :   assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
     760             : 
     761           6 :   const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
     762             :   // NewRC is only correct if no subregisters are involved. findNextSource()
     763             :   // should have rejected those cases already.
     764             :   assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
     765           6 :   unsigned NewVR = MRI.createVirtualRegister(NewRC);
     766           6 :   MachineBasicBlock *MBB = OrigPHI.getParent();
     767             :   MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
     768          12 :                                     TII.get(TargetOpcode::PHI), NewVR);
     769             : 
     770             :   unsigned MBBOpIdx = 2;
     771          30 :   for (const RegSubRegPair &RegPair : SrcRegs) {
     772          12 :     MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
     773          24 :     MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
     774             :     // Since we're extended the lifetime of RegPair.Reg, clear the
     775             :     // kill flags to account for that and make RegPair.Reg reaches
     776             :     // the new PHI.
     777          12 :     MRI.clearKillFlags(RegPair.Reg);
     778          12 :     MBBOpIdx += 2;
     779             :   }
     780             : 
     781           6 :   return *MIB;
     782             : }
     783             : 
     784             : namespace {
     785             : 
     786             : /// Interface to query instructions amenable to copy rewriting.
     787             : class Rewriter {
     788             : protected:
     789             :   MachineInstr &CopyLike;
     790             :   unsigned CurrentSrcIdx = 0;   ///< The index of the source being rewritten.
     791             : public:
     792      722589 :   Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
     793             :   virtual ~Rewriter() {}
     794             : 
     795             :   /// Get the next rewritable source (SrcReg, SrcSubReg) and
     796             :   /// the related value that it affects (DstReg, DstSubReg).
     797             :   /// A source is considered rewritable if its register class and the
     798             :   /// register class of the related DstReg may not be register
     799             :   /// coalescer friendly. In other words, given a copy-like instruction
     800             :   /// not all the arguments may be returned at rewritable source, since
     801             :   /// some arguments are none to be register coalescer friendly.
     802             :   ///
     803             :   /// Each call of this method moves the current source to the next
     804             :   /// rewritable source.
     805             :   /// For instance, let CopyLike be the instruction to rewrite.
     806             :   /// CopyLike has one definition and one source:
     807             :   /// dst.dstSubIdx = CopyLike src.srcSubIdx.
     808             :   ///
     809             :   /// The first call will give the first rewritable source, i.e.,
     810             :   /// the only source this instruction has:
     811             :   /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
     812             :   /// This source defines the whole definition, i.e.,
     813             :   /// (DstReg, DstSubReg) = (dst, dstSubIdx).
     814             :   ///
     815             :   /// The second and subsequent calls will return false, as there is only one
     816             :   /// rewritable source.
     817             :   ///
     818             :   /// \return True if a rewritable source has been found, false otherwise.
     819             :   /// The output arguments are valid if and only if true is returned.
     820             :   virtual bool getNextRewritableSource(RegSubRegPair &Src,
     821             :                                        RegSubRegPair &Dst) = 0;
     822             : 
     823             :   /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
     824             :   /// \return True if the rewriting was possible, false otherwise.
     825             :   virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) = 0;
     826             : };
     827             : 
     828             : /// Rewriter for COPY instructions.
     829             : class CopyRewriter : public Rewriter {
     830             : public:
     831      636978 :   CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
     832             :     assert(MI.isCopy() && "Expected copy instruction");
     833             :   }
     834      636978 :   virtual ~CopyRewriter() = default;
     835             : 
     836     1273956 :   bool getNextRewritableSource(RegSubRegPair &Src,
     837             :                                RegSubRegPair &Dst) override {
     838             :     // CurrentSrcIdx > 0 means this function has already been called.
     839     1273956 :     if (CurrentSrcIdx > 0)
     840             :       return false;
     841             :     // This is the first call to getNextRewritableSource.
     842             :     // Move the CurrentSrcIdx to remember that we made that call.
     843      636978 :     CurrentSrcIdx = 1;
     844             :     // The rewritable source is the argument.
     845      636978 :     const MachineOperand &MOSrc = CopyLike.getOperand(1);
     846      636978 :     Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
     847             :     // What we track are the alternative sources of the definition.
     848             :     const MachineOperand &MODef = CopyLike.getOperand(0);
     849      636978 :     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
     850      636978 :     return true;
     851             :   }
     852             : 
     853       22029 :   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
     854       22029 :     if (CurrentSrcIdx != 1)
     855             :       return false;
     856       22029 :     MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
     857       22029 :     MOSrc.setReg(NewReg);
     858             :     MOSrc.setSubReg(NewSubReg);
     859       22029 :     return true;
     860             :   }
     861             : };
     862             : 
     863             : /// Helper class to rewrite uncoalescable copy like instructions
     864             : /// into new COPY (coalescable friendly) instructions.
     865           0 : class UncoalescableRewriter : public Rewriter {
     866             :   unsigned NumDefs;  ///< Number of defs in the bitcast.
     867             : 
     868             : public:
     869       17060 :   UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
     870       17060 :     NumDefs = MI.getDesc().getNumDefs();
     871             :   }
     872             : 
     873             :   /// \see See Rewriter::getNextRewritableSource()
     874             :   /// All such sources need to be considered rewritable in order to
     875             :   /// rewrite a uncoalescable copy-like instruction. This method return
     876             :   /// each definition that must be checked if rewritable.
     877        8639 :   bool getNextRewritableSource(RegSubRegPair &Src,
     878             :                                RegSubRegPair &Dst) override {
     879             :     // Find the next non-dead definition and continue from there.
     880        8639 :     if (CurrentSrcIdx == NumDefs)
     881             :       return false;
     882             : 
     883       17154 :     while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
     884           0 :       ++CurrentSrcIdx;
     885           0 :       if (CurrentSrcIdx == NumDefs)
     886             :         return false;
     887             :     }
     888             : 
     889             :     // What we track are the alternative sources of the definition.
     890        8577 :     Src = RegSubRegPair(0, 0);
     891             :     const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
     892        8577 :     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
     893             : 
     894        8577 :     CurrentSrcIdx++;
     895        8577 :     return true;
     896             :   }
     897             : 
     898           0 :   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
     899           0 :     return false;
     900             :   }
     901             : };
     902             : 
     903             : /// Specialized rewriter for INSERT_SUBREG instruction.
     904       22945 : class InsertSubregRewriter : public Rewriter {
     905             : public:
     906       22945 :   InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
     907             :     assert(MI.isInsertSubreg() && "Invalid instruction");
     908             :   }
     909             : 
     910             :   /// \see See Rewriter::getNextRewritableSource()
     911             :   /// Here CopyLike has the following form:
     912             :   /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
     913             :   /// Src1 has the same register class has dst, hence, there is
     914             :   /// nothing to rewrite.
     915             :   /// Src2.src2SubIdx, may not be register coalescer friendly.
     916             :   /// Therefore, the first call to this method returns:
     917             :   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
     918             :   /// (DstReg, DstSubReg) = (dst, subIdx).
     919             :   ///
     920             :   /// Subsequence calls will return false.
     921       45890 :   bool getNextRewritableSource(RegSubRegPair &Src,
     922             :                                RegSubRegPair &Dst) override {
     923             :     // If we already get the only source we can rewrite, return false.
     924       45890 :     if (CurrentSrcIdx == 2)
     925             :       return false;
     926             :     // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
     927       22945 :     CurrentSrcIdx = 2;
     928       22945 :     const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
     929       22945 :     Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
     930             :     const MachineOperand &MODef = CopyLike.getOperand(0);
     931             : 
     932             :     // We want to track something that is compatible with the
     933             :     // partial definition.
     934       22945 :     if (MODef.getSubReg())
     935             :       // Bail if we have to compose sub-register indices.
     936             :       return false;
     937       22945 :     Dst = RegSubRegPair(MODef.getReg(),
     938       22945 :                         (unsigned)CopyLike.getOperand(3).getImm());
     939       22945 :     return true;
     940             :   }
     941             : 
     942           5 :   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
     943           5 :     if (CurrentSrcIdx != 2)
     944             :       return false;
     945             :     // We are rewriting the inserted reg.
     946           5 :     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
     947           5 :     MO.setReg(NewReg);
     948             :     MO.setSubReg(NewSubReg);
     949           5 :     return true;
     950             :   }
     951             : };
     952             : 
     953             : /// Specialized rewriter for EXTRACT_SUBREG instruction.
     954           0 : class ExtractSubregRewriter : public Rewriter {
     955             :   const TargetInstrInfo &TII;
     956             : 
     957             : public:
     958             :   ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
     959           0 :       : Rewriter(MI), TII(TII) {
     960             :     assert(MI.isExtractSubreg() && "Invalid instruction");
     961             :   }
     962             : 
     963             :   /// \see Rewriter::getNextRewritableSource()
     964             :   /// Here CopyLike has the following form:
     965             :   /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
     966             :   /// There is only one rewritable source: Src.subIdx,
     967             :   /// which defines dst.dstSubIdx.
     968           0 :   bool getNextRewritableSource(RegSubRegPair &Src,
     969             :                                RegSubRegPair &Dst) override {
     970             :     // If we already get the only source we can rewrite, return false.
     971           0 :     if (CurrentSrcIdx == 1)
     972             :       return false;
     973             :     // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
     974           0 :     CurrentSrcIdx = 1;
     975           0 :     const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
     976             :     // If we have to compose sub-register indices, bail out.
     977           0 :     if (MOExtractedReg.getSubReg())
     978             :       return false;
     979             : 
     980           0 :     Src = RegSubRegPair(MOExtractedReg.getReg(),
     981           0 :                         CopyLike.getOperand(2).getImm());
     982             : 
     983             :     // We want to track something that is compatible with the definition.
     984             :     const MachineOperand &MODef = CopyLike.getOperand(0);
     985           0 :     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
     986           0 :     return true;
     987             :   }
     988             : 
     989           0 :   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
     990             :     // The only source we can rewrite is the input register.
     991           0 :     if (CurrentSrcIdx != 1)
     992             :       return false;
     993             : 
     994           0 :     CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
     995             : 
     996             :     // If we find a source that does not require to extract something,
     997             :     // rewrite the operation with a copy.
     998           0 :     if (!NewSubReg) {
     999             :       // Move the current index to an invalid position.
    1000             :       // We do not want another call to this method to be able
    1001             :       // to do any change.
    1002           0 :       CurrentSrcIdx = -1;
    1003             :       // Rewrite the operation as a COPY.
    1004             :       // Get rid of the sub-register index.
    1005           0 :       CopyLike.RemoveOperand(2);
    1006             :       // Morph the operation into a COPY.
    1007           0 :       CopyLike.setDesc(TII.get(TargetOpcode::COPY));
    1008           0 :       return true;
    1009             :     }
    1010           0 :     CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
    1011           0 :     return true;
    1012             :   }
    1013             : };
    1014             : 
    1015             : /// Specialized rewriter for REG_SEQUENCE instruction.
    1016       54136 : class RegSequenceRewriter : public Rewriter {
    1017             : public:
    1018       54136 :   RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
    1019             :     assert(MI.isRegSequence() && "Invalid instruction");
    1020             :   }
    1021             : 
    1022             :   /// \see Rewriter::getNextRewritableSource()
    1023             :   /// Here CopyLike has the following form:
    1024             :   /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
    1025             :   /// Each call will return a different source, walking all the available
    1026             :   /// source.
    1027             :   ///
    1028             :   /// The first call returns:
    1029             :   /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
    1030             :   /// (DstReg, DstSubReg) = (dst, subIdx1).
    1031             :   ///
    1032             :   /// The second call returns:
    1033             :   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
    1034             :   /// (DstReg, DstSubReg) = (dst, subIdx2).
    1035             :   ///
    1036             :   /// And so on, until all the sources have been traversed, then
    1037             :   /// it returns false.
    1038      208192 :   bool getNextRewritableSource(RegSubRegPair &Src,
    1039             :                                RegSubRegPair &Dst) override {
    1040             :     // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
    1041             : 
    1042             :     // If this is the first call, move to the first argument.
    1043      208192 :     if (CurrentSrcIdx == 0) {
    1044       54136 :       CurrentSrcIdx = 1;
    1045             :     } else {
    1046             :       // Otherwise, move to the next argument and check that it is valid.
    1047      154056 :       CurrentSrcIdx += 2;
    1048      154056 :       if (CurrentSrcIdx >= CopyLike.getNumOperands())
    1049             :         return false;
    1050             :     }
    1051      154164 :     const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
    1052      154164 :     Src.Reg = MOInsertedReg.getReg();
    1053             :     // If we have to compose sub-register indices, bail out.
    1054      154164 :     if ((Src.SubReg = MOInsertedReg.getSubReg()))
    1055             :       return false;
    1056             : 
    1057             :     // We want to track something that is compatible with the related
    1058             :     // partial definition.
    1059      308112 :     Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
    1060             : 
    1061             :     const MachineOperand &MODef = CopyLike.getOperand(0);
    1062      154056 :     Dst.Reg = MODef.getReg();
    1063             :     // If we have to compose sub-registers, bail.
    1064      154056 :     return MODef.getSubReg() == 0;
    1065             :   }
    1066             : 
    1067        7805 :   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    1068             :     // We cannot rewrite out of bound operands.
    1069             :     // Moreover, rewritable sources are at odd positions.
    1070        7805 :     if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
    1071             :       return false;
    1072             : 
    1073        7805 :     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    1074        7805 :     MO.setReg(NewReg);
    1075             :     MO.setSubReg(NewSubReg);
    1076        7805 :     return true;
    1077             :   }
    1078             : };
    1079             : 
    1080             : } // end anonymous namespace
    1081             : 
    1082             : /// Get the appropriated Rewriter for \p MI.
    1083             : /// \return A pointer to a dynamically allocated Rewriter or nullptr if no
    1084             : /// rewriter works for \p MI.
    1085      714059 : static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) {
    1086             :   // Handle uncoalescable copy-like instructions.
    1087     2856236 :   if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
    1088             :       MI.isExtractSubregLike())
    1089           0 :     return new UncoalescableRewriter(MI);
    1090             : 
    1091     1428118 :   switch (MI.getOpcode()) {
    1092             :   default:
    1093             :     return nullptr;
    1094      636978 :   case TargetOpcode::COPY:
    1095     1273956 :     return new CopyRewriter(MI);
    1096       22945 :   case TargetOpcode::INSERT_SUBREG:
    1097       45890 :     return new InsertSubregRewriter(MI);
    1098           0 :   case TargetOpcode::EXTRACT_SUBREG:
    1099           0 :     return new ExtractSubregRewriter(MI, TII);
    1100       54136 :   case TargetOpcode::REG_SEQUENCE:
    1101      108272 :     return new RegSequenceRewriter(MI);
    1102             :   }
    1103             : }
    1104             : 
    1105             : /// Given a \p Def.Reg and Def.SubReg  pair, use \p RewriteMap to find
    1106             : /// the new source to use for rewrite. If \p HandleMultipleSources is true and
    1107             : /// multiple sources for a given \p Def are found along the way, we found a
    1108             : /// PHI instructions that needs to be rewritten.
    1109             : /// TODO: HandleMultipleSources should be removed once we test PHI handling
    1110             : /// with coalescable copies.
    1111             : static RegSubRegPair
    1112      180511 : getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
    1113             :              RegSubRegPair Def,
    1114             :              const PeepholeOptimizer::RewriteMapTy &RewriteMap,
    1115             :              bool HandleMultipleSources = true) {
    1116      180511 :   RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
    1117             :   while (true) {
    1118      391192 :     ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
    1119             :     // If there are no entries on the map, LookupSrc is the new source.
    1120      391192 :     if (!Res.isValid())
    1121      180505 :       return LookupSrc;
    1122             : 
    1123             :     // There's only one source for this definition, keep searching...
    1124      210687 :     unsigned NumSrcs = Res.getNumSources();
    1125      421368 :     if (NumSrcs == 1) {
    1126      210681 :       LookupSrc.Reg = Res.getSrcReg(0);
    1127      210681 :       LookupSrc.SubReg = Res.getSrcSubReg(0);
    1128             :       continue;
    1129             :     }
    1130             : 
    1131             :     // TODO: Remove once multiple srcs w/ coalescable copies are supported.
    1132           6 :     if (!HandleMultipleSources)
    1133             :       break;
    1134             : 
    1135             :     // Multiple sources, recurse into each source to find a new source
    1136             :     // for it. Then, rewrite the PHI accordingly to its new edges.
    1137             :     SmallVector<RegSubRegPair, 4> NewPHISrcs;
    1138          30 :     for (unsigned i = 0; i < NumSrcs; ++i) {
    1139          12 :       RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
    1140          12 :       NewPHISrcs.push_back(
    1141          24 :           getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
    1142             :     }
    1143             : 
    1144             :     // Build the new PHI node and return its def register as the new source.
    1145           6 :     MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
    1146           6 :     MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
    1147             :     LLVM_DEBUG(dbgs() << "-- getNewSource\n");
    1148             :     LLVM_DEBUG(dbgs() << "   Replacing: " << OrigPHI);
    1149             :     LLVM_DEBUG(dbgs() << "        With: " << NewPHI);
    1150           6 :     const MachineOperand &MODef = NewPHI.getOperand(0);
    1151           6 :     return RegSubRegPair(MODef.getReg(), MODef.getSubReg());
    1152             :   }
    1153             : 
    1154           0 :   return RegSubRegPair(0, 0);
    1155             : }
    1156             : 
    1157             : /// Optimize generic copy instructions to avoid cross register bank copy.
    1158             : /// The optimization looks through a chain of copies and tries to find a source
    1159             : /// that has a compatible register class.
    1160             : /// Two register classes are considered to be compatible if they share the same
    1161             : /// register bank.
    1162             : /// New copies issued by this optimization are register allocator
    1163             : /// friendly. This optimization does not remove any copy as it may
    1164             : /// overconstrain the register allocator, but replaces some operands
    1165             : /// when possible.
    1166             : /// \pre isCoalescableCopy(*MI) is true.
    1167             : /// \return True, when \p MI has been rewritten. False otherwise.
    1168     1143341 : bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
    1169             :   assert(isCoalescableCopy(MI) && "Invalid argument");
    1170             :   assert(MI.getDesc().getNumDefs() == 1 &&
    1171             :          "Coalescer can understand multiple defs?!");
    1172     1143341 :   const MachineOperand &MODef = MI.getOperand(0);
    1173             :   // Do not rewrite physical definitions.
    1174     2286682 :   if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
    1175             :     return false;
    1176             : 
    1177             :   bool Changed = false;
    1178             :   // Get the right rewriter for the current copy.
    1179      714059 :   std::unique_ptr<Rewriter> CpyRewriter(getCopyRewriter(MI, *TII));
    1180             :   // If none exists, bail out.
    1181      714059 :   if (!CpyRewriter)
    1182             :     return false;
    1183             :   // Rewrite each rewritable source.
    1184             :   RegSubRegPair Src;
    1185             :   RegSubRegPair TrackPair;
    1186     1528038 :   while (CpyRewriter->getNextRewritableSource(Src, TrackPair)) {
    1187             :     // Keep track of PHI nodes and its incoming edges when looking for sources.
    1188       29839 :     RewriteMapTy RewriteMap;
    1189             :     // Try to find a more suitable source. If we failed to do so, or get the
    1190             :     // actual source, move to the next source.
    1191      813979 :     if (!findNextSource(TrackPair, RewriteMap))
    1192     1417725 :       continue;
    1193             : 
    1194             :     // Get the new source to rewrite. TODO: Only enable handling of multiple
    1195             :     // sources (PHIs) once we have a motivating example and testcases for it.
    1196             :     RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
    1197      180394 :                                         /*HandleMultipleSources=*/false);
    1198      180394 :     if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0)
    1199      150555 :       continue;
    1200             : 
    1201             :     // Rewrite source.
    1202       29839 :     if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
    1203             :       // We may have extended the live-range of NewSrc, account for that.
    1204       29839 :       MRI->clearKillFlags(NewSrc.Reg);
    1205             :       Changed = true;
    1206             :     }
    1207             :   }
    1208             :   // TODO: We could have a clean-up method to tidy the instruction.
    1209             :   // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
    1210             :   // => v0 = COPY v1
    1211             :   // Currently we haven't seen motivating example for that and we
    1212             :   // want to avoid untested code.
    1213             :   NumRewrittenCopies += Changed;
    1214      714059 :   return Changed;
    1215             : }
    1216             : 
    1217             : /// Rewrite the source found through \p Def, by using the \p RewriteMap
    1218             : /// and create a new COPY instruction. More info about RewriteMap in
    1219             : /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
    1220             : /// Uncoalescable copies, since they are copy like instructions that aren't
    1221             : /// recognized by the register allocator.
    1222             : MachineInstr &
    1223         105 : PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
    1224             :                                  RegSubRegPair Def, RewriteMapTy &RewriteMap) {
    1225             :   assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
    1226             :          "We do not rewrite physical registers");
    1227             : 
    1228             :   // Find the new source to use in the COPY rewrite.
    1229         105 :   RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);
    1230             : 
    1231             :   // Insert the COPY.
    1232         105 :   const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
    1233         105 :   unsigned NewVReg = MRI->createVirtualRegister(DefRC);
    1234             : 
    1235             :   MachineInstr *NewCopy =
    1236         315 :       BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
    1237         105 :               TII->get(TargetOpcode::COPY), NewVReg)
    1238         105 :           .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
    1239             : 
    1240         105 :   if (Def.SubReg) {
    1241           0 :     NewCopy->getOperand(0).setSubReg(Def.SubReg);
    1242           0 :     NewCopy->getOperand(0).setIsUndef();
    1243             :   }
    1244             : 
    1245             :   LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
    1246             :   LLVM_DEBUG(dbgs() << "   Replacing: " << CopyLike);
    1247             :   LLVM_DEBUG(dbgs() << "        With: " << *NewCopy);
    1248         105 :   MRI->replaceRegWith(Def.Reg, NewVReg);
    1249         105 :   MRI->clearKillFlags(NewVReg);
    1250             : 
    1251             :   // We extended the lifetime of NewSrc.Reg, clear the kill flags to
    1252             :   // account for that.
    1253         105 :   MRI->clearKillFlags(NewSrc.Reg);
    1254             : 
    1255         105 :   return *NewCopy;
    1256             : }
    1257             : 
    1258             : /// Optimize copy-like instructions to create
    1259             : /// register coalescer friendly instruction.
    1260             : /// The optimization tries to kill-off the \p MI by looking
    1261             : /// through a chain of copies to find a source that has a compatible
    1262             : /// register class.
    1263             : /// If such a source is found, it replace \p MI by a generic COPY
    1264             : /// operation.
    1265             : /// \pre isUncoalescableCopy(*MI) is true.
    1266             : /// \return True, when \p MI has been optimized. In that case, \p MI has
    1267             : /// been removed from its parent.
    1268             : /// All COPY instructions created, are inserted in \p LocalMIs.
    1269        8530 : bool PeepholeOptimizer::optimizeUncoalescableCopy(
    1270             :     MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
    1271             :   assert(isUncoalescableCopy(MI) && "Invalid argument");
    1272             :   UncoalescableRewriter CpyRewriter(MI);
    1273             : 
    1274             :   // Rewrite each rewritable source by generating new COPYs. This works
    1275             :   // differently from optimizeCoalescableCopy since it first makes sure that all
    1276             :   // definitions can be rewritten.
    1277        8530 :   RewriteMapTy RewriteMap;
    1278             :   RegSubRegPair Src;
    1279             :   RegSubRegPair Def;
    1280             :   SmallVector<RegSubRegPair, 4> RewritePairs;
    1281        8748 :   while (CpyRewriter.getNextRewritableSource(Src, Def)) {
    1282             :     // If a physical register is here, this is probably for a good reason.
    1283             :     // Do not rewrite that.
    1284       17154 :     if (TargetRegisterInfo::isPhysicalRegister(Def.Reg))
    1285             :       return false;
    1286             : 
    1287             :     // If we do not know how to rewrite this definition, there is no point
    1288             :     // in trying to kill this instruction.
    1289        8577 :     if (!findNextSource(Def, RewriteMap))
    1290             :       return false;
    1291             : 
    1292         109 :     RewritePairs.push_back(Def);
    1293             :   }
    1294             : 
    1295             :   // The change is possible for all defs, do it.
    1296         272 :   for (const RegSubRegPair &Def : RewritePairs) {
    1297             :     // Rewrite the "copy" in a way the register coalescer understands.
    1298         105 :     MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
    1299         105 :     LocalMIs.insert(&NewCopy);
    1300             :   }
    1301             : 
    1302             :   // MI is now dead.
    1303          62 :   MI.eraseFromParent();
    1304             :   ++NumUncoalescableCopies;
    1305          62 :   return true;
    1306             : }
    1307             : 
    1308             : /// Check whether MI is a candidate for folding into a later instruction.
    1309             : /// We only fold loads to virtual registers and the virtual register defined
    1310             : /// has a single use.
    1311     3801038 : bool PeepholeOptimizer::isLoadFoldable(
    1312             :     MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
    1313     3801038 :   if (!MI.canFoldAsLoad() || !MI.mayLoad())
    1314             :     return false;
    1315      175942 :   const MCInstrDesc &MCID = MI.getDesc();
    1316      175942 :   if (MCID.getNumDefs() != 1)
    1317             :     return false;
    1318             : 
    1319      175942 :   unsigned Reg = MI.getOperand(0).getReg();
    1320             :   // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
    1321             :   // loads. It should be checked when processing uses of the load, since
    1322             :   // uses can be removed during peephole.
    1323      175942 :   if (!MI.getOperand(0).getSubReg() &&
    1324      351831 :       TargetRegisterInfo::isVirtualRegister(Reg) &&
    1325      175889 :       MRI->hasOneNonDBGUse(Reg)) {
    1326      137801 :     FoldAsLoadDefCandidates.insert(Reg);
    1327             :     return true;
    1328             :   }
    1329             :   return false;
    1330             : }
    1331             : 
    1332     3801038 : bool PeepholeOptimizer::isMoveImmediate(
    1333             :     MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs,
    1334             :     DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
    1335     3801038 :   const MCInstrDesc &MCID = MI.getDesc();
    1336     3801038 :   if (!MI.isMoveImmediate())
    1337             :     return false;
    1338       65291 :   if (MCID.getNumDefs() != 1)
    1339             :     return false;
    1340       64533 :   unsigned Reg = MI.getOperand(0).getReg();
    1341       64533 :   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    1342      125618 :     ImmDefMIs.insert(std::make_pair(Reg, &MI));
    1343       62809 :     ImmDefRegs.insert(Reg);
    1344             :     return true;
    1345             :   }
    1346             : 
    1347             :   return false;
    1348             : }
    1349             : 
    1350             : /// Try folding register operands that are defined by move immediate
    1351             : /// instructions, i.e. a trivial constant folding optimization, if
    1352             : /// and only if the def and use are in the same BB.
    1353      341152 : bool PeepholeOptimizer::foldImmediate(MachineInstr &MI,
    1354             :     SmallSet<unsigned, 4> &ImmDefRegs,
    1355             :     DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
    1356     2773226 :   for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    1357     1046739 :     MachineOperand &MO = MI.getOperand(i);
    1358     1792235 :     if (!MO.isReg() || MO.isDef())
    1359      981876 :       continue;
    1360             :     // Ignore dead implicit defs.
    1361      463759 :     if (MO.isImplicit() && MO.isDead())
    1362             :       continue;
    1363      463759 :     unsigned Reg = MO.getReg();
    1364      497682 :     if (!TargetRegisterInfo::isVirtualRegister(Reg))
    1365             :       continue;
    1366      429836 :     if (ImmDefRegs.count(Reg) == 0)
    1367             :       continue;
    1368       64863 :     DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
    1369             :     assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
    1370       64863 :     if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) {
    1371             :       ++NumImmFold;
    1372        1278 :       return true;
    1373             :     }
    1374             :   }
    1375             :   return false;
    1376             : }
    1377             : 
    1378             : // FIXME: This is very simple and misses some cases which should be handled when
    1379             : // motivating examples are found.
    1380             : //
    1381             : // The copy rewriting logic should look at uses as well as defs and be able to
    1382             : // eliminate copies across blocks.
    1383             : //
    1384             : // Later copies that are subregister extracts will also not be eliminated since
    1385             : // only the first copy is considered.
    1386             : //
    1387             : // e.g.
    1388             : // %1 = COPY %0
    1389             : // %2 = COPY %0:sub1
    1390             : //
    1391             : // Should replace %2 uses with %1:sub1
    1392     1044231 : bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
    1393             :     SmallSet<unsigned, 4> &CopySrcRegs,
    1394             :     DenseMap<unsigned, MachineInstr *> &CopyMIs) {
    1395             :   assert(MI.isCopy() && "expected a COPY machine instruction");
    1396             : 
    1397     1044231 :   unsigned SrcReg = MI.getOperand(1).getReg();
    1398     1044231 :   if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
    1399             :     return false;
    1400             : 
    1401      668510 :   unsigned DstReg = MI.getOperand(0).getReg();
    1402      668510 :   if (!TargetRegisterInfo::isVirtualRegister(DstReg))
    1403             :     return false;
    1404             : 
    1405      239236 :   if (CopySrcRegs.insert(SrcReg).second) {
    1406             :     // First copy of this reg seen.
    1407      349270 :     CopyMIs.insert(std::make_pair(SrcReg, &MI));
    1408             :     return false;
    1409             :   }
    1410             : 
    1411       64601 :   MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second;
    1412             : 
    1413       64601 :   unsigned SrcSubReg = MI.getOperand(1).getSubReg();
    1414       64601 :   unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg();
    1415             : 
    1416             :   // Can't replace different subregister extracts.
    1417       64601 :   if (SrcSubReg != PrevSrcSubReg)
    1418             :     return false;
    1419             : 
    1420       20948 :   unsigned PrevDstReg = PrevCopy->getOperand(0).getReg();
    1421             : 
    1422             :   // Only replace if the copy register class is the same.
    1423             :   //
    1424             :   // TODO: If we have multiple copies to different register classes, we may want
    1425             :   // to track multiple copies of the same source register.
    1426       41896 :   if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
    1427             :     return false;
    1428             : 
    1429       20903 :   MRI->replaceRegWith(DstReg, PrevDstReg);
    1430             : 
    1431             :   // Lifetime of the previous copy has been extended.
    1432       20903 :   MRI->clearKillFlags(PrevDstReg);
    1433             :   return true;
    1434             : }
    1435             : 
    1436             : bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) {
    1437     7151572 :   return TargetRegisterInfo::isPhysicalRegister(Reg) &&
    1438     2423100 :          !MRI->isAllocatable(Reg);
    1439             : }
    1440             : 
    1441     1023328 : bool PeepholeOptimizer::foldRedundantNAPhysCopy(
    1442             :     MachineInstr &MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) {
    1443             :   assert(MI.isCopy() && "expected a COPY machine instruction");
    1444             : 
    1445     1023328 :   if (DisableNAPhysCopyOpt)
    1446             :     return false;
    1447             : 
    1448     1023328 :   unsigned DstReg = MI.getOperand(0).getReg();
    1449     1023328 :   unsigned SrcReg = MI.getOperand(1).getReg();
    1450       88486 :   if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
    1451             :     // %vreg = COPY %physreg
    1452             :     // Avoid using a datastructure which can track multiple live non-allocatable
    1453             :     // phys->virt copies since LLVM doesn't seem to do this.
    1454       44235 :     NAPhysToVirtMIs.insert({SrcReg, &MI});
    1455       44235 :     return false;
    1456             :   }
    1457             : 
    1458     1626700 :   if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
    1459             :     return false;
    1460             : 
    1461             :   // %physreg = COPY %vreg
    1462        1143 :   auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
    1463        1143 :   if (PrevCopy == NAPhysToVirtMIs.end()) {
    1464             :     // We can't remove the copy: there was an intervening clobber of the
    1465             :     // non-allocatable physical register after the copy to virtual.
    1466             :     LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
    1467             :                       << MI);
    1468             :     return false;
    1469             :   }
    1470             : 
    1471         782 :   unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg();
    1472         782 :   if (PrevDstReg == SrcReg) {
    1473             :     // Remove the virt->phys copy: we saw the virtual register definition, and
    1474             :     // the non-allocatable physical register's state hasn't changed since then.
    1475             :     LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
    1476             :     ++NumNAPhysCopies;
    1477             :     return true;
    1478             :   }
    1479             : 
    1480             :   // Potential missed optimization opportunity: we saw a different virtual
    1481             :   // register get a copy of the non-allocatable physical register, and we only
    1482             :   // track one such copy. Avoid getting confused by this new non-allocatable
    1483             :   // physical register definition, and remove it from the tracked copies.
    1484             :   LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
    1485             :   NAPhysToVirtMIs.erase(PrevCopy);
    1486         322 :   return false;
    1487             : }
    1488             : 
    1489             : /// \bried Returns true if \p MO is a virtual register operand.
    1490             : static bool isVirtualRegisterOperand(MachineOperand &MO) {
    1491        3450 :   if (!MO.isReg())
    1492             :     return false;
    1493        3450 :   return TargetRegisterInfo::isVirtualRegister(MO.getReg());
    1494             : }
    1495             : 
    1496       12849 : bool PeepholeOptimizer::findTargetRecurrence(
    1497             :     unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
    1498             :     RecurrenceCycle &RC) {
    1499             :   // Recurrence found if Reg is in TargetRegs.
    1500       12849 :   if (TargetRegs.count(Reg))
    1501             :     return true;
    1502             : 
    1503             :   // TODO: Curerntly, we only allow the last instruction of the recurrence
    1504             :   // cycle (the instruction that feeds the PHI instruction) to have more than
    1505             :   // one uses to guarantee that commuting operands does not tie registers
    1506             :   // with overlapping live range. Once we have actual live range info of
    1507             :   // each register, this constraint can be relaxed.
    1508       11364 :   if (!MRI->hasOneNonDBGUse(Reg))
    1509             :     return false;
    1510             : 
    1511             :   // Give up if the reccurrence chain length is longer than the limit.
    1512        4228 :   if (RC.size() >= MaxRecurrenceChain)
    1513             :     return false;
    1514             : 
    1515        8390 :   MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
    1516        4195 :   unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
    1517             : 
    1518             :   // Only interested in recurrences whose instructions have only one def, which
    1519             :   // is a virtual register.
    1520        4195 :   if (MI.getDesc().getNumDefs() != 1)
    1521             :     return false;
    1522             : 
    1523        3450 :   MachineOperand &DefOp = MI.getOperand(0);
    1524        3450 :   if (!isVirtualRegisterOperand(DefOp))
    1525             :     return false;
    1526             : 
    1527             :   // Check if def operand of MI is tied to any use operand. We are only
    1528             :   // interested in the case that all the instructions in the recurrence chain
    1529             :   // have there def operand tied with one of the use operand.
    1530             :   unsigned TiedUseIdx;
    1531        3307 :   if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
    1532             :     return false;
    1533             : 
    1534        1856 :   if (Idx == TiedUseIdx) {
    1535        3338 :     RC.push_back(RecurrenceInstr(&MI));
    1536        1669 :     return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
    1537             :   } else {
    1538             :     // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
    1539         187 :     unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
    1540         187 :     if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
    1541         284 :       RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
    1542         142 :       return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
    1543             :     }
    1544             :   }
    1545             : 
    1546          45 :   return false;
    1547             : }
    1548             : 
    1549             : /// Phi instructions will eventually be lowered to copy instructions.
    1550             : /// If phi is in a loop header, a recurrence may formulated around the source
    1551             : /// and destination of the phi. For such case commuting operands of the
    1552             : /// instructions in the recurrence may enable coalescing of the copy instruction
    1553             : /// generated from the phi. For example, if there is a recurrence of
    1554             : ///
    1555             : /// LoopHeader:
    1556             : ///   %1 = phi(%0, %100)
    1557             : /// LoopLatch:
    1558             : ///   %0<def, tied1> = ADD %2<def, tied0>, %1
    1559             : ///
    1560             : /// , the fact that %0 and %2 are in the same tied operands set makes
    1561             : /// the coalescing of copy instruction generated from the phi in
    1562             : /// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and
    1563             : /// %2 have overlapping live range. This introduces additional move
    1564             : /// instruction to the final assembly. However, if we commute %2 and
    1565             : /// %1 of ADD instruction, the redundant move instruction can be
    1566             : /// avoided.
    1567       11038 : bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
    1568       11038 :   SmallSet<unsigned, 2> TargetRegs;
    1569       56080 :   for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
    1570       22521 :     MachineOperand &MO = PHI.getOperand(Idx);
    1571             :     assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
    1572       22521 :     TargetRegs.insert(MO.getReg());
    1573             :   }
    1574             : 
    1575             :   bool Changed = false;
    1576             :   RecurrenceCycle RC;
    1577       11038 :   if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
    1578             :     // Commutes operands of instructions in RC if necessary so that the copy to
    1579             :     // be generated from PHI can be coalesced.
    1580             :     LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
    1581        4423 :     for (auto &RI : RC) {
    1582             :       LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
    1583             :       auto CP = RI.getCommutePair();
    1584        1469 :       if (CP) {
    1585             :         Changed = true;
    1586         106 :         TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
    1587             :                                 (*CP).second);
    1588             :         LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
    1589             :       }
    1590             :     }
    1591             :   }
    1592             : 
    1593       11038 :   return Changed;
    1594             : }
    1595             : 
    1596      181413 : bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
    1597      181413 :   if (skipFunction(MF.getFunction()))
    1598             :     return false;
    1599             : 
    1600             :   LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
    1601             :   LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
    1602             : 
    1603      181257 :   if (DisablePeephole)
    1604             :     return false;
    1605             : 
    1606      176024 :   TII = MF.getSubtarget().getInstrInfo();
    1607      176024 :   TRI = MF.getSubtarget().getRegisterInfo();
    1608      176024 :   MRI = &MF.getRegInfo();
    1609      176024 :   DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
    1610      176024 :   MLI = &getAnalysis<MachineLoopInfo>();
    1611             : 
    1612             :   bool Changed = false;
    1613             : 
    1614      510779 :   for (MachineBasicBlock &MBB : MF) {
    1615             :     bool SeenMoveImm = false;
    1616             : 
    1617             :     // During this forward scan, at some point it needs to answer the question
    1618             :     // "given a pointer to an MI in the current BB, is it located before or
    1619             :     // after the current instruction".
    1620             :     // To perform this, the following set keeps track of the MIs already seen
    1621             :     // during the scan, if a MI is not in the set, it is assumed to be located
    1622             :     // after. Newly created MIs have to be inserted in the set as well.
    1623             :     SmallPtrSet<MachineInstr*, 16> LocalMIs;
    1624      334755 :     SmallSet<unsigned, 4> ImmDefRegs;
    1625             :     DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    1626      334755 :     SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
    1627             : 
    1628             :     // Track when a non-allocatable physical register is copied to a virtual
    1629             :     // register so that useless moves can be removed.
    1630             :     //
    1631             :     // %physreg is the map index; MI is the last valid `%vreg = COPY %physreg`
    1632             :     // without any intervening re-definition of %physreg.
    1633             :     DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs;
    1634             : 
    1635             :     // Set of virtual registers that are copied from.
    1636      334755 :     SmallSet<unsigned, 4> CopySrcRegs;
    1637             :     DenseMap<unsigned, MachineInstr *> CopySrcMIs;
    1638             : 
    1639      334755 :     bool IsLoopHeader = MLI->isLoopHeader(&MBB);
    1640             : 
    1641      334755 :     for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
    1642     4333050 :          MII != MIE; ) {
    1643             :       MachineInstr *MI = &*MII;
    1644             :       // We may be erasing MI below, increment MII now.
    1645             :       ++MII;
    1646     3998295 :       LocalMIs.insert(MI);
    1647             : 
    1648             :       // Skip debug instructions. They should not affect this peephole optimization.
    1649       33775 :       if (MI->isDebugInstr())
    1650       33775 :           continue;
    1651             : 
    1652       79100 :       if (MI->isPosition())
    1653       79100 :         continue;
    1654             : 
    1655     3885420 :       if (IsLoopHeader && MI->isPHI()) {
    1656       11038 :         if (optimizeRecurrence(*MI)) {
    1657             :           Changed = true;
    1658         105 :           continue;
    1659             :         }
    1660             :       }
    1661             : 
    1662     3885315 :       if (!MI->isCopy()) {
    1663    31515725 :         for (const MachineOperand &MO : MI->operands()) {
    1664             :           // Visit all operands: definitions can be implicit or explicit.
    1665    14348335 :           if (MO.isReg()) {
    1666     9165840 :             unsigned Reg = MO.getReg();
    1667     9165840 :             if (MO.isDef() && isNAPhysCopy(Reg)) {
    1668     1545260 :               const auto &Def = NAPhysToVirtMIs.find(Reg);
    1669     1545260 :               if (Def != NAPhysToVirtMIs.end()) {
    1670             :                 // A new definition of the non-allocatable physical register
    1671             :                 // invalidates previous copies.
    1672             :                 LLVM_DEBUG(dbgs()
    1673             :                            << "NAPhysCopy: invalidating because of " << *MI);
    1674             :                 NAPhysToVirtMIs.erase(Def);
    1675             :               }
    1676             :             }
    1677     5182495 :           } else if (MO.isRegMask()) {
    1678      155250 :             const uint32_t *RegMask = MO.getRegMask();
    1679      352603 :             for (auto &RegMI : NAPhysToVirtMIs) {
    1680       42103 :               unsigned Def = RegMI.first;
    1681       42103 :               if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
    1682             :                 LLVM_DEBUG(dbgs()
    1683             :                            << "NAPhysCopy: invalidating because of " << *MI);
    1684       41589 :                 NAPhysToVirtMIs.erase(Def);
    1685             :               }
    1686             :             }
    1687             :           }
    1688             :         }
    1689             :       }
    1690             : 
    1691     3921524 :       if (MI->isImplicitDef() || MI->isKill())
    1692       36209 :         continue;
    1693             : 
    1694     7686603 :       if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
    1695             :         // Blow away all non-allocatable physical registers knowledge since we
    1696             :         // don't know what's correct anymore.
    1697             :         //
    1698             :         // FIXME: handle explicit asm clobbers.
    1699             :         LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
    1700             :                           << *MI);
    1701       65381 :         NAPhysToVirtMIs.clear();
    1702             :       }
    1703             : 
    1704     3858996 :       if ((isUncoalescableCopy(*MI) &&
    1705     3857574 :            optimizeUncoalescableCopy(*MI, LocalMIs)) ||
    1706     7770554 :           (MI->isCompare() && optimizeCmpInstr(*MI)) ||
    1707         766 :           (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
    1708             :         // MI is deleted.
    1709             :         LocalMIs.erase(MI);
    1710             :         Changed = true;
    1711        1360 :         continue;
    1712             :       }
    1713             : 
    1714     3894676 :       if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
    1715             :         Changed = true;
    1716           6 :         continue;
    1717             :       }
    1718             : 
    1719     1168680 :       if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
    1720             :         // MI is just rewritten.
    1721             :         Changed = true;
    1722       25339 :         continue;
    1723             :       }
    1724             : 
    1725     4887995 :       if (MI->isCopy() &&
    1726     2067559 :           (foldRedundantCopy(*MI, CopySrcRegs, CopySrcMIs) ||
    1727     1023328 :            foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
    1728             :         LocalMIs.erase(MI);
    1729       21363 :         MI->eraseFromParent();
    1730             :         Changed = true;
    1731       21363 :         continue;
    1732             :       }
    1733             : 
    1734     3801038 :       if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
    1735             :         SeenMoveImm = true;
    1736             :       } else {
    1737     3738229 :         Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
    1738             :         // optimizeExtInstr might have created new instructions after MI
    1739             :         // and before the already incremented MII. Adjust MII so that the
    1740             :         // next iteration sees the new instructions.
    1741     3738229 :         MII = MI;
    1742             :         ++MII;
    1743     3738229 :         if (SeenMoveImm)
    1744      341152 :           Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs);
    1745             :       }
    1746             : 
    1747             :       // Check whether MI is a load candidate for folding into a later
    1748             :       // instruction. If MI is not a candidate, check whether we can fold an
    1749             :       // earlier load into MI.
    1750     3801038 :       if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
    1751             :           !FoldAsLoadDefCandidates.empty()) {
    1752             : 
    1753             :         // We visit each operand even after successfully folding a previous
    1754             :         // one.  This allows us to fold multiple loads into a single
    1755             :         // instruction.  We do assume that optimizeLoadInstr doesn't insert
    1756             :         // foldable uses earlier in the argument list.  Since we don't restart
    1757             :         // iteration, we'd miss such cases.
    1758      300682 :         const MCInstrDesc &MIDesc = MI->getDesc();
    1759     2673004 :         for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
    1760             :              ++i) {
    1761     1035820 :           const MachineOperand &MOp = MI->getOperand(i);
    1762     1035820 :           if (!MOp.isReg())
    1763      296950 :             continue;
    1764      738870 :           unsigned FoldAsLoadDefReg = MOp.getReg();
    1765      738870 :           if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
    1766             :             // We need to fold load after optimizeCmpInstr, since
    1767             :             // optimizeCmpInstr can enable folding by converting SUB to CMP.
    1768             :             // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
    1769             :             // we need it for markUsesInDebugValueAsUndef().
    1770      115443 :             unsigned FoldedReg = FoldAsLoadDefReg;
    1771      115443 :             MachineInstr *DefMI = nullptr;
    1772      115443 :             if (MachineInstr *FoldMI =
    1773      115443 :                     TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
    1774             :               // Update LocalMIs since we replaced MI with FoldMI and deleted
    1775             :               // DefMI.
    1776             :               LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
    1777             :               LLVM_DEBUG(dbgs() << "     With: " << *FoldMI);
    1778             :               LocalMIs.erase(MI);
    1779        2069 :               LocalMIs.erase(DefMI);
    1780        2069 :               LocalMIs.insert(FoldMI);
    1781        2069 :               MI->eraseFromParent();
    1782        2069 :               DefMI->eraseFromParent();
    1783        2069 :               MRI->markUsesInDebugValueAsUndef(FoldedReg);
    1784        2069 :               FoldAsLoadDefCandidates.erase(FoldedReg);
    1785             :               ++NumLoadFold;
    1786             : 
    1787             :               // MI is replaced with FoldMI so we can continue trying to fold
    1788             :               Changed = true;
    1789             :               MI = FoldMI;
    1790             :             }
    1791             :           }
    1792             :         }
    1793             :       }
    1794             : 
    1795             :       // If we run into an instruction we can't fold across, discard
    1796             :       // the load candidates.  Note: We might be able to fold *into* this
    1797             :       // instruction, so this needs to be after the folding logic.
    1798     3801038 :       if (MI->isLoadFoldBarrier()) {
    1799             :         LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
    1800             :         FoldAsLoadDefCandidates.clear();
    1801             :       }
    1802             :     }
    1803             :   }
    1804             : 
    1805             :   return Changed;
    1806             : }
    1807             : 
    1808      725274 : ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
    1809             :   assert(Def->isCopy() && "Invalid definition");
    1810             :   // Copy instruction are supposed to be: Def = Src.
    1811             :   // If someone breaks this assumption, bad things will happen everywhere.
    1812             :   assert(Def->getNumOperands() == 2 && "Invalid number of operands");
    1813             : 
    1814     1450548 :   if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
    1815             :     // If we look for a different subreg, it means we want a subreg of src.
    1816             :     // Bails as we do not support composing subregs yet.
    1817        7828 :     return ValueTrackerResult();
    1818             :   // Otherwise, we want the whole source.
    1819             :   const MachineOperand &Src = Def->getOperand(1);
    1820      717446 :   if (Src.isUndef())
    1821           0 :     return ValueTrackerResult();
    1822      717446 :   return ValueTrackerResult(Src.getReg(), Src.getSubReg());
    1823             : }
    1824             : 
    1825        4376 : ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
    1826             :   assert(Def->isBitcast() && "Invalid definition");
    1827             : 
    1828             :   // Bail if there are effects that a plain copy will not expose.
    1829        4376 :   if (Def->hasUnmodeledSideEffects())
    1830           0 :     return ValueTrackerResult();
    1831             : 
    1832             :   // Bitcasts with more than one def are not supported.
    1833        4376 :   if (Def->getDesc().getNumDefs() != 1)
    1834           0 :     return ValueTrackerResult();
    1835        8752 :   const MachineOperand DefOp = Def->getOperand(DefIdx);
    1836        4376 :   if (DefOp.getSubReg() != DefSubReg)
    1837             :     // If we look for a different subreg, it means we want a subreg of the src.
    1838             :     // Bails as we do not support composing subregs yet.
    1839           0 :     return ValueTrackerResult();
    1840             : 
    1841        4376 :   unsigned SrcIdx = Def->getNumOperands();
    1842       12266 :   for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
    1843             :        ++OpIdx) {
    1844             :     const MachineOperand &MO = Def->getOperand(OpIdx);
    1845        7890 :     if (!MO.isReg() || !MO.getReg())
    1846        3506 :       continue;
    1847             :     // Ignore dead implicit defs.
    1848        4400 :     if (MO.isImplicit() && MO.isDead())
    1849           8 :       continue;
    1850             :     assert(!MO.isDef() && "We should have skipped all the definitions by now");
    1851        4376 :     if (SrcIdx != EndOpIdx)
    1852             :       // Multiple sources?
    1853           0 :       return ValueTrackerResult();
    1854             :     SrcIdx = OpIdx;
    1855             :   }
    1856             : 
    1857             :   // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
    1858             :   // will break the assumed guarantees for the upper bits.
    1859       22118 :   for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
    1860        4498 :     if (UseMI.isSubregToReg())
    1861           6 :       return ValueTrackerResult();
    1862             :   }
    1863             : 
    1864        4373 :   const MachineOperand &Src = Def->getOperand(SrcIdx);
    1865        4373 :   if (Src.isUndef())
    1866           3 :     return ValueTrackerResult();
    1867        4370 :   return ValueTrackerResult(Src.getReg(), Src.getSubReg());
    1868             : }
    1869             : 
    1870      183216 : ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
    1871             :   assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
    1872             :          "Invalid definition");
    1873             : 
    1874      366432 :   if (Def->getOperand(DefIdx).getSubReg())
    1875             :     // If we are composing subregs, bail out.
    1876             :     // The case we are checking is Def.<subreg> = REG_SEQUENCE.
    1877             :     // This should almost never happen as the SSA property is tracked at
    1878             :     // the register level (as opposed to the subreg level).
    1879             :     // I.e.,
    1880             :     // Def.sub0 =
    1881             :     // Def.sub1 =
    1882             :     // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
    1883             :     // Def. Thus, it must not be generated.
    1884             :     // However, some code could theoretically generates a single
    1885             :     // Def.sub0 (i.e, not defining the other subregs) and we would
    1886             :     // have this case.
    1887             :     // If we can ascertain (or force) that this never happens, we could
    1888             :     // turn that into an assertion.
    1889           0 :     return ValueTrackerResult();
    1890             : 
    1891      183216 :   if (!TII)
    1892             :     // We could handle the REG_SEQUENCE here, but we do not want to
    1893             :     // duplicate the code from the generic TII.
    1894           0 :     return ValueTrackerResult();
    1895             : 
    1896             :   SmallVector<RegSubRegPairAndIdx, 8> RegSeqInputRegs;
    1897      183216 :   if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
    1898           0 :     return ValueTrackerResult();
    1899             : 
    1900             :   // We are looking at:
    1901             :   // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
    1902             :   // Check if one of the operand defines the subreg we are interested in.
    1903      592040 :   for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
    1904      381324 :     if (RegSeqInput.SubIdx == DefSubReg) {
    1905      176912 :       if (RegSeqInput.SubReg)
    1906             :         // Bail if we have to compose sub registers.
    1907         889 :         return ValueTrackerResult();
    1908             : 
    1909      176023 :       return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
    1910             :     }
    1911             :   }
    1912             : 
    1913             :   // If the subreg we are tracking is super-defined by another subreg,
    1914             :   // we could follow this value. However, this would require to compose
    1915             :   // the subreg and we do not do that for now.
    1916        6304 :   return ValueTrackerResult();
    1917             : }
    1918             : 
    1919       27873 : ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
    1920             :   assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
    1921             :          "Invalid definition");
    1922             : 
    1923       55746 :   if (Def->getOperand(DefIdx).getSubReg())
    1924             :     // If we are composing subreg, bail out.
    1925             :     // Same remark as getNextSourceFromRegSequence.
    1926             :     // I.e., this may be turned into an assert.
    1927           0 :     return ValueTrackerResult();
    1928             : 
    1929       27873 :   if (!TII)
    1930             :     // We could handle the REG_SEQUENCE here, but we do not want to
    1931             :     // duplicate the code from the generic TII.
    1932           0 :     return ValueTrackerResult();
    1933             : 
    1934             :   RegSubRegPair BaseReg;
    1935             :   RegSubRegPairAndIdx InsertedReg;
    1936       27873 :   if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
    1937           0 :     return ValueTrackerResult();
    1938             : 
    1939             :   // We are looking at:
    1940             :   // Def = INSERT_SUBREG v0, v1, sub1
    1941             :   // There are two cases:
    1942             :   // 1. DefSubReg == sub1, get v1.
    1943             :   // 2. DefSubReg != sub1, the value may be available through v0.
    1944             : 
    1945             :   // #1 Check if the inserted register matches the required sub index.
    1946       27873 :   if (InsertedReg.SubIdx == DefSubReg) {
    1947       23041 :     return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
    1948             :   }
    1949             :   // #2 Otherwise, if the sub register we are looking for is not partial
    1950             :   // defined by the inserted element, we can look through the main
    1951             :   // register (v0).
    1952        4832 :   const MachineOperand &MODef = Def->getOperand(DefIdx);
    1953             :   // If the result register (Def) and the base register (v0) do not
    1954             :   // have the same register class or if we have to compose
    1955             :   // subregisters, bail out.
    1956       18673 :   if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
    1957        4177 :       BaseReg.SubReg)
    1958         655 :     return ValueTrackerResult();
    1959             : 
    1960             :   // Get the TRI and check if the inserted sub-register overlaps with the
    1961             :   // sub-register we are tracking.
    1962        4177 :   const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
    1963        8354 :   if (!TRI ||
    1964        4177 :       !(TRI->getSubRegIndexLaneMask(DefSubReg) &
    1965        4177 :         TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none())
    1966        4170 :     return ValueTrackerResult();
    1967             :   // At this point, the value is available in v0 via the same subreg
    1968             :   // we used for Def.
    1969           7 :   return ValueTrackerResult(BaseReg.Reg, DefSubReg);
    1970             : }
    1971             : 
    1972        2915 : ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
    1973             :   assert((Def->isExtractSubreg() ||
    1974             :           Def->isExtractSubregLike()) && "Invalid definition");
    1975             :   // We are looking at:
    1976             :   // Def = EXTRACT_SUBREG v0, sub0
    1977             : 
    1978             :   // Bail if we have to compose sub registers.
    1979             :   // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
    1980        2915 :   if (DefSubReg)
    1981           0 :     return ValueTrackerResult();
    1982             : 
    1983        2915 :   if (!TII)
    1984             :     // We could handle the EXTRACT_SUBREG here, but we do not want to
    1985             :     // duplicate the code from the generic TII.
    1986           0 :     return ValueTrackerResult();
    1987             : 
    1988             :   RegSubRegPairAndIdx ExtractSubregInputReg;
    1989        2915 :   if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
    1990           0 :     return ValueTrackerResult();
    1991             : 
    1992             :   // Bail if we have to compose sub registers.
    1993             :   // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
    1994        2915 :   if (ExtractSubregInputReg.SubReg)
    1995           0 :     return ValueTrackerResult();
    1996             :   // Otherwise, the value is available in the v0.sub0.
    1997             :   return ValueTrackerResult(ExtractSubregInputReg.Reg,
    1998        2915 :                             ExtractSubregInputReg.SubIdx);
    1999             : }
    2000             : 
    2001          29 : ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
    2002             :   assert(Def->isSubregToReg() && "Invalid definition");
    2003             :   // We are looking at:
    2004             :   // Def = SUBREG_TO_REG Imm, v0, sub0
    2005             : 
    2006             :   // Bail if we have to compose sub registers.
    2007             :   // If DefSubReg != sub0, we would have to check that all the bits
    2008             :   // we track are included in sub0 and if yes, we would have to
    2009             :   // determine the right subreg in v0.
    2010          29 :   if (DefSubReg != Def->getOperand(3).getImm())
    2011          29 :     return ValueTrackerResult();
    2012             :   // Bail if we have to compose sub registers.
    2013             :   // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
    2014           0 :   if (Def->getOperand(2).getSubReg())
    2015           0 :     return ValueTrackerResult();
    2016             : 
    2017             :   return ValueTrackerResult(Def->getOperand(2).getReg(),
    2018           0 :                             Def->getOperand(3).getImm());
    2019             : }
    2020             : 
    2021             : /// Explore each PHI incoming operand and return its sources.
    2022         273 : ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
    2023             :   assert(Def->isPHI() && "Invalid definition");
    2024             :   ValueTrackerResult Res;
    2025             : 
    2026             :   // If we look for a different subreg, bail as we do not support composing
    2027             :   // subregs yet.
    2028         546 :   if (Def->getOperand(0).getSubReg() != DefSubReg)
    2029         141 :     return ValueTrackerResult();
    2030             : 
    2031             :   // Return all register sources for PHI instructions.
    2032         404 :   for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
    2033         272 :     const MachineOperand &MO = Def->getOperand(i);
    2034             :     assert(MO.isReg() && "Invalid PHI instruction");
    2035             :     // We have no code to deal with undef operands. They shouldn't happen in
    2036             :     // normal programs anyway.
    2037         272 :     if (MO.isUndef())
    2038           0 :       return ValueTrackerResult();
    2039         272 :     Res.addSource(MO.getReg(), MO.getSubReg());
    2040             :   }
    2041             : 
    2042             :   return Res;
    2043             : }
    2044             : 
    2045     1180176 : ValueTrackerResult ValueTracker::getNextSourceImpl() {
    2046             :   assert(Def && "This method needs a valid definition");
    2047             : 
    2048             :   assert(((Def->getOperand(DefIdx).isDef() &&
    2049             :            (DefIdx < Def->getDesc().getNumDefs() ||
    2050             :             Def->getDesc().isVariadic())) ||
    2051             :           Def->getOperand(DefIdx).isImplicit()) &&
    2052             :          "Invalid DefIdx");
    2053     2360352 :   if (Def->isCopy())
    2054      725274 :     return getNextSourceFromCopy();
    2055      454902 :   if (Def->isBitcast())
    2056        4376 :     return getNextSourceFromBitcast();
    2057             :   // All the remaining cases involve "complex" instructions.
    2058             :   // Bail if we did not ask for the advanced tracking.
    2059      450526 :   if (DisableAdvCopyOpt)
    2060          38 :     return ValueTrackerResult();
    2061     1169574 :   if (Def->isRegSequence() || Def->isRegSequenceLike())
    2062      183216 :     return getNextSourceFromRegSequence();
    2063      774136 :   if (Def->isInsertSubreg() || Def->isInsertSubregLike())
    2064       27873 :     return getNextSourceFromInsertSubreg();
    2065      718197 :   if (Def->isExtractSubreg() || Def->isExtractSubregLike())
    2066        2915 :     return getNextSourceFromExtractSubreg();
    2067      472968 :   if (Def->isSubregToReg())
    2068          29 :     return getNextSourceFromSubregToReg();
    2069             :   if (Def->isPHI())
    2070         273 :     return getNextSourceFromPHI();
    2071      236182 :   return ValueTrackerResult();
    2072             : }
    2073             : 
    2074     1180176 : ValueTrackerResult ValueTracker::getNextSource() {
    2075             :   // If we reach a point where we cannot move up in the use-def chain,
    2076             :   // there is nothing we can get.
    2077     1180176 :   if (!Def)
    2078           0 :     return ValueTrackerResult();
    2079             : 
    2080     1180176 :   ValueTrackerResult Res = getNextSourceImpl();
    2081     1180176 :   if (Res.isValid()) {
    2082             :     // Update definition, definition index, and subregister for the
    2083             :     // next call of getNextSource.
    2084             :     // Update the current register.
    2085             :     bool OneRegSrc = Res.getNumSources() == 1;
    2086      923934 :     if (OneRegSrc)
    2087      923802 :       Reg = Res.getSrcReg(0);
    2088             :     // Update the result before moving up in the use-def chain
    2089             :     // with the instruction containing the last found sources.
    2090      923934 :     Res.setInst(Def);
    2091             : 
    2092             :     // If we can still move up in the use-def chain, move to the next
    2093             :     // definition.
    2094     1847868 :     if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
    2095      537991 :       MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
    2096      537991 :       if (DI != MRI.def_end()) {
    2097      537991 :         Def = DI->getParent();
    2098      537991 :         DefIdx = DI.getOperandNo();
    2099      537991 :         DefSubReg = Res.getSrcSubReg(0);
    2100             :       } else {
    2101           0 :         Def = nullptr;
    2102             :       }
    2103             :       return Res;
    2104             :     }
    2105             :   }
    2106             :   // If we end up here, this means we will not be able to find another source
    2107             :   // for the next iteration. Make sure any new call to getNextSource bails out
    2108             :   // early by cutting the use-def chain.
    2109      642185 :   Def = nullptr;
    2110             :   return Res;
    2111      303507 : }

Generated by: LCOV version 1.13