LCOV - code coverage report
Current view: top level - lib/CodeGen - RegisterCoalescer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 965 1139 84.7 %
Date: 2018-09-23 13:06:45 Functions: 49 59 83.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements the generic RegisterCoalescer interface which
      11             : // is used as the common interface used by all clients and
      12             : // implementations of register coalescing.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "RegisterCoalescer.h"
      17             : #include "llvm/ADT/ArrayRef.h"
      18             : #include "llvm/ADT/BitVector.h"
      19             : #include "llvm/ADT/DenseSet.h"
      20             : #include "llvm/ADT/STLExtras.h"
      21             : #include "llvm/ADT/SmallPtrSet.h"
      22             : #include "llvm/ADT/SmallVector.h"
      23             : #include "llvm/ADT/Statistic.h"
      24             : #include "llvm/Analysis/AliasAnalysis.h"
      25             : #include "llvm/CodeGen/LiveInterval.h"
      26             : #include "llvm/CodeGen/LiveIntervals.h"
      27             : #include "llvm/CodeGen/LiveRangeEdit.h"
      28             : #include "llvm/CodeGen/MachineBasicBlock.h"
      29             : #include "llvm/CodeGen/MachineFunction.h"
      30             : #include "llvm/CodeGen/MachineFunctionPass.h"
      31             : #include "llvm/CodeGen/MachineInstr.h"
      32             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      33             : #include "llvm/CodeGen/MachineLoopInfo.h"
      34             : #include "llvm/CodeGen/MachineOperand.h"
      35             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      36             : #include "llvm/CodeGen/Passes.h"
      37             : #include "llvm/CodeGen/RegisterClassInfo.h"
      38             : #include "llvm/CodeGen/SlotIndexes.h"
      39             : #include "llvm/CodeGen/TargetInstrInfo.h"
      40             : #include "llvm/CodeGen/TargetOpcodes.h"
      41             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      42             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      43             : #include "llvm/IR/DebugLoc.h"
      44             : #include "llvm/MC/LaneBitmask.h"
      45             : #include "llvm/MC/MCInstrDesc.h"
      46             : #include "llvm/MC/MCRegisterInfo.h"
      47             : #include "llvm/Pass.h"
      48             : #include "llvm/Support/CommandLine.h"
      49             : #include "llvm/Support/Compiler.h"
      50             : #include "llvm/Support/Debug.h"
      51             : #include "llvm/Support/ErrorHandling.h"
      52             : #include "llvm/Support/raw_ostream.h"
      53             : #include <algorithm>
      54             : #include <cassert>
      55             : #include <iterator>
      56             : #include <limits>
      57             : #include <tuple>
      58             : #include <utility>
      59             : #include <vector>
      60             : 
      61             : using namespace llvm;
      62             : 
      63             : #define DEBUG_TYPE "regalloc"
      64             : 
      65             : STATISTIC(numJoins    , "Number of interval joins performed");
      66             : STATISTIC(numCrossRCs , "Number of cross class joins performed");
      67             : STATISTIC(numCommutes , "Number of instruction commuting performed");
      68             : STATISTIC(numExtends  , "Number of copies extended");
      69             : STATISTIC(NumReMats   , "Number of instructions re-materialized");
      70             : STATISTIC(NumInflated , "Number of register classes inflated");
      71             : STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
      72             : STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
      73             : STATISTIC(NumShrinkToUses,  "Number of shrinkToUses called");
      74             : 
      75             : static cl::opt<bool> EnableJoining("join-liveintervals",
      76             :                                    cl::desc("Coalesce copies (default=true)"),
      77             :                                    cl::init(true), cl::Hidden);
      78             : 
      79             : static cl::opt<bool> UseTerminalRule("terminal-rule",
      80             :                                      cl::desc("Apply the terminal rule"),
      81             :                                      cl::init(false), cl::Hidden);
      82             : 
      83             : /// Temporary flag to test critical edge unsplitting.
      84             : static cl::opt<bool>
      85             : EnableJoinSplits("join-splitedges",
      86             :   cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
      87             : 
      88             : /// Temporary flag to test global copy optimization.
      89             : static cl::opt<cl::boolOrDefault>
      90             : EnableGlobalCopies("join-globalcopies",
      91             :   cl::desc("Coalesce copies that span blocks (default=subtarget)"),
      92             :   cl::init(cl::BOU_UNSET), cl::Hidden);
      93             : 
      94             : static cl::opt<bool>
      95             : VerifyCoalescing("verify-coalescing",
      96             :          cl::desc("Verify machine instrs before and after register coalescing"),
      97             :          cl::Hidden);
      98             : 
      99             : static cl::opt<unsigned> LateRematUpdateThreshold(
     100             :     "late-remat-update-threshold", cl::Hidden,
     101             :     cl::desc("During rematerialization for a copy, if the def instruction has "
     102             :              "many other copy uses to be rematerialized, delay the multiple "
     103             :              "separate live interval update work and do them all at once after "
     104             :              "all those rematerialization are done. It will save a lot of "
     105             :              "repeated work. "),
     106             :     cl::init(100));
     107             : 
     108             : namespace {
     109             : 
     110             :   class RegisterCoalescer : public MachineFunctionPass,
     111             :                             private LiveRangeEdit::Delegate {
     112             :     MachineFunction* MF;
     113             :     MachineRegisterInfo* MRI;
     114             :     const TargetRegisterInfo* TRI;
     115             :     const TargetInstrInfo* TII;
     116             :     LiveIntervals *LIS;
     117             :     const MachineLoopInfo* Loops;
     118             :     AliasAnalysis *AA;
     119             :     RegisterClassInfo RegClassInfo;
     120             : 
     121             :     /// A LaneMask to remember on which subregister live ranges we need to call
     122             :     /// shrinkToUses() later.
     123             :     LaneBitmask ShrinkMask;
     124             : 
     125             :     /// True if the main range of the currently coalesced intervals should be
     126             :     /// checked for smaller live intervals.
     127             :     bool ShrinkMainRange;
     128             : 
     129             :     /// True if the coalescer should aggressively coalesce global copies
     130             :     /// in favor of keeping local copies.
     131             :     bool JoinGlobalCopies;
     132             : 
     133             :     /// True if the coalescer should aggressively coalesce fall-thru
     134             :     /// blocks exclusively containing copies.
     135             :     bool JoinSplitEdges;
     136             : 
     137             :     /// Copy instructions yet to be coalesced.
     138             :     SmallVector<MachineInstr*, 8> WorkList;
     139             :     SmallVector<MachineInstr*, 8> LocalWorkList;
     140             : 
     141             :     /// Set of instruction pointers that have been erased, and
     142             :     /// that may be present in WorkList.
     143             :     SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
     144             : 
     145             :     /// Dead instructions that are about to be deleted.
     146             :     SmallVector<MachineInstr*, 8> DeadDefs;
     147             : 
     148             :     /// Virtual registers to be considered for register class inflation.
     149             :     SmallVector<unsigned, 8> InflateRegs;
     150             : 
     151             :     /// The collection of live intervals which should have been updated
     152             :     /// immediately after rematerialiation but delayed until
     153             :     /// lateLiveIntervalUpdate is called.
     154             :     DenseSet<unsigned> ToBeUpdated;
     155             : 
     156             :     /// Recursively eliminate dead defs in DeadDefs.
     157             :     void eliminateDeadDefs();
     158             : 
     159             :     /// LiveRangeEdit callback for eliminateDeadDefs().
     160             :     void LRE_WillEraseInstruction(MachineInstr *MI) override;
     161             : 
     162             :     /// Coalesce the LocalWorkList.
     163             :     void coalesceLocals();
     164             : 
     165             :     /// Join compatible live intervals
     166             :     void joinAllIntervals();
     167             : 
     168             :     /// Coalesce copies in the specified MBB, putting
     169             :     /// copies that cannot yet be coalesced into WorkList.
     170             :     void copyCoalesceInMBB(MachineBasicBlock *MBB);
     171             : 
     172             :     /// Tries to coalesce all copies in CurrList. Returns true if any progress
     173             :     /// was made.
     174             :     bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
     175             : 
     176             :     /// If one def has many copy like uses, and those copy uses are all
     177             :     /// rematerialized, the live interval update needed for those
     178             :     /// rematerializations will be delayed and done all at once instead
     179             :     /// of being done multiple times. This is to save compile cost becuase
     180             :     /// live interval update is costly.
     181             :     void lateLiveIntervalUpdate();
     182             : 
     183             :     /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
     184             :     /// src/dst of the copy instruction CopyMI.  This returns true if the copy
     185             :     /// was successfully coalesced away. If it is not currently possible to
     186             :     /// coalesce this interval, but it may be possible if other things get
     187             :     /// coalesced, then it returns true by reference in 'Again'.
     188             :     bool joinCopy(MachineInstr *CopyMI, bool &Again);
     189             : 
     190             :     /// Attempt to join these two intervals.  On failure, this
     191             :     /// returns false.  The output "SrcInt" will not have been modified, so we
     192             :     /// can use this information below to update aliases.
     193             :     bool joinIntervals(CoalescerPair &CP);
     194             : 
     195             :     /// Attempt joining two virtual registers. Return true on success.
     196             :     bool joinVirtRegs(CoalescerPair &CP);
     197             : 
     198             :     /// Attempt joining with a reserved physreg.
     199             :     bool joinReservedPhysReg(CoalescerPair &CP);
     200             : 
     201             :     /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
     202             :     /// Subranges in @p LI which only partially interfere with the desired
     203             :     /// LaneMask are split as necessary. @p LaneMask are the lanes that
     204             :     /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
     205             :     /// lanemasks already adjusted to the coalesced register.
     206             :     void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
     207             :                            LaneBitmask LaneMask, CoalescerPair &CP);
     208             : 
     209             :     /// Join the liveranges of two subregisters. Joins @p RRange into
     210             :     /// @p LRange, @p RRange may be invalid afterwards.
     211             :     void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
     212             :                           LaneBitmask LaneMask, const CoalescerPair &CP);
     213             : 
     214             :     /// We found a non-trivially-coalescable copy. If the source value number is
     215             :     /// defined by a copy from the destination reg see if we can merge these two
     216             :     /// destination reg valno# into a single value number, eliminating a copy.
     217             :     /// This returns true if an interval was modified.
     218             :     bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
     219             : 
     220             :     /// Return true if there are definitions of IntB
     221             :     /// other than BValNo val# that can reach uses of AValno val# of IntA.
     222             :     bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
     223             :                               VNInfo *AValNo, VNInfo *BValNo);
     224             : 
     225             :     /// We found a non-trivially-coalescable copy.
     226             :     /// If the source value number is defined by a commutable instruction and
     227             :     /// its other operand is coalesced to the copy dest register, see if we
     228             :     /// can transform the copy into a noop by commuting the definition.
     229             :     /// This returns a pair of two flags:
     230             :     /// - the first element is true if an interval was modified,
     231             :     /// - the second element is true if the destination interval needs
     232             :     ///   to be shrunk after deleting the copy.
     233             :     std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
     234             :                                                   MachineInstr *CopyMI);
     235             : 
     236             :     /// We found a copy which can be moved to its less frequent predecessor.
     237             :     bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
     238             : 
     239             :     /// If the source of a copy is defined by a
     240             :     /// trivial computation, replace the copy by rematerialize the definition.
     241             :     bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
     242             :                                  bool &IsDefCopy);
     243             : 
     244             :     /// Return true if a copy involving a physreg should be joined.
     245             :     bool canJoinPhys(const CoalescerPair &CP);
     246             : 
     247             :     /// Replace all defs and uses of SrcReg to DstReg and update the subregister
     248             :     /// number if it is not zero. If DstReg is a physical register and the
     249             :     /// existing subregister number of the def / use being updated is not zero,
     250             :     /// make sure to set it to the correct physical subregister.
     251             :     void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
     252             : 
     253             :     /// If the given machine operand reads only undefined lanes add an undef
     254             :     /// flag.
     255             :     /// This can happen when undef uses were previously concealed by a copy
     256             :     /// which we coalesced. Example:
     257             :     ///    %0:sub0<def,read-undef> = ...
     258             :     ///    %1 = COPY %0           <-- Coalescing COPY reveals undef
     259             :     ///       = use %1:sub1       <-- hidden undef use
     260             :     void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
     261             :                       MachineOperand &MO, unsigned SubRegIdx);
     262             : 
     263             :     /// Handle copies of undef values. If the undef value is an incoming
     264             :     /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
     265             :     /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
     266             :     /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
     267             :     MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);
     268             : 
     269             :     /// Check whether or not we should apply the terminal rule on the
     270             :     /// destination (Dst) of \p Copy.
     271             :     /// When the terminal rule applies, Copy is not profitable to
     272             :     /// coalesce.
     273             :     /// Dst is terminal if it has exactly one affinity (Dst, Src) and
     274             :     /// at least one interference (Dst, Dst2). If Dst is terminal, the
     275             :     /// terminal rule consists in checking that at least one of
     276             :     /// interfering node, say Dst2, has an affinity of equal or greater
     277             :     /// weight with Src.
     278             :     /// In that case, Dst2 and Dst will not be able to be both coalesced
     279             :     /// with Src. Since Dst2 exposes more coalescing opportunities than
     280             :     /// Dst, we can drop \p Copy.
     281             :     bool applyTerminalRule(const MachineInstr &Copy) const;
     282             : 
     283             :     /// Wrapper method for \see LiveIntervals::shrinkToUses.
     284             :     /// This method does the proper fixing of the live-ranges when the afore
     285             :     /// mentioned method returns true.
     286           0 :     void shrinkToUses(LiveInterval *LI,
     287             :                       SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
     288             :       NumShrinkToUses++;
     289           0 :       if (LIS->shrinkToUses(LI, Dead)) {
     290             :         /// Check whether or not \p LI is composed by multiple connected
     291             :         /// components and if that is the case, fix that.
     292             :         SmallVector<LiveInterval*, 8> SplitLIs;
     293           0 :         LIS->splitSeparateComponents(*LI, SplitLIs);
     294             :       }
     295           0 :     }
     296             : 
     297             :     /// Wrapper Method to do all the necessary work when an Instruction is
     298             :     /// deleted.
     299             :     /// Optimizations should use this to make sure that deleted instructions
     300             :     /// are always accounted for.
     301       46881 :     void deleteInstr(MachineInstr* MI) {
     302       46881 :       ErasedInstrs.insert(MI);
     303       46881 :       LIS->RemoveMachineInstrFromMaps(*MI);
     304       46881 :       MI->eraseFromParent();
     305       46881 :     }
     306             : 
     307             :   public:
     308             :     static char ID; ///< Class identification, replacement for typeinfo
     309             : 
     310       20077 :     RegisterCoalescer() : MachineFunctionPass(ID) {
     311       20077 :       initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
     312       20077 :     }
     313             : 
     314             :     void getAnalysisUsage(AnalysisUsage &AU) const override;
     315             : 
     316             :     void releaseMemory() override;
     317             : 
     318             :     /// This is the pass entry point.
     319             :     bool runOnMachineFunction(MachineFunction&) override;
     320             : 
     321             :     /// Implement the dump method.
     322             :     void print(raw_ostream &O, const Module* = nullptr) const override;
     323             :   };
     324             : 
     325             : } // end anonymous namespace
     326             : 
     327             : char RegisterCoalescer::ID = 0;
     328             : 
     329             : char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
     330             : 
     331       32750 : INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
     332             :                       "Simple Register Coalescing", false, false)
     333       32750 : INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
     334       32750 : INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
     335       32750 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     336       32750 : INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     337      172450 : INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
     338             :                     "Simple Register Coalescing", false, false)
     339             : 
     340     5606158 : static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
     341             :                         unsigned &Src, unsigned &Dst,
     342             :                         unsigned &SrcSub, unsigned &DstSub) {
     343     5606158 :   if (MI->isCopy()) {
     344     4702543 :     Dst = MI->getOperand(0).getReg();
     345     4702543 :     DstSub = MI->getOperand(0).getSubReg();
     346     4702543 :     Src = MI->getOperand(1).getReg();
     347     4702543 :     SrcSub = MI->getOperand(1).getSubReg();
     348      903615 :   } else if (MI->isSubregToReg()) {
     349       51063 :     Dst = MI->getOperand(0).getReg();
     350      102126 :     DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
     351       51063 :                                       MI->getOperand(3).getImm());
     352       51063 :     Src = MI->getOperand(2).getReg();
     353       51063 :     SrcSub = MI->getOperand(2).getSubReg();
     354             :   } else
     355             :     return false;
     356             :   return true;
     357             : }
     358             : 
     359             : /// Return true if this block should be vacated by the coalescer to eliminate
     360             : /// branches. The important cases to handle in the coalescer are critical edges
     361             : /// split during phi elimination which contain only copies. Simple blocks that
     362             : /// contain non-branches should also be vacated, but this can be handled by an
     363             : /// earlier pass similar to early if-conversion.
     364           0 : static bool isSplitEdge(const MachineBasicBlock *MBB) {
     365           0 :   if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
     366             :     return false;
     367             : 
     368           0 :   for (const auto &MI : *MBB) {
     369           0 :     if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
     370             :       return false;
     371             :   }
     372             :   return true;
     373             : }
     374             : 
     375     1849606 : bool CoalescerPair::setRegisters(const MachineInstr *MI) {
     376     1849606 :   SrcReg = DstReg = 0;
     377     1849606 :   SrcIdx = DstIdx = 0;
     378     1849606 :   NewRC = nullptr;
     379     1849606 :   Flipped = CrossClass = false;
     380             : 
     381             :   unsigned Src, Dst, SrcSub, DstSub;
     382     1849606 :   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
     383             :     return false;
     384     1849469 :   Partial = SrcSub || DstSub;
     385             : 
     386             :   // If one register is a physreg, it must be Dst.
     387     3698938 :   if (TargetRegisterInfo::isPhysicalRegister(Src)) {
     388      889440 :     if (TargetRegisterInfo::isPhysicalRegister(Dst))
     389             :       return false;
     390             :     std::swap(Src, Dst);
     391             :     std::swap(SrcSub, DstSub);
     392      443681 :     Flipped = true;
     393             :   }
     394             : 
     395     1848430 :   const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
     396             : 
     397     3696860 :   if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
     398             :     // Eliminate DstSub on a physreg.
     399      883440 :     if (DstSub) {
     400           2 :       Dst = TRI.getSubReg(Dst, DstSub);
     401           2 :       if (!Dst) return false;
     402           2 :       DstSub = 0;
     403             :     }
     404             : 
     405             :     // Eliminate SrcSub by picking a corresponding Dst superregister.
     406      883440 :     if (SrcSub) {
     407       31342 :       Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
     408       15671 :       if (!Dst) return false;
     409     1735538 :     } else if (!MRI.getRegClass(Src)->contains(Dst)) {
     410             :       return false;
     411             :     }
     412             :   } else {
     413             :     // Both registers are virtual.
     414      964990 :     const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
     415             :     const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
     416             : 
     417             :     // Both registers have subreg indices.
     418      964990 :     if (SrcSub && DstSub) {
     419             :       // Copies between different sub-registers are never coalescable.
     420       66670 :       if (Src == Dst && SrcSub != DstSub)
     421             :         return false;
     422             : 
     423      193827 :       NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
     424       64609 :                                          SrcIdx, DstIdx);
     425       64609 :       if (!NewRC)
     426             :         return false;
     427      898320 :     } else if (DstSub) {
     428             :       // SrcReg will be merged with a sub-register of DstReg.
     429      134765 :       SrcIdx = DstSub;
     430      134765 :       NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
     431      763555 :     } else if (SrcSub) {
     432             :       // DstReg will be merged with a sub-register of SrcReg.
     433      125254 :       DstIdx = SrcSub;
     434      125254 :       NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
     435             :     } else {
     436             :       // This is a straight copy without sub-registers.
     437      638301 :       NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
     438             :     }
     439             : 
     440             :     // The combined constraint may be impossible to satisfy.
     441      957063 :     if (!NewRC)
     442             :       return false;
     443             : 
     444             :     // Prefer SrcReg to be a sub-register of DstReg.
     445             :     // FIXME: Coalescer should support subregs symmetrically.
     446      913050 :     if (DstIdx && !SrcIdx) {
     447             :       std::swap(Src, Dst);
     448             :       std::swap(SrcIdx, DstIdx);
     449      115436 :       Flipped = !Flipped;
     450             :     }
     451             : 
     452     1527440 :     CrossClass = NewRC != DstRC || NewRC != SrcRC;
     453             :   }
     454             :   // Check our invariants
     455             :   assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
     456             :   assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
     457             :          "Cannot have a physical SubIdx");
     458     1792479 :   SrcReg = Src;
     459     1792479 :   DstReg = Dst;
     460     1792479 :   return true;
     461             : }
     462             : 
     463      166364 : bool CoalescerPair::flip() {
     464      332728 :   if (TargetRegisterInfo::isPhysicalRegister(DstReg))
     465             :     return false;
     466             :   std::swap(SrcReg, DstReg);
     467             :   std::swap(SrcIdx, DstIdx);
     468      166364 :   Flipped = !Flipped;
     469      166364 :   return true;
     470             : }
     471             : 
     472     2853735 : bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
     473     2853735 :   if (!MI)
     474             :     return false;
     475             :   unsigned Src, Dst, SrcSub, DstSub;
     476     2845288 :   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
     477             :     return false;
     478             : 
     479             :   // Find the virtual register that is SrcReg.
     480     1992873 :   if (Dst == SrcReg) {
     481             :     std::swap(Src, Dst);
     482             :     std::swap(SrcSub, DstSub);
     483     1568111 :   } else if (Src != SrcReg) {
     484             :     return false;
     485             :   }
     486             : 
     487             :   // Now check that Dst matches DstReg.
     488     3645316 :   if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
     489     1303578 :     if (!TargetRegisterInfo::isPhysicalRegister(Dst))
     490             :       return false;
     491             :     assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
     492             :     // DstSub could be set for a physreg from INSERT_SUBREG.
     493      638170 :     if (DstSub)
     494           0 :       Dst = TRI.getSubReg(Dst, DstSub);
     495             :     // Full copy of Src.
     496      638170 :     if (!SrcSub)
     497      632710 :       return DstReg == Dst;
     498             :     // This is a partial register copy. Check that the parts match.
     499        5460 :     return TRI.getSubReg(DstReg, SrcSub) == Dst;
     500             :   } else {
     501             :     // DstReg is virtual.
     502     1170869 :     if (DstReg != Dst)
     503             :       return false;
     504             :     // Registers match, do the subregisters line up?
     505     1128352 :     return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
     506     1128392 :            TRI.composeSubRegIndices(DstIdx, DstSub);
     507             :   }
     508             : }
     509             : 
     510       19918 : void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
     511       19918 :   AU.setPreservesCFG();
     512             :   AU.addRequired<AAResultsWrapperPass>();
     513             :   AU.addRequired<LiveIntervals>();
     514             :   AU.addPreserved<LiveIntervals>();
     515             :   AU.addPreserved<SlotIndexes>();
     516             :   AU.addRequired<MachineLoopInfo>();
     517             :   AU.addPreserved<MachineLoopInfo>();
     518       19918 :   AU.addPreservedID(MachineDominatorsID);
     519       19918 :   MachineFunctionPass::getAnalysisUsage(AU);
     520       19918 : }
     521             : 
     522       29943 : void RegisterCoalescer::eliminateDeadDefs() {
     523             :   SmallVector<unsigned, 8> NewRegs;
     524       59886 :   LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
     525       29943 :                 nullptr, this).eliminateDeadDefs(DeadDefs);
     526       29943 : }
     527             : 
     528       29947 : void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
     529             :   // MI may be in WorkList. Make sure we don't visit it.
     530       29947 :   ErasedInstrs.insert(MI);
     531       29947 : }
     532             : 
     533       73288 : bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
     534             :                                              MachineInstr *CopyMI) {
     535             :   assert(!CP.isPartial() && "This doesn't work for partial copies.");
     536             :   assert(!CP.isPhys() && "This doesn't work for physreg copies.");
     537             : 
     538             :   LiveInterval &IntA =
     539       73288 :     LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
     540             :   LiveInterval &IntB =
     541       73288 :     LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
     542       73288 :   SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
     543             : 
     544             :   // We have a non-trivially-coalescable copy with IntA being the source and
     545             :   // IntB being the dest, thus this defines a value number in IntB.  If the
     546             :   // source value number (in IntA) is defined by a copy from B, see if we can
     547             :   // merge these two pieces of B into a single value number, eliminating a copy.
     548             :   // For example:
     549             :   //
     550             :   //  A3 = B0
     551             :   //    ...
     552             :   //  B1 = A3      <- this copy
     553             :   //
     554             :   // In this case, B0 can be extended to where the B1 copy lives, allowing the
     555             :   // B1 value number to be replaced with B0 (which simplifies the B
     556             :   // liveinterval).
     557             : 
     558             :   // BValNo is a value number in B that is defined by a copy from A.  'B1' in
     559             :   // the example above.
     560       73288 :   LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
     561       73288 :   if (BS == IntB.end()) return false;
     562       73288 :   VNInfo *BValNo = BS->valno;
     563             : 
     564             :   // Get the location that B is defined at.  Two options: either this value has
     565             :   // an unknown definition point or it is defined at CopyIdx.  If unknown, we
     566             :   // can't process it.
     567       73288 :   if (BValNo->def != CopyIdx) return false;
     568             : 
     569             :   // AValNo is the value number in A that defines the copy, A3 in the example.
     570       73288 :   SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
     571       73288 :   LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
     572             :   // The live segment might not exist after fun with physreg coalescing.
     573       73288 :   if (AS == IntA.end()) return false;
     574       73288 :   VNInfo *AValNo = AS->valno;
     575             : 
     576             :   // If AValNo is defined as a copy from IntB, we can potentially process this.
     577             :   // Get the instruction that defines this value number.
     578             :   MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
     579             :   // Don't allow any partial copies, even if isCoalescable() allows them.
     580       73288 :   if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
     581             :     return false;
     582             : 
     583             :   // Get the Segment in IntB that this value number starts with.
     584             :   LiveInterval::iterator ValS =
     585          31 :     IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
     586          31 :   if (ValS == IntB.end())
     587             :     return false;
     588             : 
     589             :   // Make sure that the end of the live segment is inside the same block as
     590             :   // CopyMI.
     591             :   MachineInstr *ValSEndInst =
     592             :     LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
     593          31 :   if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
     594             :     return false;
     595             : 
     596             :   // Okay, we now know that ValS ends in the same block that the CopyMI
     597             :   // live-range starts.  If there are no intervening live segments between them
     598             :   // in IntB, we can merge them.
     599          14 :   if (ValS+1 != BS) return false;
     600             : 
     601             :   LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));
     602             : 
     603          14 :   SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
     604             :   // We are about to delete CopyMI, so need to remove it as the 'instruction
     605             :   // that defines this value #'. Update the valnum with the new defining
     606             :   // instruction #.
     607          14 :   BValNo->def = FillerStart;
     608             : 
     609             :   // Okay, we can merge them.  We need to insert a new liverange:
     610             :   // [ValS.end, BS.begin) of either value number, then we merge the
     611             :   // two value numbers.
     612          14 :   IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
     613             : 
     614             :   // Okay, merge "B1" into the same value number as "B0".
     615          14 :   if (BValNo != ValS->valno)
     616          14 :     IntB.MergeValueNumberInto(BValNo, ValS->valno);
     617             : 
     618             :   // Do the same for the subregister segments.
     619          17 :   for (LiveInterval::SubRange &S : IntB.subranges()) {
     620             :     // Check for SubRange Segments of the form [1234r,1234d:0) which can be
     621             :     // removed to prevent creating bogus SubRange Segments.
     622           3 :     LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
     623           3 :     if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
     624           1 :       S.removeSegment(*SS, true);
     625           1 :       continue;
     626             :     }
     627           2 :     VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
     628           2 :     S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
     629           2 :     VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
     630           2 :     if (SubBValNo != SubValSNo)
     631           2 :       S.MergeValueNumberInto(SubBValNo, SubValSNo);
     632             :   }
     633             : 
     634             :   LLVM_DEBUG(dbgs() << "   result = " << IntB << '\n');
     635             : 
     636             :   // If the source instruction was killing the source register before the
     637             :   // merge, unset the isKill marker given the live range has been extended.
     638          14 :   int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
     639          14 :   if (UIdx != -1) {
     640           0 :     ValSEndInst->getOperand(UIdx).setIsKill(false);
     641             :   }
     642             : 
     643             :   // Rewrite the copy.
     644          14 :   CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
     645             :   // If the copy instruction was killing the destination register or any
     646             :   // subrange before the merge trim the live range.
     647             :   bool RecomputeLiveRange = AS->end == CopyIdx;
     648          14 :   if (!RecomputeLiveRange) {
     649          14 :     for (LiveInterval::SubRange &S : IntA.subranges()) {
     650           2 :       LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
     651           2 :       if (SS != S.end() && SS->end == CopyIdx) {
     652             :         RecomputeLiveRange = true;
     653             :         break;
     654             :       }
     655             :     }
     656             :   }
     657          14 :   if (RecomputeLiveRange)
     658           2 :     shrinkToUses(&IntA);
     659             : 
     660             :   ++numExtends;
     661             :   return true;
     662             : }
     663             : 
     664           0 : bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
     665             :                                              LiveInterval &IntB,
     666             :                                              VNInfo *AValNo,
     667             :                                              VNInfo *BValNo) {
     668             :   // If AValNo has PHI kills, conservatively assume that IntB defs can reach
     669             :   // the PHI values.
     670           0 :   if (LIS->hasPHIKill(IntA, AValNo))
     671           0 :     return true;
     672             : 
     673           0 :   for (LiveRange::Segment &ASeg : IntA.segments) {
     674           0 :     if (ASeg.valno != AValNo) continue;
     675             :     LiveInterval::iterator BI =
     676           0 :       std::upper_bound(IntB.begin(), IntB.end(), ASeg.start);
     677           0 :     if (BI != IntB.begin())
     678           0 :       --BI;
     679           0 :     for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
     680           0 :       if (BI->valno == BValNo)
     681           0 :         continue;
     682           0 :       if (BI->start <= ASeg.start && BI->end > ASeg.start)
     683           0 :         return true;
     684           0 :       if (BI->start > ASeg.start && BI->start < ASeg.end)
     685           0 :         return true;
     686             :     }
     687             :   }
     688             :   return false;
     689             : }
     690             : 
     691             : /// Copy segments with value number @p SrcValNo from liverange @p Src to live
     692             : /// range @Dst and use value number @p DstValNo there.
     693             : static std::pair<bool,bool>
     694         107 : addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
     695             :                      const VNInfo *SrcValNo) {
     696             :   bool Changed = false;
     697             :   bool MergedWithDead = false;
     698         789 :   for (const LiveRange::Segment &S : Src.segments) {
     699         682 :     if (S.valno != SrcValNo)
     700         568 :       continue;
     701             :     // This is adding a segment from Src that ends in a copy that is about
     702             :     // to be removed. This segment is going to be merged with a pre-existing
     703             :     // segment in Dst. This works, except in cases when the corresponding
     704             :     // segment in Dst is dead. For example: adding [192r,208r:1) from Src
     705             :     // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
     706             :     // Recognized such cases, so that the segments can be shrunk.
     707             :     LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
     708         114 :     LiveRange::Segment &Merged = *Dst.addSegment(Added);
     709         114 :     if (Merged.end.isDead())
     710             :       MergedWithDead = true;
     711             :     Changed = true;
     712             :   }
     713         107 :   return std::make_pair(Changed, MergedWithDead);
     714             : }
     715             : 
     716             : std::pair<bool,bool>
     717       73274 : RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
     718             :                                             MachineInstr *CopyMI) {
     719             :   assert(!CP.isPhys());
     720             : 
     721             :   LiveInterval &IntA =
     722       73274 :       LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
     723             :   LiveInterval &IntB =
     724       73274 :       LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
     725             : 
     726             :   // We found a non-trivially-coalescable copy with IntA being the source and
     727             :   // IntB being the dest, thus this defines a value number in IntB.  If the
     728             :   // source value number (in IntA) is defined by a commutable instruction and
     729             :   // its other operand is coalesced to the copy dest register, see if we can
     730             :   // transform the copy into a noop by commuting the definition. For example,
     731             :   //
     732             :   //  A3 = op A2 killed B0
     733             :   //    ...
     734             :   //  B1 = A3      <- this copy
     735             :   //    ...
     736             :   //     = op A3   <- more uses
     737             :   //
     738             :   // ==>
     739             :   //
     740             :   //  B2 = op B0 killed A2
     741             :   //    ...
     742             :   //  B1 = B2      <- now an identity copy
     743             :   //    ...
     744             :   //     = op B2   <- more uses
     745             : 
     746             :   // BValNo is a value number in B that is defined by a copy from A. 'B1' in
     747             :   // the example above.
     748       73274 :   SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
     749       73274 :   VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
     750             :   assert(BValNo != nullptr && BValNo->def == CopyIdx);
     751             : 
     752             :   // AValNo is the value number in A that defines the copy, A3 in the example.
     753      146548 :   VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
     754             :   assert(AValNo && !AValNo->isUnused() && "COPY source not live");
     755       73274 :   if (AValNo->isPHIDef())
     756        8447 :     return { false, false };
     757             :   MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
     758       64827 :   if (!DefMI)
     759           0 :     return { false, false };
     760       64827 :   if (!DefMI->isCommutable())
     761       58176 :     return { false, false };
     762             :   // If DefMI is a two-address instruction then commuting it will change the
     763             :   // destination register.
     764        6651 :   int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
     765             :   assert(DefIdx != -1);
     766             :   unsigned UseOpIdx;
     767        6651 :   if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
     768         211 :     return { false, false };
     769             : 
     770             :   // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
     771             :   // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
     772             :   // passed to the method. That _other_ operand is chosen by
     773             :   // the findCommutedOpIndices() method.
     774             :   //
     775             :   // That is obviously an area for improvement in case of instructions having
     776             :   // more than 2 operands. For example, if some instruction has 3 commutable
     777             :   // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
     778             :   // op#2<->op#3) of commute transformation should be considered/tried here.
     779        6440 :   unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
     780        6440 :   if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
     781         138 :     return { false, false };
     782             : 
     783        6302 :   MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
     784        6302 :   unsigned NewReg = NewDstMO.getReg();
     785        6302 :   if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
     786        6197 :     return { false, false };
     787             : 
     788             :   // Make sure there are no other definitions of IntB that would reach the
     789             :   // uses which the new definition can reach.
     790         105 :   if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
     791           5 :     return { false, false };
     792             : 
     793             :   // If some of the uses of IntA.reg is already coalesced away, return false.
     794             :   // It's not possible to determine whether it's safe to perform the coalescing.
     795         981 :   for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) {
     796         781 :     MachineInstr *UseMI = MO.getParent();
     797         781 :     unsigned OpNo = &MO - &UseMI->getOperand(0);
     798         781 :     SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
     799         781 :     LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
     800         781 :     if (US == IntA.end() || US->valno != AValNo)
     801         640 :       continue;
     802             :     // If this use is tied to a def, we can't rewrite the register.
     803         141 :     if (UseMI->isRegTiedToDefOperand(OpNo))
     804           0 :       return { false, false };
     805             :   }
     806             : 
     807             :   LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
     808             :                     << *DefMI);
     809             : 
     810             :   // At this point we have decided that it is legal to do this
     811             :   // transformation.  Start by commuting the instruction.
     812         100 :   MachineBasicBlock *MBB = DefMI->getParent();
     813             :   MachineInstr *NewMI =
     814         100 :       TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
     815         100 :   if (!NewMI)
     816           0 :     return { false, false };
     817         100 :   if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
     818         200 :       TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
     819         200 :       !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
     820           0 :     return { false, false };
     821         100 :   if (NewMI != DefMI) {
     822           0 :     LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
     823             :     MachineBasicBlock::iterator Pos = DefMI;
     824             :     MBB->insert(Pos, NewMI);
     825             :     MBB->erase(DefMI);
     826             :   }
     827             : 
     828             :   // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
     829             :   // A = or A, B
     830             :   // ...
     831             :   // B = A
     832             :   // ...
     833             :   // C = killed A
     834             :   // ...
     835             :   //   = B
     836             : 
     837             :   // Update uses of IntA of the specific Val# with IntB.
     838         100 :   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
     839             :                                          UE = MRI->use_end();
     840        1174 :        UI != UE; /* ++UI is below because of possible MI removal */) {
     841             :     MachineOperand &UseMO = *UI;
     842             :     ++UI;
     843        1074 :     if (UseMO.isUndef())
     844             :       continue;
     845        1074 :     MachineInstr *UseMI = UseMO.getParent();
     846        1074 :     if (UseMI->isDebugValue()) {
     847             :       // FIXME These don't have an instruction index.  Not clear we have enough
     848             :       // info to decide whether to do this replacement or not.  For now do it.
     849         293 :       UseMO.setReg(NewReg);
     850         293 :       continue;
     851             :     }
     852         781 :     SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
     853         781 :     LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
     854             :     assert(US != IntA.end() && "Use must be live");
     855         781 :     if (US->valno != AValNo)
     856             :       continue;
     857             :     // Kill flags are no longer accurate. They are recomputed after RA.
     858             :     UseMO.setIsKill(false);
     859         141 :     if (TargetRegisterInfo::isPhysicalRegister(NewReg))
     860           0 :       UseMO.substPhysReg(NewReg, *TRI);
     861             :     else
     862         141 :       UseMO.setReg(NewReg);
     863         141 :     if (UseMI == CopyMI)
     864             :       continue;
     865          41 :     if (!UseMI->isCopy())
     866             :       continue;
     867          16 :     if (UseMI->getOperand(0).getReg() != IntB.reg ||
     868             :         UseMI->getOperand(0).getSubReg())
     869             :       continue;
     870             : 
     871             :     // This copy will become a noop. If it's defining a new val#, merge it into
     872             :     // BValNo.
     873             :     SlotIndex DefIdx = UseIdx.getRegSlot();
     874           0 :     VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
     875           5 :     if (!DVNI)
     876           0 :       continue;
     877             :     LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
     878             :     assert(DVNI->def == DefIdx);
     879           5 :     BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
     880           5 :     for (LiveInterval::SubRange &S : IntB.subranges()) {
     881           0 :       VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
     882           0 :       if (!SubDVNI)
     883           0 :         continue;
     884           0 :       VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
     885             :       assert(SubBValNo->def == CopyIdx);
     886           0 :       S.MergeValueNumberInto(SubDVNI, SubBValNo);
     887             :     }
     888             : 
     889           5 :     deleteInstr(UseMI);
     890             :   }
     891             : 
     892             :   // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
     893             :   // is updated.
     894         100 :   bool ShrinkB = false;
     895         100 :   BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
     896         100 :   if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
     897           4 :     if (!IntA.hasSubRanges()) {
     898           2 :       LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
     899           2 :       IntA.createSubRangeFrom(Allocator, Mask, IntA);
     900           2 :     } else if (!IntB.hasSubRanges()) {
     901           1 :       LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg);
     902           1 :       IntB.createSubRangeFrom(Allocator, Mask, IntB);
     903             :     }
     904             :     SlotIndex AIdx = CopyIdx.getRegSlot(true);
     905             :     LaneBitmask MaskA;
     906           9 :     for (LiveInterval::SubRange &SA : IntA.subranges()) {
     907          10 :       VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
     908             :       assert(ASubValNo != nullptr);
     909             :       MaskA |= SA.LaneMask;
     910             : 
     911          10 :       IntB.refineSubRanges(Allocator, SA.LaneMask,
     912             :           [&Allocator,&SA,CopyIdx,ASubValNo,&ShrinkB]
     913             :             (LiveInterval::SubRange &SR) {
     914             :         VNInfo *BSubValNo = SR.empty()
     915             :           ? SR.getNextValue(CopyIdx, Allocator)
     916             :           : SR.getVNInfoAt(CopyIdx);
     917             :         assert(BSubValNo != nullptr);
     918             :         auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
     919             :         ShrinkB |= P.second;
     920             :         if (P.first)
     921             :           BSubValNo->def = ASubValNo->def;
     922             :       });
     923             :     }
     924             :     // Go over all subranges of IntB that have not been covered by IntA,
     925             :     // and delete the segments starting at CopyIdx. This can happen if
     926             :     // IntA has undef lanes that are defined in IntB.
     927          12 :     for (LiveInterval::SubRange &SB : IntB.subranges()) {
     928          16 :       if ((SB.LaneMask & MaskA).any())
     929             :         continue;
     930           2 :       if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
     931           1 :         if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
     932           1 :           SB.removeSegment(*S, true);
     933             :     }
     934             :   }
     935             : 
     936         100 :   BValNo->def = AValNo->def;
     937         100 :   auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
     938         100 :   ShrinkB |= P.second;
     939             :   LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
     940             : 
     941         100 :   LIS->removeVRegDefAt(IntA, AValNo->def);
     942             : 
     943             :   LLVM_DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
     944             :   ++numCommutes;
     945         100 :   return { true, ShrinkB };
     946             : }
     947             : 
     948             : /// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
     949             : /// predecessor of BB2, and if B is not redefined on the way from A = B
     950             : /// in BB2 to B = A in BB2, B = A in BB2 is partially redundant if the
     951             : /// execution goes through the path from BB0 to BB2. We may move B = A
     952             : /// to the predecessor without such reversed copy.
     953             : /// So we will transform the program from:
     954             : ///   BB0:
     955             : ///      A = B;    BB1:
     956             : ///       ...         ...
     957             : ///     /     \      /
     958             : ///             BB2:
     959             : ///               ...
     960             : ///               B = A;
     961             : ///
     962             : /// to:
     963             : ///
     964             : ///   BB0:         BB1:
     965             : ///      A = B;        ...
     966             : ///       ...          B = A;
     967             : ///     /     \       /
     968             : ///             BB2:
     969             : ///               ...
     970             : ///
     971             : /// A special case is when BB0 and BB2 are the same BB which is the only
     972             : /// BB in a loop:
     973             : ///   BB1:
     974             : ///        ...
     975             : ///   BB0/BB2:  ----
     976             : ///        B = A;   |
     977             : ///        ...      |
     978             : ///        A = B;   |
     979             : ///          |-------
     980             : ///          |
     981             : /// We may hoist B = A from BB0/BB2 to BB1.
     982             : ///
     983             : /// The major preconditions for correctness to remove such partial
     984             : /// redundancy include:
     985             : /// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
     986             : ///    the PHI is defined by the reversed copy A = B in BB0.
     987             : /// 2. No B is referenced from the start of BB2 to B = A.
     988             : /// 3. No B is defined from A = B to the end of BB0.
     989             : /// 4. BB1 has only one successor.
     990             : ///
     991             : /// 2 and 4 implicitly ensure B is not live at the end of BB1.
     992             : /// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
     993             : /// colder place, which not only prevent endless loop, but also make sure
     994             : /// the movement of copy is beneficial.
     995       73174 : bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
     996             :                                                 MachineInstr &CopyMI) {
     997             :   assert(!CP.isPhys());
     998             :   if (!CopyMI.isFullCopy())
     999             :     return false;
    1000             : 
    1001       73174 :   MachineBasicBlock &MBB = *CopyMI.getParent();
    1002       73174 :   if (MBB.isEHPad())
    1003             :     return false;
    1004             : 
    1005       73170 :   if (MBB.pred_size() != 2)
    1006             :     return false;
    1007             : 
    1008             :   LiveInterval &IntA =
    1009       13445 :       LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
    1010             :   LiveInterval &IntB =
    1011       13445 :       LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
    1012             : 
    1013             :   // A is defined by PHI at the entry of MBB.
    1014       13445 :   SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
    1015       26890 :   VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
    1016             :   assert(AValNo && !AValNo->isUnused() && "COPY source not live");
    1017       13445 :   if (!AValNo->isPHIDef())
    1018             :     return false;
    1019             : 
    1020             :   // No B is referenced before CopyMI in MBB.
    1021       11584 :   if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
    1022             :     return false;
    1023             : 
    1024             :   // MBB has two predecessors: one contains A = B so no copy will be inserted
    1025             :   // for it. The other one will have a copy moved from MBB.
    1026             :   bool FoundReverseCopy = false;
    1027             :   MachineBasicBlock *CopyLeftBB = nullptr;
    1028       16917 :   for (MachineBasicBlock *Pred : MBB.predecessors()) {
    1029       22556 :     VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
    1030       11278 :     MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
    1031       11278 :     if (!DefMI || !DefMI->isFullCopy()) {
    1032             :       CopyLeftBB = Pred;
    1033             :       continue;
    1034             :     }
    1035             :     // Check DefMI is a reverse copy and it is in BB Pred.
    1036        9810 :     if (DefMI->getOperand(0).getReg() != IntA.reg ||
    1037        4905 :         DefMI->getOperand(1).getReg() != IntB.reg ||
    1038         152 :         DefMI->getParent() != Pred) {
    1039             :       CopyLeftBB = Pred;
    1040             :       continue;
    1041             :     }
    1042             :     // If there is any other def of B after DefMI and before the end of Pred,
    1043             :     // we need to keep the copy of B = A at the end of Pred if we remove
    1044             :     // B = A from MBB.
    1045             :     bool ValB_Changed = false;
    1046         797 :     for (auto VNI : IntB.valnos) {
    1047         645 :       if (VNI->isUnused())
    1048             :         continue;
    1049         645 :       if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
    1050             :         ValB_Changed = true;
    1051             :         break;
    1052             :       }
    1053             :     }
    1054         152 :     if (ValB_Changed) {
    1055             :       CopyLeftBB = Pred;
    1056           0 :       continue;
    1057             :     }
    1058             :     FoundReverseCopy = true;
    1059             :   }
    1060             : 
    1061             :   // If no reverse copy is found in predecessors, nothing to do.
    1062        5639 :   if (!FoundReverseCopy)
    1063             :     return false;
    1064             : 
    1065             :   // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
    1066             :   // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
    1067             :   // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
    1068             :   // update IntA/IntB.
    1069             :   //
    1070             :   // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
    1071             :   // MBB is hotter than CopyLeftBB.
    1072         151 :   if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
    1073             :     return false;
    1074             : 
    1075             :   // Now (almost sure it's) ok to move copy.
    1076         116 :   if (CopyLeftBB) {
    1077             :     // Position in CopyLeftBB where we should insert new copy.
    1078         115 :     auto InsPos = CopyLeftBB->getFirstTerminator();
    1079             : 
    1080             :     // Make sure that B isn't referenced in the terminators (if any) at the end
    1081             :     // of the predecessor since we're about to insert a new definition of B
    1082             :     // before them.
    1083         115 :     if (InsPos != CopyLeftBB->end()) {
    1084          70 :       SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
    1085         140 :       if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
    1086           1 :         return false;
    1087             :     }
    1088             : 
    1089             :     LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
    1090             :                       << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
    1091             : 
    1092             :     // Insert new copy to CopyLeftBB.
    1093         114 :     MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
    1094         228 :                                       TII->get(TargetOpcode::COPY), IntB.reg)
    1095         114 :                                   .addReg(IntA.reg);
    1096             :     SlotIndex NewCopyIdx =
    1097         114 :         LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
    1098         228 :     IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
    1099         114 :     for (LiveInterval::SubRange &SR : IntB.subranges())
    1100           0 :       SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
    1101             : 
    1102             :     // If the newly created Instruction has an address of an instruction that was
    1103             :     // deleted before (object recycled by the allocator) it needs to be removed from
    1104             :     // the deleted list.
    1105             :     ErasedInstrs.erase(NewCopyMI);
    1106             :   } else {
    1107             :     LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
    1108             :                       << printMBBReference(MBB) << '\t' << CopyMI);
    1109             :   }
    1110             : 
    1111             :   // Remove CopyMI.
    1112             :   // Note: This is fine to remove the copy before updating the live-ranges.
    1113             :   // While updating the live-ranges, we only look at slot indices and
    1114             :   // never go back to the instruction.
    1115             :   // Mark instructions as deleted.
    1116         115 :   deleteInstr(&CopyMI);
    1117             : 
    1118             :   // Update the liveness.
    1119             :   SmallVector<SlotIndex, 8> EndPoints;
    1120         115 :   VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
    1121         230 :   LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
    1122             :                   &EndPoints);
    1123             :   BValNo->markUnused();
    1124             :   // Extend IntB to the EndPoints of its original live interval.
    1125         115 :   LIS->extendToIndices(IntB, EndPoints);
    1126             : 
    1127             :   // Now, do the same for its subranges.
    1128         118 :   for (LiveInterval::SubRange &SR : IntB.subranges()) {
    1129             :     EndPoints.clear();
    1130           3 :     VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
    1131             :     assert(BValNo && "All sublanes should be live");
    1132           3 :     LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
    1133             :     BValNo->markUnused();
    1134             :     // We can have a situation where the result of the original copy is live,
    1135             :     // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
    1136             :     // the copy appear as an endpoint from pruneValue(), but we don't want it
    1137             :     // to because the copy has been removed.  We can go ahead and remove that
    1138             :     // endpoint; there is no other situation here that there could be a use at
    1139             :     // the same place as we know that the copy is a full copy.
    1140          18 :     for (unsigned I = 0; I != EndPoints.size(); ) {
    1141           6 :       if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
    1142           2 :         EndPoints[I] = EndPoints.back();
    1143             :         EndPoints.pop_back();
    1144           2 :         continue;
    1145             :       }
    1146           4 :       ++I;
    1147             :     }
    1148           3 :     LIS->extendToIndices(SR, EndPoints);
    1149             :   }
    1150             :   // If any dead defs were extended, truncate them.
    1151         115 :   shrinkToUses(&IntB);
    1152             : 
    1153             :   // Finally, update the live-range of IntA.
    1154         115 :   shrinkToUses(&IntA);
    1155             :   return true;
    1156             : }
    1157             : 
    1158             : /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
    1159             : /// defining a subregister.
    1160       41314 : static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
    1161             :   assert(!TargetRegisterInfo::isPhysicalRegister(Reg) &&
    1162             :          "This code cannot handle physreg aliasing");
    1163       44977 :   for (const MachineOperand &Op : MI.operands()) {
    1164       43756 :     if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
    1165             :       continue;
    1166             :     // Return true if we define the full register or don't care about the value
    1167             :     // inside other subregisters.
    1168       41314 :     if (Op.getSubReg() == 0 || Op.isUndef())
    1169             :       return true;
    1170             :   }
    1171             :   return false;
    1172             : }
    1173             : 
    1174      944240 : bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
    1175             :                                                 MachineInstr *CopyMI,
    1176             :                                                 bool &IsDefCopy) {
    1177      944240 :   IsDefCopy = false;
    1178      944240 :   unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
    1179      944240 :   unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
    1180      944240 :   unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
    1181      944240 :   unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
    1182      944240 :   if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
    1183             :     return false;
    1184             : 
    1185      549382 :   LiveInterval &SrcInt = LIS->getInterval(SrcReg);
    1186      549382 :   SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
    1187      549382 :   VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
    1188      549382 :   if (!ValNo)
    1189             :     return false;
    1190      549381 :   if (ValNo->isPHIDef() || ValNo->isUnused())
    1191             :     return false;
    1192             :   MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
    1193      532631 :   if (!DefMI)
    1194             :     return false;
    1195             :   if (DefMI->isCopyLike()) {
    1196      158208 :     IsDefCopy = true;
    1197      158208 :     return false;
    1198             :   }
    1199      374423 :   if (!TII->isAsCheapAsAMove(*DefMI))
    1200             :     return false;
    1201       44756 :   if (!TII->isTriviallyReMaterializable(*DefMI, AA))
    1202             :     return false;
    1203       41314 :   if (!definesFullReg(*DefMI, SrcReg))
    1204             :     return false;
    1205       40093 :   bool SawStore = false;
    1206       40093 :   if (!DefMI->isSafeToMove(AA, SawStore))
    1207             :     return false;
    1208       40093 :   const MCInstrDesc &MCID = DefMI->getDesc();
    1209       40093 :   if (MCID.getNumDefs() != 1)
    1210             :     return false;
    1211             :   // Only support subregister destinations when the def is read-undef.
    1212       40093 :   MachineOperand &DstOperand = CopyMI->getOperand(0);
    1213       40093 :   unsigned CopyDstReg = DstOperand.getReg();
    1214       40093 :   if (DstOperand.getSubReg() && !DstOperand.isUndef())
    1215             :     return false;
    1216             : 
    1217             :   // If both SrcIdx and DstIdx are set, correct rematerialization would widen
    1218             :   // the register substantially (beyond both source and dest size). This is bad
    1219             :   // for performance since it can cascade through a function, introducing many
    1220             :   // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
    1221             :   // around after a few subreg copies).
    1222       40017 :   if (SrcIdx && DstIdx)
    1223             :     return false;
    1224             : 
    1225       40017 :   const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
    1226       40017 :   if (!DefMI->isImplicitDef()) {
    1227       40017 :     if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
    1228             :       unsigned NewDstReg = DstReg;
    1229             : 
    1230       35084 :       unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
    1231       35084 :                                               DefMI->getOperand(0).getSubReg());
    1232       35084 :       if (NewDstIdx)
    1233        6694 :         NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
    1234             : 
    1235             :       // Finally, make sure that the physical subregister that will be
    1236             :       // constructed later is permitted for the instruction.
    1237       35084 :       if (!DefRC->contains(NewDstReg))
    1238             :         return false;
    1239             :     } else {
    1240             :       // Theoretically, some stack frame reference could exist. Just make sure
    1241             :       // it hasn't actually happened.
    1242             :       assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
    1243             :              "Only expect to deal with virtual or physical registers");
    1244             :     }
    1245             :   }
    1246             : 
    1247             :   DebugLoc DL = CopyMI->getDebugLoc();
    1248       40017 :   MachineBasicBlock *MBB = CopyMI->getParent();
    1249             :   MachineBasicBlock::iterator MII =
    1250       40017 :     std::next(MachineBasicBlock::iterator(CopyMI));
    1251       40017 :   TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI);
    1252       40017 :   MachineInstr &NewMI = *std::prev(MII);
    1253       40017 :   NewMI.setDebugLoc(DL);
    1254             : 
    1255             :   // In a situation like the following:
    1256             :   //     %0:subreg = instr              ; DefMI, subreg = DstIdx
    1257             :   //     %1        = copy %0:subreg ; CopyMI, SrcIdx = 0
    1258             :   // instead of widening %1 to the register class of %0 simply do:
    1259             :   //     %1 = instr
    1260       40017 :   const TargetRegisterClass *NewRC = CP.getNewRC();
    1261       40017 :   if (DstIdx != 0) {
    1262         104 :     MachineOperand &DefMO = NewMI.getOperand(0);
    1263         104 :     if (DefMO.getSubReg() == DstIdx) {
    1264             :       assert(SrcIdx == 0 && CP.isFlipped()
    1265             :              && "Shouldn't have SrcIdx+DstIdx at this point");
    1266          58 :       const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
    1267             :       const TargetRegisterClass *CommonRC =
    1268          58 :         TRI->getCommonSubClass(DefRC, DstRC);
    1269          58 :       if (CommonRC != nullptr) {
    1270             :         NewRC = CommonRC;
    1271             :         DstIdx = 0;
    1272             :         DefMO.setSubReg(0);
    1273             :         DefMO.setIsUndef(false); // Only subregs can have def+undef.
    1274             :       }
    1275             :     }
    1276             :   }
    1277             : 
    1278             :   // CopyMI may have implicit operands, save them so that we can transfer them
    1279             :   // over to the newly materialized instruction after CopyMI is removed.
    1280             :   SmallVector<MachineOperand, 4> ImplicitOps;
    1281       40017 :   ImplicitOps.reserve(CopyMI->getNumOperands() -
    1282       40017 :                       CopyMI->getDesc().getNumOperands());
    1283       40018 :   for (unsigned I = CopyMI->getDesc().getNumOperands(),
    1284       40017 :                 E = CopyMI->getNumOperands();
    1285       40018 :        I != E; ++I) {
    1286           1 :     MachineOperand &MO = CopyMI->getOperand(I);
    1287           1 :     if (MO.isReg()) {
    1288             :       assert(MO.isImplicit() && "No explicit operands after implicit operands.");
    1289             :       // Discard VReg implicit defs.
    1290           2 :       if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
    1291           1 :         ImplicitOps.push_back(MO);
    1292             :     }
    1293             :   }
    1294             : 
    1295       40017 :   LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI);
    1296       40017 :   CopyMI->eraseFromParent();
    1297       40017 :   ErasedInstrs.insert(CopyMI);
    1298             : 
    1299             :   // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
    1300             :   // We need to remember these so we can add intervals once we insert
    1301             :   // NewMI into SlotIndexes.
    1302             :   SmallVector<unsigned, 4> NewMIImplDefs;
    1303       53480 :   for (unsigned i = NewMI.getDesc().getNumOperands(),
    1304       40017 :                 e = NewMI.getNumOperands();
    1305       53480 :        i != e; ++i) {
    1306       13463 :     MachineOperand &MO = NewMI.getOperand(i);
    1307       13463 :     if (MO.isReg() && MO.isDef()) {
    1308             :       assert(MO.isImplicit() && MO.isDead() &&
    1309             :              TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
    1310       12389 :       NewMIImplDefs.push_back(MO.getReg());
    1311             :     }
    1312             :   }
    1313             : 
    1314       40017 :   if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
    1315        4933 :     unsigned NewIdx = NewMI.getOperand(0).getSubReg();
    1316             : 
    1317        4933 :     if (DefRC != nullptr) {
    1318        4933 :       if (NewIdx)
    1319        1112 :         NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
    1320             :       else
    1321        3821 :         NewRC = TRI->getCommonSubClass(NewRC, DefRC);
    1322             :       assert(NewRC && "subreg chosen for remat incompatible with instruction");
    1323             :     }
    1324             :     // Remap subranges to new lanemask and change register class.
    1325        4933 :     LiveInterval &DstInt = LIS->getInterval(DstReg);
    1326        4937 :     for (LiveInterval::SubRange &SR : DstInt.subranges()) {
    1327           8 :       SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
    1328             :     }
    1329        4933 :     MRI->setRegClass(DstReg, NewRC);
    1330             : 
    1331             :     // Update machine operands and add flags.
    1332        4933 :     updateRegDefsUses(DstReg, DstReg, DstIdx);
    1333        4933 :     NewMI.getOperand(0).setSubReg(NewIdx);
    1334             :     // updateRegDefUses can add an "undef" flag to the definition, since
    1335             :     // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
    1336             :     // sure that "undef" is not set.
    1337        4933 :     if (NewIdx == 0)
    1338        3821 :       NewMI.getOperand(0).setIsUndef(false);
    1339             :     // Add dead subregister definitions if we are defining the whole register
    1340             :     // but only part of it is live.
    1341             :     // This could happen if the rematerialization instruction is rematerializing
    1342             :     // more than actually is used in the register.
    1343             :     // An example would be:
    1344             :     // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
    1345             :     // ; Copying only part of the register here, but the rest is undef.
    1346             :     // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
    1347             :     // ==>
    1348             :     // ; Materialize all the constants but only using one
    1349             :     // %2 = LOAD_CONSTANTS 5, 8
    1350             :     //
    1351             :     // at this point for the part that wasn't defined before we could have
    1352             :     // subranges missing the definition.
    1353        4933 :     if (NewIdx == 0 && DstInt.hasSubRanges()) {
    1354           1 :       SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
    1355             :       SlotIndex DefIndex =
    1356           1 :           CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
    1357           1 :       LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
    1358           1 :       VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator();
    1359           2 :       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
    1360           1 :         if (!SR.liveAt(DefIndex))
    1361           0 :           SR.createDeadDef(DefIndex, Alloc);
    1362           1 :         MaxMask &= ~SR.LaneMask;
    1363             :       }
    1364           1 :       if (MaxMask.any()) {
    1365           0 :         LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
    1366           0 :         SR->createDeadDef(DefIndex, Alloc);
    1367             :       }
    1368             :     }
    1369             : 
    1370             :     // Make sure that the subrange for resultant undef is removed
    1371             :     // For example:
    1372             :     //   %1:sub1<def,read-undef> = LOAD CONSTANT 1
    1373             :     //   %2 = COPY %1
    1374             :     // ==>
    1375             :     //   %2:sub1<def, read-undef> = LOAD CONSTANT 1
    1376             :     //     ; Correct but need to remove the subrange for %2:sub0
    1377             :     //     ; as it is now undef
    1378        4933 :     if (NewIdx != 0 && DstInt.hasSubRanges()) {
    1379             :       // The affected subregister segments can be removed.
    1380           2 :       SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
    1381           2 :       LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
    1382             :       bool UpdatedSubRanges = false;
    1383           6 :       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
    1384           8 :         if ((SR.LaneMask & DstMask).none()) {
    1385             :           LLVM_DEBUG(dbgs()
    1386             :                      << "Removing undefined SubRange "
    1387             :                      << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
    1388             :           // VNI is in ValNo - remove any segments in this SubRange that have this ValNo
    1389           4 :           if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
    1390           1 :             SR.removeValNo(RmValNo);
    1391             :             UpdatedSubRanges = true;
    1392             :           }
    1393             :         }
    1394             :       }
    1395           2 :       if (UpdatedSubRanges)
    1396           1 :         DstInt.removeEmptySubRanges();
    1397             :     }
    1398       35084 :   } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
    1399             :     // The New instruction may be defining a sub-register of what's actually
    1400             :     // been asked for. If so it must implicitly define the whole thing.
    1401             :     assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
    1402             :            "Only expect virtual or physical registers in remat");
    1403             :     NewMI.getOperand(0).setIsDead(true);
    1404        7683 :     NewMI.addOperand(MachineOperand::CreateReg(
    1405             :         CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
    1406             :     // Record small dead def live-ranges for all the subregisters
    1407             :     // of the destination register.
    1408             :     // Otherwise, variables that live through may miss some
    1409             :     // interferences, thus creating invalid allocation.
    1410             :     // E.g., i386 code:
    1411             :     // %1 = somedef ; %1 GR8
    1412             :     // %2 = remat ; %2 GR32
    1413             :     // CL = COPY %2.sub_8bit
    1414             :     // = somedef %1 ; %1 GR8
    1415             :     // =>
    1416             :     // %1 = somedef ; %1 GR8
    1417             :     // dead ECX = remat ; implicit-def CL
    1418             :     // = somedef %1 ; %1 GR8
    1419             :     // %1 will see the interferences with CL but not with CH since
    1420             :     // no live-ranges would have been created for ECX.
    1421             :     // Fix that!
    1422        7683 :     SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
    1423        7683 :     for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
    1424       30580 :          Units.isValid(); ++Units)
    1425       45794 :       if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
    1426        5089 :         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
    1427             :   }
    1428             : 
    1429       80034 :   if (NewMI.getOperand(0).getSubReg())
    1430             :     NewMI.getOperand(0).setIsUndef();
    1431             : 
    1432             :   // Transfer over implicit operands to the rematerialized instruction.
    1433       40018 :   for (MachineOperand &MO : ImplicitOps)
    1434           1 :     NewMI.addOperand(MO);
    1435             : 
    1436       40017 :   SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
    1437       52406 :   for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
    1438       12389 :     unsigned Reg = NewMIImplDefs[i];
    1439       37167 :     for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
    1440       24778 :       if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
    1441          14 :         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
    1442             :   }
    1443             : 
    1444             :   LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
    1445             :   ++NumReMats;
    1446             : 
    1447             :   // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
    1448             :   // to describe DstReg instead.
    1449       40017 :   if (MRI->use_nodbg_empty(SrcReg)) {
    1450       59203 :     for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
    1451         175 :       MachineInstr *UseMI = UseMO.getParent();
    1452         175 :       if (UseMI->isDebugValue()) {
    1453         175 :         if (TargetRegisterInfo::isPhysicalRegister(DstReg))
    1454         173 :           UseMO.substPhysReg(DstReg, *TRI);
    1455             :         else
    1456           2 :           UseMO.setReg(DstReg);
    1457             :         // Move the debug value directly after the def of the rematerialized
    1458             :         // value in DstReg.
    1459         350 :         MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
    1460             :         LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
    1461             :       }
    1462             :     }
    1463             :   }
    1464             : 
    1465             :   if (ToBeUpdated.count(SrcReg))
    1466        1355 :     return true;
    1467             : 
    1468             :   unsigned NumCopyUses = 0;
    1469      122165 :   for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
    1470       44841 :     if (UseMO.getParent()->isCopyLike())
    1471       37432 :       NumCopyUses++;
    1472             :   }
    1473       38662 :   if (NumCopyUses < LateRematUpdateThreshold) {
    1474             :     // The source interval can become smaller because we removed a use.
    1475       38652 :     shrinkToUses(&SrcInt, &DeadDefs);
    1476       38652 :     if (!DeadDefs.empty())
    1477       29514 :       eliminateDeadDefs();
    1478             :   } else {
    1479             :     ToBeUpdated.insert(SrcReg);
    1480             :   }
    1481             :   return true;
    1482             : }
    1483             : 
    1484      911231 : MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
    1485             :   // ProcessImplicitDefs may leave some copies of <undef> values, it only
    1486             :   // removes local variables. When we have a copy like:
    1487             :   //
    1488             :   //   %1 = COPY undef %2
    1489             :   //
    1490             :   // We delete the copy and remove the corresponding value number from %1.
    1491             :   // Any uses of that value number are marked as <undef>.
    1492             : 
    1493             :   // Note that we do not query CoalescerPair here but redo isMoveInstr as the
    1494             :   // CoalescerPair may have a new register class with adjusted subreg indices
    1495             :   // at this point.
    1496             :   unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
    1497      911231 :   isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
    1498             : 
    1499      911231 :   SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
    1500      911231 :   const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
    1501             :   // CopyMI is undef iff SrcReg is not live before the instruction.
    1502      911231 :   if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
    1503       94251 :     LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
    1504      254865 :     for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
    1505      509730 :       if ((SR.LaneMask & SrcMask).none())
    1506             :         continue;
    1507       94251 :       if (SR.liveAt(Idx))
    1508             :         return nullptr;
    1509             :     }
    1510      816980 :   } else if (SrcLI.liveAt(Idx))
    1511             :     return nullptr;
    1512             : 
    1513             :   // If the undef copy defines a live-out value (i.e. an input to a PHI def),
    1514             :   // then replace it with an IMPLICIT_DEF.
    1515           5 :   LiveInterval &DstLI = LIS->getInterval(DstReg);
    1516             :   SlotIndex RegIndex = Idx.getRegSlot();
    1517          10 :   LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
    1518             :   assert(Seg != nullptr && "No segment for defining instruction");
    1519           5 :   if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) {
    1520           2 :     if (V->isPHIDef()) {
    1521           2 :       CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
    1522           6 :       for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
    1523           4 :         MachineOperand &MO = CopyMI->getOperand(i-1);
    1524           4 :         if (MO.isReg() && MO.isUse())
    1525           2 :           CopyMI->RemoveOperand(i-1);
    1526             :       }
    1527             :       LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
    1528             :                            "implicit def\n");
    1529             :       return CopyMI;
    1530             :     }
    1531             :   }
    1532             : 
    1533             :   // Remove any DstReg segments starting at the instruction.
    1534             :   LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
    1535             : 
    1536             :   // Remove value or merge with previous one in case of a subregister def.
    1537           3 :   if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
    1538           1 :     VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
    1539           1 :     DstLI.MergeValueNumberInto(VNI, PrevVNI);
    1540             : 
    1541             :     // The affected subregister segments can be removed.
    1542           1 :     LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
    1543           3 :     for (LiveInterval::SubRange &SR : DstLI.subranges()) {
    1544           4 :       if ((SR.LaneMask & DstMask).none())
    1545             :         continue;
    1546             : 
    1547           1 :       VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
    1548             :       assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
    1549           1 :       SR.removeValNo(SVNI);
    1550             :     }
    1551           1 :     DstLI.removeEmptySubRanges();
    1552             :   } else
    1553           2 :     LIS->removeVRegDefAt(DstLI, RegIndex);
    1554             : 
    1555             :   // Mark uses as undef.
    1556          14 :   for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
    1557           8 :     if (MO.isDef() /*|| MO.isUndef()*/)
    1558           6 :       continue;
    1559           4 :     const MachineInstr &MI = *MO.getParent();
    1560           4 :     SlotIndex UseIdx = LIS->getInstructionIndex(MI);
    1561           8 :     LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
    1562             :     bool isLive;
    1563           4 :     if (!UseMask.all() && DstLI.hasSubRanges()) {
    1564             :       isLive = false;
    1565           0 :       for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
    1566           0 :         if ((SR.LaneMask & UseMask).none())
    1567             :           continue;
    1568           0 :         if (SR.liveAt(UseIdx)) {
    1569             :           isLive = true;
    1570             :           break;
    1571             :         }
    1572             :       }
    1573             :     } else
    1574           4 :       isLive = DstLI.liveAt(UseIdx);
    1575           4 :     if (isLive)
    1576             :       continue;
    1577             :     MO.setIsUndef(true);
    1578             :     LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
    1579             :   }
    1580             : 
    1581             :   // A def of a subregister may be a use of the other subregisters, so
    1582             :   // deleting a def of a subregister may also remove uses. Since CopyMI
    1583             :   // is still part of the function (but about to be erased), mark all
    1584             :   // defs of DstReg in it as <undef>, so that shrinkToUses would
    1585             :   // ignore them.
    1586           9 :   for (MachineOperand &MO : CopyMI->operands())
    1587           6 :     if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
    1588             :       MO.setIsUndef(true);
    1589           3 :   LIS->shrinkToUses(&DstLI);
    1590             : 
    1591           3 :   return CopyMI;
    1592             : }
    1593             : 
    1594           0 : void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
    1595             :                                      MachineOperand &MO, unsigned SubRegIdx) {
    1596           0 :   LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
    1597           0 :   if (MO.isDef())
    1598             :     Mask = ~Mask;
    1599             :   bool IsUndef = true;
    1600           0 :   for (const LiveInterval::SubRange &S : Int.subranges()) {
    1601           0 :     if ((S.LaneMask & Mask).none())
    1602           0 :       continue;
    1603           0 :     if (S.liveAt(UseIdx)) {
    1604             :       IsUndef = false;
    1605             :       break;
    1606             :     }
    1607             :   }
    1608           0 :   if (IsUndef) {
    1609             :     MO.setIsUndef(true);
    1610             :     // We found out some subregister use is actually reading an undefined
    1611             :     // value. In some cases the whole vreg has become undefined at this
    1612             :     // point so we have to potentially shrink the main range if the
    1613             :     // use was ending a live segment there.
    1614           0 :     LiveQueryResult Q = Int.Query(UseIdx);
    1615           0 :     if (Q.valueOut() == nullptr)
    1616           0 :       ShrinkMainRange = true;
    1617             :   }
    1618           0 : }
    1619             : 
    1620      851350 : void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
    1621             :                                           unsigned DstReg,
    1622             :                                           unsigned SubIdx) {
    1623             :   bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
    1624      851350 :   LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
    1625             : 
    1626      804706 :   if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
    1627      989667 :     for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
    1628             :       unsigned SubReg = MO.getSubReg();
    1629      673081 :       if (SubReg == 0 || MO.isUndef())
    1630      218165 :         continue;
    1631      454916 :       MachineInstr &MI = *MO.getParent();
    1632      454916 :       if (MI.isDebugValue())
    1633             :         continue;
    1634      454916 :       SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
    1635      454916 :       addUndefFlag(*DstInt, UseIdx, MO, SubReg);
    1636             :     }
    1637             :   }
    1638             : 
    1639             :   SmallPtrSet<MachineInstr*, 8> Visited;
    1640             :   for (MachineRegisterInfo::reg_instr_iterator
    1641      851350 :        I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
    1642     2090211 :        I != E; ) {
    1643             :     MachineInstr *UseMI = &*(I++);
    1644             : 
    1645             :     // Each instruction can only be rewritten once because sub-register
    1646             :     // composition is not always idempotent. When SrcReg != DstReg, rewriting
    1647             :     // the UseMI operands removes them from the SrcReg use-def chain, but when
    1648             :     // SrcReg is DstReg we could encounter UseMI twice if it has multiple
    1649             :     // operands mentioning the virtual register.
    1650     1238861 :     if (SrcReg == DstReg && !Visited.insert(UseMI).second)
    1651        2144 :       continue;
    1652             : 
    1653             :     SmallVector<unsigned,8> Ops;
    1654             :     bool Reads, Writes;
    1655     1236717 :     std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
    1656             : 
    1657             :     // If SrcReg wasn't read, it may still be the case that DstReg is live-in
    1658             :     // because SrcReg is a sub-register.
    1659     1236717 :     if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue())
    1660      140497 :       Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
    1661             : 
    1662             :     // Replace SrcReg with DstReg in all UseMI operands.
    1663     2572923 :     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    1664     2672412 :       MachineOperand &MO = UseMI->getOperand(Ops[i]);
    1665             : 
    1666             :       // Adjust <undef> flags in case of sub-register joins. We don't want to
    1667             :       // turn a full def into a read-modify-write sub-register def and vice
    1668             :       // versa.
    1669     1336206 :       if (SubIdx && MO.isDef())
    1670      151726 :         MO.setIsUndef(!Reads);
    1671             : 
    1672             :       // A subreg use of a partially undef (super) register may be a complete
    1673             :       // undef use now and then has to be marked that way.
    1674     1336206 :       if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
    1675       82772 :         if (!DstInt->hasSubRanges()) {
    1676           1 :           BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
    1677           1 :           LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
    1678           1 :           DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
    1679             :         }
    1680             :         SlotIndex MIIdx = UseMI->isDebugValue()
    1681           4 :                               ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
    1682       82772 :                               : LIS->getInstructionIndex(*UseMI);
    1683       82772 :         SlotIndex UseIdx = MIIdx.getRegSlot(true);
    1684       82772 :         addUndefFlag(*DstInt, UseIdx, MO, SubIdx);
    1685             :       }
    1686             : 
    1687     1336206 :       if (DstIsPhys)
    1688       74153 :         MO.substPhysReg(DstReg, *TRI);
    1689             :       else
    1690     1262053 :         MO.substVirtReg(DstReg, SubIdx, *TRI);
    1691             :     }
    1692             : 
    1693             :     LLVM_DEBUG({
    1694             :       dbgs() << "\t\tupdated: ";
    1695             :       if (!UseMI->isDebugValue())
    1696             :         dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
    1697             :       dbgs() << *UseMI;
    1698             :     });
    1699             :   }
    1700      851350 : }
    1701             : 
    1702           0 : bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
    1703             :   // Always join simple intervals that are defined by a single copy from a
    1704             :   // reserved register. This doesn't increase register pressure, so it is
    1705             :   // always beneficial.
    1706           0 :   if (!MRI->isReserved(CP.getDstReg())) {
    1707             :     LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
    1708           0 :     return false;
    1709             :   }
    1710             : 
    1711           0 :   LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
    1712           0 :   if (JoinVInt.containsOneValue())
    1713           0 :     return true;
    1714             : 
    1715             :   LLVM_DEBUG(
    1716             :       dbgs() << "\tCannot join complex intervals into reserved register.\n");
    1717             :   return false;
    1718             : }
    1719             : 
    1720     1849444 : bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
    1721     1849444 :   Again = false;
    1722             :   LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
    1723             : 
    1724     1849444 :   CoalescerPair CP(*TRI);
    1725     1849444 :   if (!CP.setRegisters(CopyMI)) {
    1726             :     LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
    1727             :     return false;
    1728             :   }
    1729             : 
    1730     1792455 :   if (CP.getNewRC()) {
    1731      913046 :     auto SrcRC = MRI->getRegClass(CP.getSrcReg());
    1732      913046 :     auto DstRC = MRI->getRegClass(CP.getDstReg());
    1733      913046 :     unsigned SrcIdx = CP.getSrcIdx();
    1734      913046 :     unsigned DstIdx = CP.getDstIdx();
    1735      913046 :     if (CP.isFlipped()) {
    1736             :       std::swap(SrcIdx, DstIdx);
    1737             :       std::swap(SrcRC, DstRC);
    1738             :     }
    1739      913046 :     if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
    1740      913046 :                              CP.getNewRC(), *LIS)) {
    1741             :       LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
    1742             :       return false;
    1743             :     }
    1744             :   }
    1745             : 
    1746             :   // Dead code elimination. This really should be handled by MachineDCE, but
    1747             :   // sometimes dead copies slip through, and we can't generate invalid live
    1748             :   // ranges.
    1749     1791069 :   if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
    1750             :     LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
    1751         429 :     DeadDefs.push_back(CopyMI);
    1752         429 :     eliminateDeadDefs();
    1753         429 :     return true;
    1754             :   }
    1755             : 
    1756             :   // Eliminate undefs.
    1757     1790640 :   if (!CP.isPhys()) {
    1758             :     // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
    1759      911231 :     if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
    1760           5 :       if (UndefMI->isImplicitDef())
    1761             :         return false;
    1762           3 :       deleteInstr(CopyMI);
    1763           3 :       return false;  // Not coalescable.
    1764             :     }
    1765             :   }
    1766             : 
    1767             :   // Coalesced copies are normally removed immediately, but transformations
    1768             :   // like removeCopyByCommutingDef() can inadvertently create identity copies.
    1769             :   // When that happens, just join the values and remove the copy.
    1770     1790635 :   if (CP.getSrcReg() == CP.getDstReg()) {
    1771           0 :     LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
    1772             :     LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
    1773           0 :     const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
    1774           0 :     LiveQueryResult LRQ = LI.Query(CopyIdx);
    1775           0 :     if (VNInfo *DefVNI = LRQ.valueDefined()) {
    1776             :       VNInfo *ReadVNI = LRQ.valueIn();
    1777             :       assert(ReadVNI && "No value before copy and no <undef> flag.");
    1778             :       assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
    1779           0 :       LI.MergeValueNumberInto(DefVNI, ReadVNI);
    1780             : 
    1781             :       // Process subregister liveranges.
    1782           0 :       for (LiveInterval::SubRange &S : LI.subranges()) {
    1783           0 :         LiveQueryResult SLRQ = S.Query(CopyIdx);
    1784           0 :         if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
    1785             :           VNInfo *SReadVNI = SLRQ.valueIn();
    1786           0 :           S.MergeValueNumberInto(SDefVNI, SReadVNI);
    1787             :         }
    1788             :       }
    1789             :       LLVM_DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');
    1790             :     }
    1791           0 :     deleteInstr(CopyMI);
    1792             :     return true;
    1793             :   }
    1794             : 
    1795             :   // Enforce policies.
    1796     1790635 :   if (CP.isPhys()) {
    1797             :     LLVM_DEBUG(dbgs() << "\tConsidering merging "
    1798             :                       << printReg(CP.getSrcReg(), TRI) << " with "
    1799             :                       << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
    1800      879409 :     if (!canJoinPhys(CP)) {
    1801             :       // Before giving up coalescing, if definition of source is defined by
    1802             :       // trivial computation, try rematerializing it.
    1803             :       bool IsDefCopy;
    1804      830615 :       if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
    1805             :         return true;
    1806      795531 :       if (IsDefCopy)
    1807      126855 :         Again = true;  // May be possible to coalesce later.
    1808      795531 :       return false;
    1809             :     }
    1810             :   } else {
    1811             :     // When possible, let DstReg be the larger interval.
    1812     1523053 :     if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
    1813      611827 :                            LIS->getInterval(CP.getDstReg()).size())
    1814      166364 :       CP.flip();
    1815             : 
    1816             :     LLVM_DEBUG({
    1817             :       dbgs() << "\tConsidering merging to "
    1818             :              << TRI->getRegClassName(CP.getNewRC()) << " with ";
    1819             :       if (CP.getDstIdx() && CP.getSrcIdx())
    1820             :         dbgs() << printReg(CP.getDstReg()) << " in "
    1821             :                << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
    1822             :                << printReg(CP.getSrcReg()) << " in "
    1823             :                << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
    1824             :       else
    1825             :         dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
    1826             :                << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
    1827             :     });
    1828             :   }
    1829             : 
    1830      960019 :   ShrinkMask = LaneBitmask::getNone();
    1831      960019 :   ShrinkMainRange = false;
    1832             : 
    1833             :   // Okay, attempt to join these two intervals.  On failure, this returns false.
    1834             :   // Otherwise, if one of the intervals being joined is a physreg, this method
    1835             :   // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
    1836             :   // been modified, so we can use this information below to update aliases.
    1837      960019 :   if (!joinIntervals(CP)) {
    1838             :     // Coalescing failed.
    1839             : 
    1840             :     // If definition of source is defined by trivial computation, try
    1841             :     // rematerializing it.
    1842             :     bool IsDefCopy;
    1843      113625 :     if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
    1844             :       return true;
    1845             : 
    1846             :     // If we can eliminate the copy without merging the live segments, do so
    1847             :     // now.
    1848      108692 :     if (!CP.isPartial() && !CP.isPhys()) {
    1849       73288 :       bool Changed = adjustCopiesBackFrom(CP, CopyMI);
    1850       73288 :       bool Shrink = false;
    1851       73288 :       if (!Changed)
    1852       73274 :         std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
    1853       73288 :       if (Changed) {
    1854         114 :         deleteInstr(CopyMI);
    1855         114 :         if (Shrink) {
    1856           1 :           unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
    1857           1 :           LiveInterval &DstLI = LIS->getInterval(DstReg);
    1858           1 :           shrinkToUses(&DstLI);
    1859             :           LLVM_DEBUG(dbgs() << "\t\tshrunk:   " << DstLI << '\n');
    1860             :         }
    1861             :         LLVM_DEBUG(dbgs() << "\tTrivial!\n");
    1862         114 :         return true;
    1863             :       }
    1864             :     }
    1865             : 
    1866             :     // Try and see if we can partially eliminate the copy by moving the copy to
    1867             :     // its predecessor.
    1868      108578 :     if (!CP.isPartial() && !CP.isPhys())
    1869       73174 :       if (removePartialRedundancy(CP, *CopyMI))
    1870             :         return true;
    1871             : 
    1872             :     // Otherwise, we are unable to join the intervals.
    1873             :     LLVM_DEBUG(dbgs() << "\tInterference!\n");
    1874      108463 :     Again = true;  // May be possible to coalesce later.
    1875      108463 :     return false;
    1876             :   }
    1877             : 
    1878             :   // Coalescing to a virtual register that is of a sub-register class of the
    1879             :   // other. Make sure the resulting register is set to the right register class.
    1880      846395 :   if (CP.isCrossClass()) {
    1881             :     ++numCrossRCs;
    1882      284561 :     MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
    1883             :   }
    1884             : 
    1885             :   // Removing sub-register copies can ease the register class constraints.
    1886             :   // Make sure we attempt to inflate the register class of DstReg.
    1887      846395 :   if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
    1888       18255 :     InflateRegs.push_back(CP.getDstReg());
    1889             : 
    1890             :   // CopyMI has been erased by joinIntervals at this point. Remove it from
    1891             :   // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
    1892             :   // to the work list. This keeps ErasedInstrs from growing needlessly.
    1893      846395 :   ErasedInstrs.erase(CopyMI);
    1894             : 
    1895             :   // Rewrite all SrcReg operands to DstReg.
    1896             :   // Also update DstReg operands to include DstIdx if it is set.
    1897      846395 :   if (CP.getDstIdx())
    1898          22 :     updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
    1899      846395 :   updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
    1900             : 
    1901             :   // Shrink subregister ranges if necessary.
    1902      846395 :   if (ShrinkMask.any()) {
    1903          67 :     LiveInterval &LI = LIS->getInterval(CP.getDstReg());
    1904         240 :     for (LiveInterval::SubRange &S : LI.subranges()) {
    1905         346 :       if ((S.LaneMask & ShrinkMask).none())
    1906             :         continue;
    1907             :       LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
    1908             :                         << ")\n");
    1909          80 :       LIS->shrinkToUses(S, LI.reg);
    1910             :     }
    1911          67 :     LI.removeEmptySubRanges();
    1912             :   }
    1913      846395 :   if (ShrinkMainRange) {
    1914           0 :     LiveInterval &LI = LIS->getInterval(CP.getDstReg());
    1915           0 :     shrinkToUses(&LI);
    1916             :   }
    1917             : 
    1918             :   // SrcReg is guaranteed to be the register whose live interval that is
    1919             :   // being merged.
    1920      846395 :   LIS->removeInterval(CP.getSrcReg());
    1921             : 
    1922             :   // Update regalloc hint.
    1923      846395 :   TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
    1924             : 
    1925             :   LLVM_DEBUG({
    1926             :     dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
    1927             :            << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
    1928             :     dbgs() << "\tResult = ";
    1929             :     if (CP.isPhys())
    1930             :       dbgs() << printReg(CP.getDstReg(), TRI);
    1931             :     else
    1932             :       dbgs() << LIS->getInterval(CP.getDstReg());
    1933             :     dbgs() << '\n';
    1934             :   });
    1935             : 
    1936             :   ++numJoins;
    1937      846395 :   return true;
    1938             : }
    1939             : 
    1940       48794 : bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
    1941       48794 :   unsigned DstReg = CP.getDstReg();
    1942       48794 :   unsigned SrcReg = CP.getSrcReg();
    1943             :   assert(CP.isPhys() && "Must be a physreg copy");
    1944             :   assert(MRI->isReserved(DstReg) && "Not a reserved register");
    1945       48794 :   LiveInterval &RHS = LIS->getInterval(SrcReg);
    1946             :   LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
    1947             : 
    1948             :   assert(RHS.containsOneValue() && "Invalid join with reserved register");
    1949             : 
    1950             :   // Optimization for reserved registers like ESP. We can only merge with a
    1951             :   // reserved physreg if RHS has a single value that is a copy of DstReg.
    1952             :   // The live range of the reserved register will look like a set of dead defs
    1953             :   // - we don't properly track the live range of reserved registers.
    1954             : 
    1955             :   // Deny any overlapping intervals.  This depends on all the reserved
    1956             :   // register live ranges to look like dead defs.
    1957       48794 :   if (!MRI->isConstantPhysReg(DstReg)) {
    1958      225782 :     for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
    1959             :       // Abort if not all the regunits are reserved.
    1960      401708 :       for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
    1961      267820 :         if (!MRI->isReserved(*RI))
    1962             :           return false;
    1963             :       }
    1964      267776 :       if (RHS.overlaps(LIS->getRegUnit(*UI))) {
    1965             :         LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI)
    1966             :                           << '\n');
    1967             :         return false;
    1968             :       }
    1969             :     }
    1970             : 
    1971             :     // We must also check for overlaps with regmask clobbers.
    1972             :     BitVector RegMaskUsable;
    1973       45061 :     if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
    1974             :         !RegMaskUsable.test(DstReg)) {
    1975             :       LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
    1976             :       return false;
    1977             :     }
    1978             :   }
    1979             : 
    1980             :   // Skip any value computations, we are not adding new values to the
    1981             :   // reserved register.  Also skip merging the live ranges, the reserved
    1982             :   // register live range doesn't need to be accurate as long as all the
    1983             :   // defs are there.
    1984             : 
    1985             :   // Delete the identity copy.
    1986             :   MachineInstr *CopyMI;
    1987       46668 :   if (CP.isFlipped()) {
    1988             :     // Physreg is copied into vreg
    1989             :     //   %y = COPY %physreg_x
    1990             :     //   ...  //< no other def of %x here
    1991             :     //   use %y
    1992             :     // =>
    1993             :     //   ...
    1994             :     //   use %x
    1995       46598 :     CopyMI = MRI->getVRegDef(SrcReg);
    1996             :   } else {
    1997             :     // VReg is copied into physreg:
    1998             :     //   %y = def
    1999             :     //   ... //< no other def or use of %y here
    2000             :     //   %y = COPY %physreg_x
    2001             :     // =>
    2002             :     //   %y = def
    2003             :     //   ...
    2004          70 :     if (!MRI->hasOneNonDBGUse(SrcReg)) {
    2005             :       LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
    2006             :       return false;
    2007             :     }
    2008             : 
    2009          62 :     if (!LIS->intervalIsInOneMBB(RHS)) {
    2010             :       LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
    2011             :       return false;
    2012             :     }
    2013             : 
    2014          49 :     MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
    2015          49 :     CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
    2016          49 :     SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
    2017          49 :     SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
    2018             : 
    2019          49 :     if (!MRI->isConstantPhysReg(DstReg)) {
    2020             :       // We checked above that there are no interfering defs of the physical
    2021             :       // register. However, for this case, where we intend to move up the def of
    2022             :       // the physical register, we also need to check for interfering uses.
    2023          48 :       SlotIndexes *Indexes = LIS->getSlotIndexes();
    2024             :       for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
    2025          92 :            SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
    2026             :         MachineInstr *MI = LIS->getInstructionFromIndex(SI);
    2027          47 :         if (MI->readsRegister(DstReg, TRI)) {
    2028             :           LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
    2029             :           return false;
    2030             :         }
    2031             :       }
    2032             :     }
    2033             : 
    2034             :     // We're going to remove the copy which defines a physical reserved
    2035             :     // register, so remove its valno, etc.
    2036             :     LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
    2037             :                       << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
    2038             : 
    2039          46 :     LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
    2040             :     // Create a new dead def at the new def location.
    2041         156 :     for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
    2042         128 :       LiveRange &LR = LIS->getRegUnit(*UI);
    2043         128 :       LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
    2044             :     }
    2045             :   }
    2046             : 
    2047       46644 :   deleteInstr(CopyMI);
    2048             : 
    2049             :   // We don't track kills for reserved registers.
    2050       46644 :   MRI->clearKillFlags(CP.getSrcReg());
    2051             : 
    2052       46644 :   return true;
    2053             : }
    2054             : 
    2055             : //===----------------------------------------------------------------------===//
    2056             : //                 Interference checking and interval joining
    2057             : //===----------------------------------------------------------------------===//
    2058             : //
    2059             : // In the easiest case, the two live ranges being joined are disjoint, and
    2060             : // there is no interference to consider. It is quite common, though, to have
    2061             : // overlapping live ranges, and we need to check if the interference can be
    2062             : // resolved.
    2063             : //
    2064             : // The live range of a single SSA value forms a sub-tree of the dominator tree.
    2065             : // This means that two SSA values overlap if and only if the def of one value
    2066             : // is contained in the live range of the other value. As a special case, the
    2067             : // overlapping values can be defined at the same index.
    2068             : //
    2069             : // The interference from an overlapping def can be resolved in these cases:
    2070             : //
    2071             : // 1. Coalescable copies. The value is defined by a copy that would become an
    2072             : //    identity copy after joining SrcReg and DstReg. The copy instruction will
    2073             : //    be removed, and the value will be merged with the source value.
    2074             : //
    2075             : //    There can be several copies back and forth, causing many values to be
    2076             : //    merged into one. We compute a list of ultimate values in the joined live
    2077             : //    range as well as a mappings from the old value numbers.
    2078             : //
    2079             : // 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
    2080             : //    predecessors have a live out value. It doesn't cause real interference,
    2081             : //    and can be merged into the value it overlaps. Like a coalescable copy, it
    2082             : //    can be erased after joining.
    2083             : //
    2084             : // 3. Copy of external value. The overlapping def may be a copy of a value that
    2085             : //    is already in the other register. This is like a coalescable copy, but
    2086             : //    the live range of the source register must be trimmed after erasing the
    2087             : //    copy instruction:
    2088             : //
    2089             : //      %src = COPY %ext
    2090             : //      %dst = COPY %ext  <-- Remove this COPY, trim the live range of %ext.
    2091             : //
    2092             : // 4. Clobbering undefined lanes. Vector registers are sometimes built by
    2093             : //    defining one lane at a time:
    2094             : //
    2095             : //      %dst:ssub0<def,read-undef> = FOO
    2096             : //      %src = BAR
    2097             : //      %dst:ssub1 = COPY %src
    2098             : //
    2099             : //    The live range of %src overlaps the %dst value defined by FOO, but
    2100             : //    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
    2101             : //    which was undef anyway.
    2102             : //
    2103             : //    The value mapping is more complicated in this case. The final live range
    2104             : //    will have different value numbers for both FOO and BAR, but there is no
    2105             : //    simple mapping from old to new values. It may even be necessary to add
    2106             : //    new PHI values.
    2107             : //
    2108             : // 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
    2109             : //    is live, but never read. This can happen because we don't compute
    2110             : //    individual live ranges per lane.
    2111             : //
    2112             : //      %dst = FOO
    2113             : //      %src = BAR
    2114             : //      %dst:ssub1 = COPY %src
    2115             : //
    2116             : //    This kind of interference is only resolved locally. If the clobbered
    2117             : //    lane value escapes the block, the join is aborted.
    2118             : 
    2119             : namespace {
    2120             : 
    2121             : /// Track information about values in a single virtual register about to be
    2122             : /// joined. Objects of this class are always created in pairs - one for each
    2123             : /// side of the CoalescerPair (or one for each lane of a side of the coalescer
    2124             : /// pair)
    2125             : class JoinVals {
    2126             :   /// Live range we work on.
    2127             :   LiveRange &LR;
    2128             : 
    2129             :   /// (Main) register we work on.
    2130             :   const unsigned Reg;
    2131             : 
    2132             :   /// Reg (and therefore the values in this liverange) will end up as
    2133             :   /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
    2134             :   /// CP.SrcIdx.
    2135             :   const unsigned SubIdx;
    2136             : 
    2137             :   /// The LaneMask that this liverange will occupy the coalesced register. May
    2138             :   /// be smaller than the lanemask produced by SubIdx when merging subranges.
    2139             :   const LaneBitmask LaneMask;
    2140             : 
    2141             :   /// This is true when joining sub register ranges, false when joining main
    2142             :   /// ranges.
    2143             :   const bool SubRangeJoin;
    2144             : 
    2145             :   /// Whether the current LiveInterval tracks subregister liveness.
    2146             :   const bool TrackSubRegLiveness;
    2147             : 
    2148             :   /// Values that will be present in the final live range.
    2149             :   SmallVectorImpl<VNInfo*> &NewVNInfo;
    2150             : 
    2151             :   const CoalescerPair &CP;
    2152             :   LiveIntervals *LIS;
    2153             :   SlotIndexes *Indexes;
    2154             :   const TargetRegisterInfo *TRI;
    2155             : 
    2156             :   /// Value number assignments. Maps value numbers in LI to entries in
    2157             :   /// NewVNInfo. This is suitable for passing to LiveInterval::join().
    2158             :   SmallVector<int, 8> Assignments;
    2159             : 
    2160             :   /// Conflict resolution for overlapping values.
    2161             :   enum ConflictResolution {
    2162             :     /// No overlap, simply keep this value.
    2163             :     CR_Keep,
    2164             : 
    2165             :     /// Merge this value into OtherVNI and erase the defining instruction.
    2166             :     /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
    2167             :     /// values.
    2168             :     CR_Erase,
    2169             : 
    2170             :     /// Merge this value into OtherVNI but keep the defining instruction.
    2171             :     /// This is for the special case where OtherVNI is defined by the same
    2172             :     /// instruction.
    2173             :     CR_Merge,
    2174             : 
    2175             :     /// Keep this value, and have it replace OtherVNI where possible. This
    2176             :     /// complicates value mapping since OtherVNI maps to two different values
    2177             :     /// before and after this def.
    2178             :     /// Used when clobbering undefined or dead lanes.
    2179             :     CR_Replace,
    2180             : 
    2181             :     /// Unresolved conflict. Visit later when all values have been mapped.
    2182             :     CR_Unresolved,
    2183             : 
    2184             :     /// Unresolvable conflict. Abort the join.
    2185             :     CR_Impossible
    2186             :   };
    2187             : 
    2188             :   /// Per-value info for LI. The lane bit masks are all relative to the final
    2189             :   /// joined register, so they can be compared directly between SrcReg and
    2190             :   /// DstReg.
    2191             :   struct Val {
    2192             :     ConflictResolution Resolution = CR_Keep;
    2193             : 
    2194             :     /// Lanes written by this def, 0 for unanalyzed values.
    2195             :     LaneBitmask WriteLanes;
    2196             : 
    2197             :     /// Lanes with defined values in this register. Other lanes are undef and
    2198             :     /// safe to clobber.
    2199             :     LaneBitmask ValidLanes;
    2200             : 
    2201             :     /// Value in LI being redefined by this def.
    2202             :     VNInfo *RedefVNI = nullptr;
    2203             : 
    2204             :     /// Value in the other live range that overlaps this def, if any.
    2205             :     VNInfo *OtherVNI = nullptr;
    2206             : 
    2207             :     /// Is this value an IMPLICIT_DEF that can be erased?
    2208             :     ///
    2209             :     /// IMPLICIT_DEF values should only exist at the end of a basic block that
    2210             :     /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
    2211             :     /// safely erased if they are overlapping a live value in the other live
    2212             :     /// interval.
    2213             :     ///
    2214             :     /// Weird control flow graphs and incomplete PHI handling in
    2215             :     /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
    2216             :     /// longer live ranges. Such IMPLICIT_DEF values should be treated like
    2217             :     /// normal values.
    2218             :     bool ErasableImplicitDef = false;
    2219             : 
    2220             :     /// True when the live range of this value will be pruned because of an
    2221             :     /// overlapping CR_Replace value in the other live range.
    2222             :     bool Pruned = false;
    2223             : 
    2224             :     /// True once Pruned above has been computed.
    2225             :     bool PrunedComputed = false;
    2226             : 
    2227             :     /// True if this value is determined to be identical to OtherVNI
    2228             :     /// (in valuesIdentical). This is used with CR_Erase where the erased
    2229             :     /// copy is redundant, i.e. the source value is already the same as
    2230             :     /// the destination. In such cases the subranges need to be updated
    2231             :     /// properly. See comment at pruneSubRegValues for more info.
    2232             :     bool Identical = false;
    2233             : 
    2234             :     Val() = default;
    2235             : 
    2236     7203958 :     bool isAnalyzed() const { return WriteLanes.any(); }
    2237             :   };
    2238             : 
    2239             :   /// One entry per value number in LI.
    2240             :   SmallVector<Val, 8> Vals;
    2241             : 
    2242             :   /// Compute the bitmask of lanes actually written by DefMI.
    2243             :   /// Set Redef if there are any partial register definitions that depend on the
    2244             :   /// previous value of the register.
    2245             :   LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
    2246             : 
    2247             :   /// Find the ultimate value that VNI was copied from.
    2248             :   std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
    2249             : 
    2250             :   bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
    2251             : 
    2252             :   /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
    2253             :   /// Return a conflict resolution when possible, but leave the hard cases as
    2254             :   /// CR_Unresolved.
    2255             :   /// Recursively calls computeAssignment() on this and Other, guaranteeing that
    2256             :   /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
    2257             :   /// The recursion always goes upwards in the dominator tree, making loops
    2258             :   /// impossible.
    2259             :   ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
    2260             : 
    2261             :   /// Compute the value assignment for ValNo in RI.
    2262             :   /// This may be called recursively by analyzeValue(), but never for a ValNo on
    2263             :   /// the stack.
    2264             :   void computeAssignment(unsigned ValNo, JoinVals &Other);
    2265             : 
    2266             :   /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
    2267             :   /// the extent of the tainted lanes in the block.
    2268             :   ///
    2269             :   /// Multiple values in Other.LR can be affected since partial redefinitions
    2270             :   /// can preserve previously tainted lanes.
    2271             :   ///
    2272             :   ///   1 %dst = VLOAD           <-- Define all lanes in %dst
    2273             :   ///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
    2274             :   ///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
    2275             :   ///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
    2276             :   ///
    2277             :   /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
    2278             :   /// entry to TaintedVals.
    2279             :   ///
    2280             :   /// Returns false if the tainted lanes extend beyond the basic block.
    2281             :   bool
    2282             :   taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
    2283             :               SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
    2284             : 
    2285             :   /// Return true if MI uses any of the given Lanes from Reg.
    2286             :   /// This does not include partial redefinitions of Reg.
    2287             :   bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const;
    2288             : 
    2289             :   /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
    2290             :   /// be pruned:
    2291             :   ///
    2292             :   ///   %dst = COPY %src
    2293             :   ///   %src = COPY %dst  <-- This value to be pruned.
    2294             :   ///   %dst = COPY %src  <-- This value is a copy of a pruned value.
    2295             :   bool isPrunedValue(unsigned ValNo, JoinVals &Other);
    2296             : 
    2297             : public:
    2298     2188014 :   JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
    2299             :            SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
    2300             :            LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
    2301             :            bool TrackSubRegLiveness)
    2302     2188014 :     : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
    2303             :       SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
    2304     2188014 :       NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
    2305     4376028 :       TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {}
    2306             : 
    2307             :   /// Analyze defs in LR and compute a value mapping in NewVNInfo.
    2308             :   /// Returns false if any conflicts were impossible to resolve.
    2309             :   bool mapValues(JoinVals &Other);
    2310             : 
    2311             :   /// Try to resolve conflicts that require all values to be mapped.
    2312             :   /// Returns false if any conflicts were impossible to resolve.
    2313             :   bool resolveConflicts(JoinVals &Other);
    2314             : 
    2315             :   /// Prune the live range of values in Other.LR where they would conflict with
    2316             :   /// CR_Replace values in LR. Collect end points for restoring the live range
    2317             :   /// after joining.
    2318             :   void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
    2319             :                    bool changeInstrs);
    2320             : 
    2321             :   /// Removes subranges starting at copies that get removed. This sometimes
    2322             :   /// happens when undefined subranges are copied around. These ranges contain
    2323             :   /// no useful information and can be removed.
    2324             :   void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
    2325             : 
    2326             :   /// Pruning values in subranges can lead to removing segments in these
    2327             :   /// subranges started by IMPLICIT_DEFs. The corresponding segments in
    2328             :   /// the main range also need to be removed. This function will mark
    2329             :   /// the corresponding values in the main range as pruned, so that
    2330             :   /// eraseInstrs can do the final cleanup.
    2331             :   /// The parameter @p LI must be the interval whose main range is the
    2332             :   /// live range LR.
    2333             :   void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
    2334             : 
    2335             :   /// Erase any machine instructions that have been coalesced away.
    2336             :   /// Add erased instructions to ErasedInstrs.
    2337             :   /// Add foreign virtual registers to ShrinkRegs if their live range ended at
    2338             :   /// the erased instrs.
    2339             :   void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
    2340             :                    SmallVectorImpl<unsigned> &ShrinkRegs,
    2341             :                    LiveInterval *LI = nullptr);
    2342             : 
    2343             :   /// Remove liverange defs at places where implicit defs will be removed.
    2344             :   void removeImplicitDefs();
    2345             : 
    2346             :   /// Get the value assignments suitable for passing to LiveInterval::join.
    2347      799751 :   const int *getAssignments() const { return Assignments.data(); }
    2348             : };
    2349             : 
    2350             : } // end anonymous namespace
    2351             : 
    2352     4068521 : LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
    2353             :   const {
    2354             :   LaneBitmask L;
    2355    15818671 :   for (const MachineOperand &MO : DefMI->operands()) {
    2356    11750150 :     if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
    2357             :       continue;
    2358     4068559 :     L |= TRI->getSubRegIndexLaneMask(
    2359     4350353 :            TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
    2360             :     if (MO.readsReg())
    2361      418046 :       Redef = true;
    2362             :   }
    2363     4068521 :   return L;
    2364             : }
    2365             : 
    2366       47402 : std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
    2367             :     const VNInfo *VNI) const {
    2368       47402 :   unsigned TrackReg = Reg;
    2369             : 
    2370       74274 :   while (!VNI->isPHIDef()) {
    2371       68482 :     SlotIndex Def = VNI->def;
    2372             :     MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
    2373             :     assert(MI && "No defining instruction");
    2374             :     if (!MI->isFullCopy())
    2375       41610 :       return std::make_pair(VNI, TrackReg);
    2376       32441 :     unsigned SrcReg = MI->getOperand(1).getReg();
    2377       32441 :     if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
    2378             :       return std::make_pair(VNI, TrackReg);
    2379             : 
    2380       26876 :     const LiveInterval &LI = LIS->getInterval(SrcReg);
    2381             :     const VNInfo *ValueIn;
    2382             :     // No subrange involved.
    2383       26876 :     if (!SubRangeJoin || !LI.hasSubRanges()) {
    2384       26836 :       LiveQueryResult LRQ = LI.Query(Def);
    2385       26836 :       ValueIn = LRQ.valueIn();
    2386             :     } else {
    2387             :       // Query subranges. Ensure that all matching ones take us to the same def
    2388             :       // (allowing some of them to be undef).
    2389             :       ValueIn = nullptr;
    2390         178 :       for (const LiveInterval::SubRange &S : LI.subranges()) {
    2391             :         // Transform lanemask to a mask in the joined live interval.
    2392         276 :         LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
    2393         138 :         if ((SMask & LaneMask).none())
    2394         138 :           continue;
    2395          38 :         LiveQueryResult LRQ = S.Query(Def);
    2396          38 :         if (!ValueIn) {
    2397             :           ValueIn = LRQ.valueIn();
    2398             :           continue;
    2399             :         }
    2400           0 :         if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
    2401           0 :           return std::make_pair(VNI, TrackReg);
    2402             :       }
    2403             :     }
    2404       26876 :     if (ValueIn == nullptr) {
    2405             :       // Reaching an undefined value is legitimate, for example:
    2406             :       //
    2407             :       // 1   undef %0.sub1 = ...  ;; %0.sub0 == undef
    2408             :       // 2   %1 = COPY %0         ;; %1 is defined here.
    2409             :       // 3   %0 = COPY %1         ;; Now %0.sub0 has a definition,
    2410             :       //                          ;; but it's equivalent to "undef".
    2411           4 :       return std::make_pair(nullptr, SrcReg);
    2412             :     }
    2413             :     VNI = ValueIn;
    2414             :     TrackReg = SrcReg;
    2415             :   }
    2416             :   return std::make_pair(VNI, TrackReg);
    2417             : }
    2418             : 
    2419       23858 : bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
    2420             :                                const JoinVals &Other) const {
    2421             :   const VNInfo *Orig0;
    2422             :   unsigned Reg0;
    2423       23858 :   std::tie(Orig0, Reg0) = followCopyChain(Value0);
    2424       23858 :   if (Orig0 == Value1 && Reg0 == Other.Reg)
    2425             :     return true;
    2426             : 
    2427             :   const VNInfo *Orig1;
    2428             :   unsigned Reg1;
    2429       23544 :   std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
    2430             :   // If both values are undefined, and the source registers are the same
    2431             :   // register, the values are identical. Filter out cases where only one
    2432             :   // value is defined.
    2433       23544 :   if (Orig0 == nullptr || Orig1 == nullptr)
    2434           2 :     return Orig0 == Orig1 && Reg0 == Reg1;
    2435             : 
    2436             :   // The values are equal if they are defined at the same place and use the
    2437             :   // same register. Note that we cannot compare VNInfos directly as some of
    2438             :   // them might be from a copy created in mergeSubRangeInto()  while the other
    2439             :   // is from the original LiveInterval.
    2440       23542 :   return Orig0->def == Orig1->def && Reg0 == Reg1;
    2441             : }
    2442             : 
    2443             : JoinVals::ConflictResolution
    2444     5430355 : JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
    2445     5430355 :   Val &V = Vals[ValNo];
    2446             :   assert(!V.isAnalyzed() && "Value has already been analyzed!");
    2447     5430355 :   VNInfo *VNI = LR.getValNumInfo(ValNo);
    2448     5430355 :   if (VNI->isUnused()) {
    2449         996 :     V.WriteLanes = LaneBitmask::getAll();
    2450         996 :     return CR_Keep;
    2451             :   }
    2452             : 
    2453             :   // Get the instruction defining this value, compute the lanes written.
    2454             :   const MachineInstr *DefMI = nullptr;
    2455     5429359 :   if (VNI->isPHIDef()) {
    2456             :     // Conservatively assume that all lanes in a PHI are valid.
    2457      977610 :     LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
    2458      977610 :                                      : TRI->getSubRegIndexLaneMask(SubIdx);
    2459      977610 :     V.ValidLanes = V.WriteLanes = Lanes;
    2460             :   } else {
    2461             :     DefMI = Indexes->getInstructionFromIndex(VNI->def);
    2462             :     assert(DefMI != nullptr);
    2463     4451749 :     if (SubRangeJoin) {
    2464             :       // We don't care about the lanes when joining subregister ranges.
    2465      383228 :       V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
    2466      383228 :       if (DefMI->isImplicitDef()) {
    2467         200 :         V.ValidLanes = LaneBitmask::getNone();
    2468         200 :         V.ErasableImplicitDef = true;
    2469             :       }
    2470             :     } else {
    2471     4068521 :       bool Redef = false;
    2472     4068521 :       V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
    2473             : 
    2474             :       // If this is a read-modify-write instruction, there may be more valid
    2475             :       // lanes than the ones written by this instruction.
    2476             :       // This only covers partial redef operands. DefMI may have normal use
    2477             :       // operands reading the register. They don't contribute valid lanes.
    2478             :       //
    2479             :       // This adds ssub1 to the set of valid lanes in %src:
    2480             :       //
    2481             :       //   %src:ssub1 = FOO
    2482             :       //
    2483             :       // This leaves only ssub1 valid, making any other lanes undef:
    2484             :       //
    2485             :       //   %src:ssub1<def,read-undef> = FOO %src:ssub2
    2486             :       //
    2487             :       // The <read-undef> flag on the def operand means that old lane values are
    2488             :       // not important.
    2489     4068521 :       if (Redef) {
    2490      418020 :         V.RedefVNI = LR.Query(VNI->def).valueIn();
    2491             :         assert((TrackSubRegLiveness || V.RedefVNI) &&
    2492             :                "Instruction is reading nonexistent value");
    2493      418020 :         if (V.RedefVNI != nullptr) {
    2494      418020 :           computeAssignment(V.RedefVNI->id, Other);
    2495      418020 :           V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
    2496             :         }
    2497             :       }
    2498             : 
    2499             :       // An IMPLICIT_DEF writes undef values.
    2500     4068521 :       if (DefMI->isImplicitDef()) {
    2501             :         // We normally expect IMPLICIT_DEF values to be live only until the end
    2502             :         // of their block. If the value is really live longer and gets pruned in
    2503             :         // another block, this flag is cleared again.
    2504        8662 :         V.ErasableImplicitDef = true;
    2505        8662 :         V.ValidLanes &= ~V.WriteLanes;
    2506             :       }
    2507             :     }
    2508             :   }
    2509             : 
    2510             :   // Find the value in Other that overlaps VNI->def, if any.
    2511     5429359 :   LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
    2512             : 
    2513             :   // It is possible that both values are defined by the same instruction, or
    2514             :   // the values are PHIs defined in the same block. When that happens, the two
    2515             :   // values should be merged into one, but not into any preceding value.
    2516             :   // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
    2517     5429359 :   if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
    2518             :     assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
    2519             : 
    2520             :     // One value stays, the other is merged. Keep the earlier one, or the first
    2521             :     // one we see.
    2522        7264 :     if (OtherVNI->def < VNI->def)
    2523           0 :       Other.computeAssignment(OtherVNI->id, *this);
    2524        7264 :     else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
    2525             :       // This is an early-clobber def overlapping a live-in value in the other
    2526             :       // register. Not mergeable.
    2527           0 :       V.OtherVNI = OtherLRQ.valueIn();
    2528           0 :       return CR_Impossible;
    2529             :     }
    2530        7264 :     V.OtherVNI = OtherVNI;
    2531        7264 :     Val &OtherV = Other.Vals[OtherVNI->id];
    2532             :     // Keep this value, check for conflicts when analyzing OtherVNI.
    2533        7264 :     if (!OtherV.isAnalyzed())
    2534             :       return CR_Keep;
    2535             :     // Both sides have been analyzed now.
    2536             :     // Allow overlapping PHI values. Any real interference would show up in a
    2537             :     // predecessor, the PHI itself can't introduce any conflicts.
    2538        2246 :     if (VNI->isPHIDef())
    2539             :       return CR_Merge;
    2540         176 :     if ((V.ValidLanes & OtherV.ValidLanes).any())
    2541             :       // Overlapping lanes can't be resolved.
    2542             :       return CR_Impossible;
    2543             :     else
    2544          16 :       return CR_Merge;
    2545             :   }
    2546             : 
    2547             :   // No simultaneous def. Is Other live at the def?
    2548     5422095 :   V.OtherVNI = OtherLRQ.valueIn();
    2549     5422095 :   if (!V.OtherVNI)
    2550             :     // No overlap, no conflict.
    2551             :     return CR_Keep;
    2552             : 
    2553             :   assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
    2554             : 
    2555             :   // We have overlapping values, or possibly a kill of Other.
    2556             :   // Recursively compute assignments up the dominator tree.
    2557     1406602 :   Other.computeAssignment(V.OtherVNI->id, *this);
    2558     1406602 :   Val &OtherV = Other.Vals[V.OtherVNI->id];
    2559             : 
    2560             :   // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
    2561             :   // This shouldn't normally happen, but ProcessImplicitDefs can leave such
    2562             :   // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
    2563             :   // technically.
    2564             :   //
    2565             :   // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
    2566             :   // to erase the IMPLICIT_DEF instruction.
    2567     1407126 :   if (OtherV.ErasableImplicitDef && DefMI &&
    2568         524 :       DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
    2569             :     LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
    2570             :                       << " extends into "
    2571             :                       << printMBBReference(*DefMI->getParent())
    2572             :                       << ", keeping it.\n");
    2573           3 :     OtherV.ErasableImplicitDef = false;
    2574             :   }
    2575             : 
    2576             :   // Allow overlapping PHI values. Any real interference would show up in a
    2577             :   // predecessor, the PHI itself can't introduce any conflicts.
    2578     1406602 :   if (VNI->isPHIDef())
    2579             :     return CR_Replace;
    2580             : 
    2581             :   // Check for simple erasable conflicts.
    2582     1402050 :   if (DefMI->isImplicitDef()) {
    2583             :     // We need the def for the subregister if there is nothing else live at the
    2584             :     // subrange at this point.
    2585        1703 :     if (TrackSubRegLiveness
    2586        1703 :         && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none())
    2587           7 :       return CR_Replace;
    2588             :     return CR_Erase;
    2589             :   }
    2590             : 
    2591             :   // Include the non-conflict where DefMI is a coalescable copy that kills
    2592             :   // OtherVNI. We still want the copy erased and value numbers merged.
    2593     1400347 :   if (CP.isCoalescable(DefMI)) {
    2594             :     // Some of the lanes copied from OtherVNI may be undef, making them undef
    2595             :     // here too.
    2596     1120019 :     V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
    2597     1120019 :     return CR_Erase;
    2598             :   }
    2599             : 
    2600             :   // This may not be a real conflict if DefMI simply kills Other and defines
    2601             :   // VNI.
    2602      280328 :   if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
    2603             :     return CR_Keep;
    2604             : 
    2605             :   // Handle the case where VNI and OtherVNI can be proven to be identical:
    2606             :   //
    2607             :   //   %other = COPY %ext
    2608             :   //   %this  = COPY %ext <-- Erase this copy
    2609             :   //
    2610       50661 :   if (DefMI->isFullCopy() && !CP.isPartial() &&
    2611       23858 :       valuesIdentical(VNI, V.OtherVNI, Other)) {
    2612         685 :     V.Identical = true;
    2613         685 :     return CR_Erase;
    2614             :   }
    2615             : 
    2616             :   // If the lanes written by this instruction were all undef in OtherVNI, it is
    2617             :   // still safe to join the live ranges. This can't be done with a simple value
    2618             :   // mapping, though - OtherVNI will map to multiple values:
    2619             :   //
    2620             :   //   1 %dst:ssub0 = FOO                <-- OtherVNI
    2621             :   //   2 %src = BAR                      <-- VNI
    2622             :   //   3 %dst:ssub1 = COPY killed %src    <-- Eliminate this copy.
    2623             :   //   4 BAZ killed %dst
    2624             :   //   5 QUUX killed %src
    2625             :   //
    2626             :   // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
    2627             :   // handles this complex value mapping.
    2628      548172 :   if ((V.WriteLanes & OtherV.ValidLanes).none())
    2629             :     return CR_Replace;
    2630             : 
    2631             :   // If the other live range is killed by DefMI and the live ranges are still
    2632             :   // overlapping, it must be because we're looking at an early clobber def:
    2633             :   //
    2634             :   //   %dst<def,early-clobber> = ASM killed %src
    2635             :   //
    2636             :   // In this case, it is illegal to merge the two live ranges since the early
    2637             :   // clobber def would clobber %src before it was read.
    2638      152201 :   if (OtherLRQ.isKill()) {
    2639             :     // This case where the def doesn't overlap the kill is handled above.
    2640             :     assert(VNI->def.isEarlyClobber() &&
    2641             :            "Only early clobber defs can overlap a kill");
    2642             :     return CR_Impossible;
    2643             :   }
    2644             : 
    2645             :   // VNI is clobbering live lanes in OtherVNI, but there is still the
    2646             :   // possibility that no instructions actually read the clobbered lanes.
    2647             :   // If we're clobbering all the lanes in OtherVNI, at least one must be read.
    2648             :   // Otherwise Other.RI wouldn't be live here.
    2649      304374 :   if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
    2650             :     return CR_Impossible;
    2651             : 
    2652             :   // We need to verify that no instructions are reading the clobbered lanes. To
    2653             :   // save compile time, we'll only check that locally. Don't allow the tainted
    2654             :   // value to escape the basic block.
    2655       69826 :   MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
    2656      139652 :   if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
    2657        2424 :     return CR_Impossible;
    2658             : 
    2659             :   // There are still some things that could go wrong besides clobbered lanes
    2660             :   // being read, for example OtherVNI may be only partially redefined in MBB,
    2661             :   // and some clobbered lanes could escape the block. Save this analysis for
    2662             :   // resolveConflicts() when all values have been mapped. We need to know
    2663             :   // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
    2664             :   // that now - the recursive analyzeValue() calls must go upwards in the
    2665             :   // dominator tree.
    2666             :   return CR_Unresolved;
    2667             : }
    2668             : 
    2669     7196694 : void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
    2670     7196694 :   Val &V = Vals[ValNo];
    2671     7196694 :   if (V.isAnalyzed()) {
    2672             :     // Recursion should always move up the dominator tree, so ValNo is not
    2673             :     // supposed to reappear before it has been assigned.
    2674             :     assert(Assignments[ValNo] != -1 && "Bad recursion?");
    2675             :     return;
    2676             :   }
    2677     5430355 :   switch ((V.Resolution = analyzeValue(ValNo, Other))) {
    2678     1124574 :   case CR_Erase:
    2679             :   case CR_Merge:
    2680             :     // Merge this ValNo into OtherVNI.
    2681             :     assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
    2682             :     assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
    2683     1124574 :     Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
    2684             :     LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
    2685             :                       << LR.getValNumInfo(ValNo)->def << " into "
    2686             :                       << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
    2687             :                       << V.OtherVNI->def << " --> @"
    2688             :                       << NewVNInfo[Assignments[ValNo]]->def << '\n');
    2689     1124574 :     break;
    2690      193846 :   case CR_Replace:
    2691             :   case CR_Unresolved: {
    2692             :     // The other value is going to be pruned if this join is successful.
    2693             :     assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
    2694      193846 :     Val &OtherV = Other.Vals[V.OtherVNI->id];
    2695             :     // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
    2696             :     // its lanes.
    2697      581538 :     if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness)
    2698      155852 :       OtherV.ErasableImplicitDef = false;
    2699      193846 :     OtherV.Pruned = true;
    2700             :     LLVM_FALLTHROUGH;
    2701             :   }
    2702     4305781 :   default:
    2703             :     // This value number needs to go in the final joined live range.
    2704     4305781 :     Assignments[ValNo] = NewVNInfo.size();
    2705     8611562 :     NewVNInfo.push_back(LR.getValNumInfo(ValNo));
    2706     4305781 :     break;
    2707             :   }
    2708             : }
    2709             : 
    2710     2138474 : bool JoinVals::mapValues(JoinVals &Other) {
    2711     7429919 :   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    2712     5372072 :     computeAssignment(i, Other);
    2713    10744144 :     if (Vals[i].Resolution == CR_Impossible) {
    2714             :       LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
    2715             :                         << '@' << LR.getValNumInfo(i)->def << '\n');
    2716             :       return false;
    2717             :     }
    2718             :   }
    2719             :   return true;
    2720             : }
    2721             : 
    2722           0 : bool JoinVals::
    2723             : taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
    2724             :             SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
    2725           0 :   VNInfo *VNI = LR.getValNumInfo(ValNo);
    2726           0 :   MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
    2727           0 :   SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
    2728             : 
    2729             :   // Scan Other.LR from VNI.def to MBBEnd.
    2730           0 :   LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
    2731             :   assert(OtherI != Other.LR.end() && "No conflict?");
    2732             :   do {
    2733             :     // OtherI is pointing to a tainted value. Abort the join if the tainted
    2734             :     // lanes escape the block.
    2735           0 :     SlotIndex End = OtherI->end;
    2736           0 :     if (End >= MBBEnd) {
    2737             :       LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
    2738             :                         << OtherI->valno->id << '@' << OtherI->start << '\n');
    2739           0 :       return false;
    2740             :     }
    2741             :     LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
    2742             :                       << OtherI->valno->id << '@' << OtherI->start << " to "
    2743             :                       << End << '\n');
    2744             :     // A dead def is not a problem.
    2745           0 :     if (End.isDead())
    2746             :       break;
    2747           0 :     TaintExtent.push_back(std::make_pair(End, TaintedLanes));
    2748             : 
    2749             :     // Check for another def in the MBB.
    2750           0 :     if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
    2751             :       break;
    2752             : 
    2753             :     // Lanes written by the new def are no longer tainted.
    2754           0 :     const Val &OV = Other.Vals[OtherI->valno->id];
    2755           0 :     TaintedLanes &= ~OV.WriteLanes;
    2756           0 :     if (!OV.RedefVNI)
    2757             :       break;
    2758           0 :   } while (TaintedLanes.any());
    2759             :   return true;
    2760             : }
    2761             : 
    2762           0 : bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
    2763             :                          LaneBitmask Lanes) const {
    2764             :   if (MI.isDebugInstr())
    2765           0 :     return false;
    2766           0 :   for (const MachineOperand &MO : MI.operands()) {
    2767           0 :     if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
    2768           0 :       continue;
    2769           0 :     if (!MO.readsReg())
    2770           0 :       continue;
    2771           0 :     unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
    2772           0 :     if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
    2773           0 :       return true;
    2774             :   }
    2775             :   return false;
    2776             : }
    2777             : 
    2778     2003950 : bool JoinVals::resolveConflicts(JoinVals &Other) {
    2779     6981268 :   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    2780     5008166 :     Val &V = Vals[i];
    2781             :     assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
    2782     5008166 :     if (V.Resolution != CR_Unresolved)
    2783     4970960 :       continue;
    2784             :     LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
    2785             :                       << LR.getValNumInfo(i)->def << '\n');
    2786       37206 :     if (SubRangeJoin)
    2787       30848 :       return false;
    2788             : 
    2789             :     ++NumLaneConflicts;
    2790             :     assert(V.OtherVNI && "Inconsistent conflict resolution.");
    2791       37206 :     VNInfo *VNI = LR.getValNumInfo(i);
    2792       37206 :     const Val &OtherV = Other.Vals[V.OtherVNI->id];
    2793             : 
    2794             :     // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
    2795             :     // join, those lanes will be tainted with a wrong value. Get the extent of
    2796             :     // the tainted lanes.
    2797       74412 :     LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
    2798             :     SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
    2799       37206 :     if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
    2800             :       // Tainted lanes would extend beyond the basic block.
    2801             :       return false;
    2802             : 
    2803             :     assert(!TaintExtent.empty() && "There should be at least one conflict.");
    2804             : 
    2805             :     // Now look at the instructions from VNI->def to TaintExtent (inclusive).
    2806       37199 :     MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
    2807             :     MachineBasicBlock::iterator MI = MBB->begin();
    2808       37199 :     if (!VNI->isPHIDef()) {
    2809             :       MI = Indexes->getInstructionFromIndex(VNI->def);
    2810             :       // No need to check the instruction defining VNI for reads.
    2811             :       ++MI;
    2812             :     }
    2813             :     assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
    2814             :            "Interference ends on VNI->def. Should have been handled earlier");
    2815             :     MachineInstr *LastMI =
    2816       37199 :       Indexes->getInstructionFromIndex(TaintExtent.front().first);
    2817             :     assert(LastMI && "Range must end at a proper instruction");
    2818             :     unsigned TaintNum = 0;
    2819             :     while (true) {
    2820             :       assert(MI != MBB->end() && "Bad LastMI");
    2821      508218 :       if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
    2822             :         LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
    2823             :         return false;
    2824             :       }
    2825             :       // LastMI is the last instruction to use the current value.
    2826      477377 :       if (&*MI == LastMI) {
    2827       15325 :         if (++TaintNum == TaintExtent.size())
    2828             :           break;
    2829             :         LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
    2830             :         assert(LastMI && "Range must end at a proper instruction");
    2831        8967 :         TaintedLanes = TaintExtent[TaintNum].second;
    2832             :       }
    2833             :       ++MI;
    2834             :     }
    2835             : 
    2836             :     // The tainted lanes are unused.
    2837        6358 :     V.Resolution = CR_Replace;
    2838             :     ++NumLaneResolves;
    2839             :   }
    2840             :   return true;
    2841             : }
    2842             : 
    2843     2018134 : bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
    2844     2018134 :   Val &V = Vals[ValNo];
    2845     2018134 :   if (V.Pruned || V.PrunedComputed)
    2846             :     return V.Pruned;
    2847             : 
    2848     1974266 :   if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
    2849             :     return V.Pruned;
    2850             : 
    2851             :   // Follow copies up the dominator tree and check if any intermediate value
    2852             :   // has been pruned.
    2853     1009061 :   V.PrunedComputed = true;
    2854     1009061 :   V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
    2855     1009061 :   return V.Pruned;
    2856             : }
    2857             : 
    2858     1965064 : void JoinVals::pruneValues(JoinVals &Other,
    2859             :                            SmallVectorImpl<SlotIndex> &EndPoints,
    2860             :                            bool changeInstrs) {
    2861     6902915 :   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    2862     9875702 :     SlotIndex Def = LR.getValNumInfo(i)->def;
    2863     4937851 :     switch (Vals[i].Resolution) {
    2864             :     case CR_Keep:
    2865             :       break;
    2866      116482 :     case CR_Replace: {
    2867             :       // This value takes precedence over the value in Other.LR.
    2868      116482 :       LIS->pruneValue(Other.LR, Def, &EndPoints);
    2869             :       // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
    2870             :       // instructions are only inserted to provide a live-out value for PHI
    2871             :       // predecessors, so the instruction should simply go away once its value
    2872             :       // has been replaced.
    2873      116482 :       Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
    2874      116482 :       bool EraseImpDef = OtherV.ErasableImplicitDef &&
    2875          28 :                          OtherV.Resolution == CR_Keep;
    2876      116482 :       if (!Def.isBlock()) {
    2877      116290 :         if (changeInstrs) {
    2878             :           // Remove <def,read-undef> flags. This def is now a partial redef.
    2879             :           // Also remove dead flags since the joined live range will
    2880             :           // continue past this instruction.
    2881      442436 :           for (MachineOperand &MO :
    2882      558725 :                Indexes->getInstructionFromIndex(Def)->operands()) {
    2883      442436 :             if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
    2884      116289 :               if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
    2885             :                 MO.setIsUndef(false);
    2886             :               MO.setIsDead(false);
    2887             :             }
    2888             :           }
    2889             :         }
    2890             :         // This value will reach instructions below, but we need to make sure
    2891             :         // the live range also reaches the instruction at Def.
    2892      116290 :         if (!EraseImpDef)
    2893      116263 :           EndPoints.push_back(Def);
    2894             :       }
    2895             :       LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
    2896             :                         << ": " << Other.LR << '\n');
    2897             :       break;
    2898             :     }
    2899     1009073 :     case CR_Erase:
    2900             :     case CR_Merge:
    2901     1009073 :       if (isPrunedValue(i, Other)) {
    2902             :         // This value is ultimately a copy of a pruned value in LR or Other.LR.
    2903             :         // We can no longer trust the value mapping computed by
    2904             :         // computeAssignment(), the value that was originally copied could have
    2905             :         // been replaced.
    2906       40868 :         LIS->pruneValue(LR, Def, &EndPoints);
    2907             :         LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
    2908             :                           << Def << ": " << LR << '\n');
    2909             :       }
    2910             :       break;
    2911             :     case CR_Unresolved:
    2912             :     case CR_Impossible:
    2913             :       llvm_unreachable("Unresolved conflicts");
    2914             :     }
    2915             :   }
    2916     1965064 : }
    2917             : 
    2918             : /// Consider the following situation when coalescing the copy between
    2919             : /// %31 and %45 at 800. (The vertical lines represent live range segments.)
    2920             : ///
    2921             : ///                              Main range         Subrange 0004 (sub2)
    2922             : ///                              %31    %45           %31    %45
    2923             : ///  544    %45 = COPY %28               +                    +
    2924             : ///                                      | v1                 | v1
    2925             : ///  560B bb.1:                          +                    +
    2926             : ///  624        = %45.sub2               | v2                 | v2
    2927             : ///  800    %31 = COPY %45        +      +             +      +
    2928             : ///                               | v0                 | v0
    2929             : ///  816    %31.sub1 = ...        +                    |
    2930             : ///  880    %30 = COPY %31        | v1                 +
    2931             : ///  928    %45 = COPY %30        |      +                    +
    2932             : ///                               |      | v0                 | v0  <--+
    2933             : ///  992B   ; backedge -> bb.1    |      +                    +        |
    2934             : /// 1040        = %31.sub0        +                                    |
    2935             : ///                                                 This value must remain
    2936             : ///                                                 live-out!
    2937             : ///
    2938             : /// Assuming that %31 is coalesced into %45, the copy at 928 becomes
    2939             : /// redundant, since it copies the value from %45 back into it. The
    2940             : /// conflict resolution for the main range determines that %45.v0 is
    2941             : /// to be erased, which is ok since %31.v1 is identical to it.
    2942             : /// The problem happens with the subrange for sub2: it has to be live
    2943             : /// on exit from the block, but since 928 was actually a point of
    2944             : /// definition of %45.sub2, %45.sub2 was not live immediately prior
    2945             : /// to that definition. As a result, when 928 was erased, the value v0
    2946             : /// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
    2947             : /// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
    2948             : /// providing an incorrect value to the use at 624.
    2949             : ///
    2950             : /// Since the main-range values %31.v1 and %45.v0 were proved to be
    2951             : /// identical, the corresponding values in subranges must also be the
    2952             : /// same. A redundant copy is removed because it's not needed, and not
    2953             : /// because it copied an undefined value, so any liveness that originated
    2954             : /// from that copy cannot disappear. When pruning a value that started
    2955             : /// at the removed copy, the corresponding identical value must be
    2956             : /// extended to replace it.
    2957           0 : void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
    2958             :   // Look for values being erased.
    2959             :   bool DidPrune = false;
    2960           0 :   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    2961           0 :     Val &V = Vals[i];
    2962             :     // We should trigger in all cases in which eraseInstrs() does something.
    2963             :     // match what eraseInstrs() is doing, print a message so
    2964           0 :     if (V.Resolution != CR_Erase &&
    2965           0 :         (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
    2966           0 :       continue;
    2967             : 
    2968             :     // Check subranges at the point where the copy will be removed.
    2969           0 :     SlotIndex Def = LR.getValNumInfo(i)->def;
    2970           0 :     SlotIndex OtherDef;
    2971           0 :     if (V.Identical)
    2972           0 :       OtherDef = V.OtherVNI->def;
    2973             : 
    2974             :     // Print message so mismatches with eraseInstrs() can be diagnosed.
    2975             :     LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
    2976             :                       << '\n');
    2977           0 :     for (LiveInterval::SubRange &S : LI.subranges()) {
    2978           0 :       LiveQueryResult Q = S.Query(Def);
    2979             : 
    2980             :       // If a subrange starts at the copy then an undefined value has been
    2981             :       // copied and we must remove that subrange value as well.
    2982             :       VNInfo *ValueOut = Q.valueOutOrDead();
    2983           0 :       if (ValueOut != nullptr && Q.valueIn() == nullptr) {
    2984             :         LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
    2985             :                           << " at " << Def << "\n");
    2986             :         SmallVector<SlotIndex,8> EndPoints;
    2987           0 :         LIS->pruneValue(S, Def, &EndPoints);
    2988             :         DidPrune = true;
    2989             :         // Mark value number as unused.
    2990             :         ValueOut->markUnused();
    2991             : 
    2992           0 :         if (V.Identical && S.Query(OtherDef).valueOut()) {
    2993             :           // If V is identical to V.OtherVNI (and S was live at OtherDef),
    2994             :           // then we can't simply prune V from S. V needs to be replaced
    2995             :           // with V.OtherVNI.
    2996           0 :           LIS->extendToIndices(S, EndPoints);
    2997             :         }
    2998             :         continue;
    2999             :       }
    3000             :       // If a subrange ends at the copy, then a value was copied but only
    3001             :       // partially used later. Shrink the subregister range appropriately.
    3002           0 :       if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
    3003             :         LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
    3004             :                           << PrintLaneMask(S.LaneMask) << " at " << Def
    3005             :                           << "\n");
    3006             :         ShrinkMask |= S.LaneMask;
    3007             :       }
    3008             :     }
    3009             :   }
    3010           0 :   if (DidPrune)
    3011           0 :     LI.removeEmptySubRanges();
    3012           0 : }
    3013             : 
    3014             : /// Check if any of the subranges of @p LI contain a definition at @p Def.
    3015      208378 : static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) {
    3016      506590 :   for (LiveInterval::SubRange &SR : LI.subranges()) {
    3017      506590 :     if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
    3018      503250 :       if (VNI->def == Def)
    3019             :         return true;
    3020             :   }
    3021             :   return false;
    3022             : }
    3023             : 
    3024      158293 : void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
    3025             :   assert(&static_cast<LiveRange&>(LI) == &LR);
    3026             : 
    3027      559656 :   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    3028      802726 :     if (Vals[i].Resolution != CR_Keep)
    3029             :       continue;
    3030      209571 :     VNInfo *VNI = LR.getValNumInfo(i);
    3031      209571 :     if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
    3032      209571 :       continue;
    3033           0 :     Vals[i].Pruned = true;
    3034           0 :     ShrinkMainRange = true;
    3035             :   }
    3036      158293 : }
    3037             : 
    3038      365562 : void JoinVals::removeImplicitDefs() {
    3039      751482 :   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    3040      385920 :     Val &V = Vals[i];
    3041      385920 :     if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
    3042             :       continue;
    3043             : 
    3044           1 :     VNInfo *VNI = LR.getValNumInfo(i);
    3045             :     VNI->markUnused();
    3046           1 :     LR.removeValNo(VNI);
    3047             :   }
    3048      365562 : }
    3049             : 
    3050     1599502 : void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
    3051             :                            SmallVectorImpl<unsigned> &ShrinkRegs,
    3052             :                            LiveInterval *LI) {
    3053     6151433 :   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    3054             :     // Get the def location before markUnused() below invalidates it.
    3055     9103862 :     SlotIndex Def = LR.getValNumInfo(i)->def;
    3056     4551931 :     switch (Vals[i].Resolution) {
    3057     3605097 :     case CR_Keep: {
    3058             :       // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
    3059             :       // longer. The IMPLICIT_DEF instructions are only inserted by
    3060             :       // PHIElimination to guarantee that all PHI predecessors have a value.
    3061     3605097 :       if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
    3062             :         break;
    3063             :       // Remove value number i from LR.
    3064             :       // For intervals with subranges, removing a segment from the main range
    3065             :       // may require extending the previous segment: for each definition of
    3066             :       // a subregister, there will be a corresponding def in the main range.
    3067             :       // That def may fall in the middle of a segment from another subrange.
    3068             :       // In such cases, removing this def from the main range must be
    3069             :       // complemented by extending the main range to account for the liveness
    3070             :       // of the other subrange.
    3071             :       VNInfo *VNI = LR.getValNumInfo(i);
    3072          25 :       SlotIndex Def = VNI->def;
    3073             :       // The new end point of the main range segment to be extended.
    3074          25 :       SlotIndex NewEnd;
    3075          25 :       if (LI != nullptr) {
    3076          22 :         LiveRange::iterator I = LR.FindSegmentContaining(Def);
    3077             :         assert(I != LR.end());
    3078             :         // Do not extend beyond the end of the segment being removed.
    3079             :         // The segment may have been pruned in preparation for joining
    3080             :         // live ranges.
    3081          22 :         NewEnd = I->end;
    3082             :       }
    3083             : 
    3084          25 :       LR.removeValNo(VNI);
    3085             :       // Note that this VNInfo is reused and still referenced in NewVNInfo,
    3086             :       // make it appear like an unused value number.
    3087             :       VNI->markUnused();
    3088             : 
    3089          25 :       if (LI != nullptr && LI->hasSubRanges()) {
    3090             :         assert(static_cast<LiveRange*>(LI) == &LR);
    3091             :         // Determine the end point based on the subrange information:
    3092             :         // minimum of (earliest def of next segment,
    3093             :         //             latest end point of containing segment)
    3094           0 :         SlotIndex ED, LE;
    3095           0 :         for (LiveInterval::SubRange &SR : LI->subranges()) {
    3096           0 :           LiveRange::iterator I = SR.find(Def);
    3097           0 :           if (I == SR.end())
    3098             :             continue;
    3099           0 :           if (I->start > Def)
    3100           0 :             ED = ED.isValid() ? std::min(ED, I->start) : I->start;
    3101             :           else
    3102           0 :             LE = LE.isValid() ? std::max(LE, I->end) : I->end;
    3103             :         }
    3104           0 :         if (LE.isValid())
    3105           0 :           NewEnd = std::min(NewEnd, LE);
    3106           0 :         if (ED.isValid())
    3107           0 :           NewEnd = std::min(NewEnd, ED);
    3108             : 
    3109             :         // We only want to do the extension if there was a subrange that
    3110             :         // was live across Def.
    3111           0 :         if (LE.isValid()) {
    3112           0 :           LiveRange::iterator S = LR.find(Def);
    3113           0 :           if (S != LR.begin())
    3114           0 :             std::prev(S)->end = NewEnd;
    3115             :         }
    3116             :       }
    3117             :       LLVM_DEBUG({
    3118             :         dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
    3119             :         if (LI != nullptr)
    3120             :           dbgs() << "\t\t  LHS = " << *LI << '\n';
    3121             :       });
    3122             :       LLVM_FALLTHROUGH;
    3123             :     }
    3124             : 
    3125             :     case CR_Erase: {
    3126             :       MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
    3127             :       assert(MI && "No instruction to erase");
    3128      830028 :       if (MI->isCopy()) {
    3129      812859 :         unsigned Reg = MI->getOperand(1).getReg();
    3130      812859 :         if (TargetRegisterInfo::isVirtualRegister(Reg) &&
    3131      812859 :             Reg != CP.getSrcReg() && Reg != CP.getDstReg())
    3132         438 :           ShrinkRegs.push_back(Reg);
    3133             :       }
    3134      830028 :       ErasedInstrs.insert(MI);
    3135             :       LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
    3136      830028 :       LIS->RemoveMachineInstrFromMaps(*MI);
    3137      830028 :       MI->eraseFromParent();
    3138      830028 :       break;
    3139             :     }
    3140             :     default:
    3141             :       break;
    3142             :     }
    3143             :   }
    3144     1599502 : }
    3145             : 
    3146           0 : void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
    3147             :                                          LaneBitmask LaneMask,
    3148             :                                          const CoalescerPair &CP) {
    3149             :   SmallVector<VNInfo*, 16> NewVNInfo;
    3150             :   JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
    3151           0 :                    NewVNInfo, CP, LIS, TRI, true, true);
    3152             :   JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
    3153           0 :                    NewVNInfo, CP, LIS, TRI, true, true);
    3154             : 
    3155             :   // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
    3156             :   // We should be able to resolve all conflicts here as we could successfully do
    3157             :   // it on the mainrange already. There is however a problem when multiple
    3158             :   // ranges get mapped to the "overflow" lane mask bit which creates unexpected
    3159             :   // interferences.
    3160           0 :   if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
    3161             :     // We already determined that it is legal to merge the intervals, so this
    3162             :     // should never fail.
    3163           0 :     llvm_unreachable("*** Couldn't join subrange!\n");
    3164             :   }
    3165           0 :   if (!LHSVals.resolveConflicts(RHSVals) ||
    3166           0 :       !RHSVals.resolveConflicts(LHSVals)) {
    3167             :     // We already determined that it is legal to merge the intervals, so this
    3168             :     // should never fail.
    3169           0 :     llvm_unreachable("*** Couldn't join subrange!\n");
    3170             :   }
    3171             : 
    3172             :   // The merging algorithm in LiveInterval::join() can't handle conflicting
    3173             :   // value mappings, so we need to remove any live ranges that overlap a
    3174             :   // CR_Replace resolution. Collect a set of end points that can be used to
    3175             :   // restore the live range after joining.
    3176             :   SmallVector<SlotIndex, 8> EndPoints;
    3177           0 :   LHSVals.pruneValues(RHSVals, EndPoints, false);
    3178           0 :   RHSVals.pruneValues(LHSVals, EndPoints, false);
    3179             : 
    3180           0 :   LHSVals.removeImplicitDefs();
    3181           0 :   RHSVals.removeImplicitDefs();
    3182             : 
    3183             :   LRange.verify();
    3184             :   RRange.verify();
    3185             : 
    3186             :   // Join RRange into LHS.
    3187           0 :   LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
    3188             :               NewVNInfo);
    3189             : 
    3190             :   LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
    3191             :                     << ' ' << LRange << "\n");
    3192           0 :   if (EndPoints.empty())
    3193           0 :     return;
    3194             : 
    3195             :   // Recompute the parts of the live range we had to remove because of
    3196             :   // CR_Replace conflicts.
    3197             :   LLVM_DEBUG({
    3198             :     dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
    3199             :     for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
    3200             :       dbgs() << EndPoints[i];
    3201             :       if (i != n-1)
    3202             :         dbgs() << ',';
    3203             :     }
    3204             :     dbgs() << ":  " << LRange << '\n';
    3205             :   });
    3206           0 :   LIS->extendToIndices(LRange, EndPoints);
    3207             : }
    3208             : 
    3209      182012 : void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
    3210             :                                           const LiveRange &ToMerge,
    3211             :                                           LaneBitmask LaneMask,
    3212             :                                           CoalescerPair &CP) {
    3213      182012 :   BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
    3214      182012 :   LI.refineSubRanges(Allocator, LaneMask,
    3215             :       [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) {
    3216             :     if (SR.empty()) {
    3217             :       SR.assign(ToMerge, Allocator);
    3218             :     } else {
    3219             :       // joinSubRegRange() destroys the merged range, so we need a copy.
    3220             :       LiveRange RangeCopy(ToMerge, Allocator);
    3221             :       joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
    3222             :     }
    3223             :   });
    3224      182012 : }
    3225             : 
    3226      911226 : bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
    3227             :   SmallVector<VNInfo*, 16> NewVNInfo;
    3228      911226 :   LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
    3229      911226 :   LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
    3230      911226 :   bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
    3231             :   JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
    3232     1822452 :                    NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
    3233             :   JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
    3234     1822452 :                    NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
    3235             : 
    3236             :   LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
    3237             : 
    3238             :   // First compute NewVNInfo and the simple value mappings.
    3239             :   // Detect impossible conflicts early.
    3240      911226 :   if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
    3241       80627 :     return false;
    3242             : 
    3243             :   // Some conflicts can only be resolved after all values have been mapped.
    3244      830599 :   if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
    3245       30848 :     return false;
    3246             : 
    3247             :   // All clear, the live ranges can be merged.
    3248      799751 :   if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
    3249      158293 :     BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
    3250             : 
    3251             :     // Transform lanemasks from the LHS to masks in the coalesced register and
    3252             :     // create initial subranges if necessary.
    3253      158293 :     unsigned DstIdx = CP.getDstIdx();
    3254      158293 :     if (!LHS.hasSubRanges()) {
    3255         336 :       LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
    3256         336 :                                      : TRI->getSubRegIndexLaneMask(DstIdx);
    3257             :       // LHS must support subregs or we wouldn't be in this codepath.
    3258             :       assert(Mask.any());
    3259         336 :       LHS.createSubRangeFrom(Allocator, Mask, LHS);
    3260      157957 :     } else if (DstIdx != 0) {
    3261             :       // Transform LHS lanemasks to new register class if necessary.
    3262           0 :       for (LiveInterval::SubRange &R : LHS.subranges()) {
    3263           0 :         LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
    3264           0 :         R.LaneMask = Mask;
    3265             :       }
    3266             :     }
    3267             :     LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
    3268             :                       << '\n');
    3269             : 
    3270             :     // Determine lanemasks of RHS in the coalesced register and merge subranges.
    3271      158293 :     unsigned SrcIdx = CP.getSrcIdx();
    3272      158293 :     if (!RHS.hasSubRanges()) {
    3273         442 :       LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
    3274      139624 :                                      : TRI->getSubRegIndexLaneMask(SrcIdx);
    3275      139624 :       mergeSubRangeInto(LHS, RHS, Mask, CP);
    3276             :     } else {
    3277             :       // Pair up subranges and merge.
    3278       61057 :       for (LiveInterval::SubRange &R : RHS.subranges()) {
    3279       42388 :         LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
    3280       42388 :         mergeSubRangeInto(LHS, R, Mask, CP);
    3281             :       }
    3282             :     }
    3283             :     LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
    3284             : 
    3285             :     // Pruning implicit defs from subranges may result in the main range
    3286             :     // having stale segments.
    3287      158293 :     LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
    3288             : 
    3289      158293 :     LHSVals.pruneSubRegValues(LHS, ShrinkMask);
    3290      158293 :     RHSVals.pruneSubRegValues(LHS, ShrinkMask);
    3291             :   }
    3292             : 
    3293             :   // The merging algorithm in LiveInterval::join() can't handle conflicting
    3294             :   // value mappings, so we need to remove any live ranges that overlap a
    3295             :   // CR_Replace resolution. Collect a set of end points that can be used to
    3296             :   // restore the live range after joining.
    3297             :   SmallVector<SlotIndex, 8> EndPoints;
    3298      799751 :   LHSVals.pruneValues(RHSVals, EndPoints, true);
    3299      799751 :   RHSVals.pruneValues(LHSVals, EndPoints, true);
    3300             : 
    3301             :   // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
    3302             :   // registers to require trimming.
    3303             :   SmallVector<unsigned, 8> ShrinkRegs;
    3304      799751 :   LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
    3305      799751 :   RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
    3306      800189 :   while (!ShrinkRegs.empty())
    3307         876 :     shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
    3308             : 
    3309             :   // Join RHS into LHS.
    3310      799751 :   LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
    3311             : 
    3312             :   // Kill flags are going to be wrong if the live ranges were overlapping.
    3313             :   // Eventually, we should simply clear all kill flags when computing live
    3314             :   // ranges. They are reinserted after register allocation.
    3315      799751 :   MRI->clearKillFlags(LHS.reg);
    3316      799751 :   MRI->clearKillFlags(RHS.reg);
    3317             : 
    3318      799751 :   if (!EndPoints.empty()) {
    3319             :     // Recompute the parts of the live range we had to remove because of
    3320             :     // CR_Replace conflicts.
    3321             :     LLVM_DEBUG({
    3322             :       dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
    3323             :       for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
    3324             :         dbgs() << EndPoints[i];
    3325             :         if (i != n-1)
    3326             :           dbgs() << ',';
    3327             :       }
    3328             :       dbgs() << ":  " << LHS << '\n';
    3329             :     });
    3330       67071 :     LIS->extendToIndices((LiveRange&)LHS, EndPoints);
    3331             :   }
    3332             : 
    3333             :   return true;
    3334             : }
    3335             : 
    3336      960020 : bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
    3337      960020 :   return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
    3338             : }
    3339             : 
    3340             : namespace {
    3341             : 
    3342             : /// Information concerning MBB coalescing priority.
    3343             : struct MBBPriorityInfo {
    3344             :   MachineBasicBlock *MBB;
    3345             :   unsigned Depth;
    3346             :   bool IsSplit;
    3347             : 
    3348             :   MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
    3349      462693 :     : MBB(mbb), Depth(depth), IsSplit(issplit) {}
    3350             : };
    3351             : 
    3352             : } // end anonymous namespace
    3353             : 
    3354             : /// C-style comparator that sorts first based on the loop depth of the basic
    3355             : /// block (the unsigned), and then on the MBB number.
    3356             : ///
    3357             : /// EnableGlobalCopies assumes that the primary sort key is loop depth.
    3358     1426823 : static int compareMBBPriority(const MBBPriorityInfo *LHS,
    3359             :                               const MBBPriorityInfo *RHS) {
    3360             :   // Deeper loops first
    3361     1426823 :   if (LHS->Depth != RHS->Depth)
    3362      144492 :     return LHS->Depth > RHS->Depth ? -1 : 1;
    3363             : 
    3364             :   // Try to unsplit critical edges next.
    3365     1339376 :   if (LHS->IsSplit != RHS->IsSplit)
    3366           0 :     return LHS->IsSplit ? -1 : 1;
    3367             : 
    3368             :   // Prefer blocks that are more connected in the CFG. This takes care of
    3369             :   // the most difficult copies first while intervals are short.
    3370     1339376 :   unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
    3371     1339376 :   unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
    3372     1339376 :   if (cl != cr)
    3373     1222022 :     return cl > cr ? -1 : 1;
    3374             : 
    3375             :   // As a last resort, sort by block number.
    3376      673762 :   return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
    3377             : }
    3378             : 
    3379             : /// \returns true if the given copy uses or defines a local live range.
    3380     1617981 : static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
    3381     1617981 :   if (!Copy->isCopy())
    3382             :     return false;
    3383             : 
    3384     3202174 :   if (Copy->getOperand(1).isUndef())
    3385             :     return false;
    3386             : 
    3387     1601053 :   unsigned SrcReg = Copy->getOperand(1).getReg();
    3388     1601053 :   unsigned DstReg = Copy->getOperand(0).getReg();
    3389             :   if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
    3390     1601053 :       || TargetRegisterInfo::isPhysicalRegister(DstReg))
    3391             :     return false;
    3392             : 
    3393      885661 :   return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
    3394     1040636 :     || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
    3395             : }
    3396             : 
    3397      390971 : void RegisterCoalescer::lateLiveIntervalUpdate() {
    3398      390981 :   for (unsigned reg : ToBeUpdated) {
    3399          10 :     if (!LIS->hasInterval(reg))
    3400             :       continue;
    3401          10 :     LiveInterval &LI = LIS->getInterval(reg);
    3402          10 :     shrinkToUses(&LI, &DeadDefs);
    3403          10 :     if (!DeadDefs.empty())
    3404           0 :       eliminateDeadDefs();
    3405             :   }
    3406             :   ToBeUpdated.clear();
    3407      390970 : }
    3408             : 
    3409     1025895 : bool RegisterCoalescer::
    3410             : copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
    3411             :   bool Progress = false;
    3412     3426592 :   for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
    3413     4801390 :     if (!CurrList[i])
    3414      551253 :       continue;
    3415             :     // Skip instruction pointers that have already been erased, for example by
    3416             :     // dead code elimination.
    3417     1874906 :     if (ErasedInstrs.count(CurrList[i])) {
    3418       25464 :       CurrList[i] = nullptr;
    3419       25464 :       continue;
    3420             :     }
    3421     1849444 :     bool Again = false;
    3422     1849444 :     bool Success = joinCopy(CurrList[i], Again);
    3423     1849444 :     Progress |= Success;
    3424     1849444 :     if (Success || !Again)
    3425     1614126 :       CurrList[i] = nullptr;
    3426             :   }
    3427     1025897 :   return Progress;
    3428             : }
    3429             : 
    3430             : /// Check if DstReg is a terminal node.
    3431             : /// I.e., it does not have any affinity other than \p Copy.
    3432          21 : static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
    3433             :                           const MachineRegisterInfo *MRI) {
    3434             :   assert(Copy.isCopyLike());
    3435             :   // Check if the destination of this copy as any other affinity.
    3436          50 :   for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
    3437          48 :     if (&MI != &Copy && MI.isCopyLike())
    3438             :       return false;
    3439             :   return true;
    3440             : }
    3441             : 
    3442     1743463 : bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
    3443             :   assert(Copy.isCopyLike());
    3444     1743463 :   if (!UseTerminalRule)
    3445             :     return false;
    3446             :   unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
    3447          33 :   isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
    3448             :   // Check if the destination of this copy has any other affinity.
    3449          33 :   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
    3450             :       // If SrcReg is a physical register, the copy won't be coalesced.
    3451             :       // Ignoring it may have other side effect (like missing
    3452             :       // rematerialization). So keep it.
    3453          53 :       TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
    3454          20 :       !isTerminalReg(DstReg, Copy, MRI))
    3455          31 :     return false;
    3456             : 
    3457             :   // DstReg is a terminal node. Check if it interferes with any other
    3458             :   // copy involving SrcReg.
    3459           2 :   const MachineBasicBlock *OrigBB = Copy.getParent();
    3460           2 :   const LiveInterval &DstLI = LIS->getInterval(DstReg);
    3461           9 :   for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
    3462             :     // Technically we should check if the weight of the new copy is
    3463             :     // interesting compared to the other one and update the weight
    3464             :     // of the copies accordingly. However, this would only work if
    3465             :     // we would gather all the copies first then coalesce, whereas
    3466             :     // right now we interleave both actions.
    3467             :     // For now, just consider the copies that are in the same block.
    3468           7 :     if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
    3469           5 :       continue;
    3470             :     unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
    3471           1 :     isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
    3472             :                 OtherSubReg);
    3473           1 :     if (OtherReg == SrcReg)
    3474           0 :       OtherReg = OtherSrcReg;
    3475             :     // Check if OtherReg is a non-terminal.
    3476           3 :     if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
    3477           1 :         isTerminalReg(OtherReg, MI, MRI))
    3478           0 :       continue;
    3479             :     // Check that OtherReg interfere with DstReg.
    3480           2 :     if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
    3481             :       LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
    3482             :                         << '\n');
    3483           1 :       return true;
    3484             :     }
    3485             :   }
    3486             :   return false;
    3487             : }
    3488             : 
    3489             : void
    3490      462692 : RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
    3491             :   LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");
    3492             : 
    3493             :   // Collect all copy-like instructions in MBB. Don't start coalescing anything
    3494             :   // yet, it might invalidate the iterator.
    3495      462692 :   const unsigned PrevSize = WorkList.size();
    3496      462692 :   if (JoinGlobalCopies) {
    3497             :     SmallVector<MachineInstr*, 2> LocalTerminals;
    3498             :     SmallVector<MachineInstr*, 2> GlobalTerminals;
    3499             :     // Coalesce copies bottom-up to coalesce local defs before local uses. They
    3500             :     // are not inherently easier to resolve, but slightly preferable until we
    3501             :     // have local live range splitting. In particular this is required by
    3502             :     // cmp+jmp macro fusion.
    3503             :     for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
    3504     5044120 :          MII != E; ++MII) {
    3505             :       if (!MII->isCopyLike())
    3506             :         continue;
    3507     1617982 :       bool ApplyTerminalRule = applyTerminalRule(*MII);
    3508     1617981 :       if (isLocalCopy(&(*MII), LIS)) {
    3509      826487 :         if (ApplyTerminalRule)
    3510           1 :           LocalTerminals.push_back(&(*MII));
    3511             :         else
    3512      826486 :           LocalWorkList.push_back(&(*MII));
    3513             :       } else {
    3514      791494 :         if (ApplyTerminalRule)
    3515           0 :           GlobalTerminals.push_back(&(*MII));
    3516             :         else
    3517      791494 :           WorkList.push_back(&(*MII));
    3518             :       }
    3519             :     }
    3520             :     // Append the copies evicted by the terminal rule at the end of the list.
    3521      848530 :     LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
    3522      848532 :     WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
    3523             :   }
    3524             :   else {
    3525             :     SmallVector<MachineInstr*, 2> Terminals;
    3526      400666 :     for (MachineInstr &MII : *MBB)
    3527             :       if (MII.isCopyLike()) {
    3528      125482 :         if (applyTerminalRule(MII))
    3529           0 :           Terminals.push_back(&MII);
    3530             :         else
    3531      125482 :           WorkList.push_back(&MII);
    3532             :       }
    3533             :     // Append the copies evicted by the terminal rule at the end of the list.
    3534       76854 :     WorkList.append(Terminals.begin(), Terminals.end());
    3535             :   }
    3536             :   // Try coalescing the collected copies immediately, and remove the nulls.
    3537             :   // This prevents the WorkList from getting too large since most copies are
    3538             :   // joinable on the first attempt.
    3539             :   MutableArrayRef<MachineInstr*>
    3540      462693 :     CurrList(WorkList.begin() + PrevSize, WorkList.end());
    3541      462693 :   if (copyCoalesceWorkList(CurrList))
    3542      110664 :     WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
    3543      221328 :                                nullptr), WorkList.end());
    3544      462693 : }
    3545             : 
    3546      366729 : void RegisterCoalescer::coalesceLocals() {
    3547      366729 :   copyCoalesceWorkList(LocalWorkList);
    3548     1193218 :   for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
    3549     1652974 :     if (LocalWorkList[j])
    3550       35676 :       WorkList.push_back(LocalWorkList[j]);
    3551             :   }
    3552             :   LocalWorkList.clear();
    3553      366731 : }
    3554             : 
    3555      195486 : void RegisterCoalescer::joinAllIntervals() {
    3556             :   LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
    3557             :   assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
    3558             : 
    3559             :   std::vector<MBBPriorityInfo> MBBs;
    3560      390972 :   MBBs.reserve(MF->size());
    3561      658178 :   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
    3562             :     MachineBasicBlock *MBB = &*I;
    3563      925385 :     MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
    3564      462693 :                                    JoinSplitEdges && isSplitEdge(MBB)));
    3565             :   }
    3566             :   array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
    3567             : 
    3568             :   // Coalesce intervals in MBB priority order.
    3569             :   unsigned CurrDepth = std::numeric_limits<unsigned>::max();
    3570      853663 :   for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
    3571             :     // Try coalescing the collected local copies for deeper loops.
    3572      462692 :     if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
    3573      171245 :       coalesceLocals();
    3574      342486 :       CurrDepth = MBBs[i].Depth;
    3575             :     }
    3576      925380 :     copyCoalesceInMBB(MBBs[i].MBB);
    3577             :   }
    3578      195486 :   lateLiveIntervalUpdate();
    3579      195485 :   coalesceLocals();
    3580             : 
    3581             :   // Joining intervals can allow other intervals to be joined.  Iteratively join
    3582             :   // until we make no progress.
    3583      196474 :   while (copyCoalesceWorkList(WorkList))
    3584             :     /* empty */ ;
    3585      195485 :   lateLiveIntervalUpdate();
    3586      195486 : }
    3587             : 
    3588      195506 : void RegisterCoalescer::releaseMemory() {
    3589      195506 :   ErasedInstrs.clear();
    3590             :   WorkList.clear();
    3591             :   DeadDefs.clear();
    3592             :   InflateRegs.clear();
    3593      195506 : }
    3594             : 
    3595      195488 : bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
    3596      195488 :   MF = &fn;
    3597      195488 :   MRI = &fn.getRegInfo();
    3598      195488 :   const TargetSubtargetInfo &STI = fn.getSubtarget();
    3599      195488 :   TRI = STI.getRegisterInfo();
    3600      195488 :   TII = STI.getInstrInfo();
    3601      195488 :   LIS = &getAnalysis<LiveIntervals>();
    3602      195489 :   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    3603      195489 :   Loops = &getAnalysis<MachineLoopInfo>();
    3604      195489 :   if (EnableGlobalCopies == cl::BOU_UNSET)
    3605      195489 :     JoinGlobalCopies = STI.enableJoinGlobalCopies();
    3606             :   else
    3607           0 :     JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
    3608             : 
    3609             :   // The MachineScheduler does not currently require JoinSplitEdges. This will
    3610             :   // either be enabled unconditionally or replaced by a more general live range
    3611             :   // splitting optimization.
    3612      195489 :   JoinSplitEdges = EnableJoinSplits;
    3613             : 
    3614             :   LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
    3615             :                     << "********** Function: " << MF->getName() << '\n');
    3616             : 
    3617      195489 :   if (VerifyCoalescing)
    3618          47 :     MF->verify(this, "Before register coalescing");
    3619             : 
    3620      195489 :   RegClassInfo.runOnMachineFunction(fn);
    3621             : 
    3622             :   // Join (coalesce) intervals if requested.
    3623      195489 :   if (EnableJoining)
    3624      195486 :     joinAllIntervals();
    3625             : 
    3626             :   // After deleting a lot of copies, register classes may be less constrained.
    3627             :   // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
    3628             :   // DPR inflation.
    3629             :   array_pod_sort(InflateRegs.begin(), InflateRegs.end());
    3630      195488 :   InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
    3631             :                     InflateRegs.end());
    3632             :   LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
    3633             :                     << " regs.\n");
    3634      208321 :   for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
    3635       12832 :     unsigned Reg = InflateRegs[i];
    3636       12832 :     if (MRI->reg_nodbg_empty(Reg))
    3637             :       continue;
    3638       11879 :     if (MRI->recomputeRegClass(Reg)) {
    3639             :       LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
    3640             :                         << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
    3641             :       ++NumInflated;
    3642             : 
    3643        4177 :       LiveInterval &LI = LIS->getInterval(Reg);
    3644        4177 :       if (LI.hasSubRanges()) {
    3645             :         // If the inflated register class does not support subregisters anymore
    3646             :         // remove the subranges.
    3647           0 :         if (!MRI->shouldTrackSubRegLiveness(Reg)) {
    3648           0 :           LI.clearSubRanges();
    3649             :         } else {
    3650             : #ifndef NDEBUG
    3651             :           LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
    3652             :           // If subranges are still supported, then the same subregs
    3653             :           // should still be supported.
    3654             :           for (LiveInterval::SubRange &S : LI.subranges()) {
    3655             :             assert((S.LaneMask & ~MaxMask).none());
    3656             :           }
    3657             : #endif
    3658             :         }
    3659             :       }
    3660             :     }
    3661             :   }
    3662             : 
    3663             :   LLVM_DEBUG(dump());
    3664      195489 :   if (VerifyCoalescing)
    3665          47 :     MF->verify(this, "After register coalescing");
    3666      195489 :   return true;
    3667             : }
    3668             : 
    3669           0 : void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
    3670           0 :    LIS->print(O, m);
    3671           0 : }

Generated by: LCOV version 1.13