LLVM  8.0.0svn
RegisterCoalescer.cpp
Go to the documentation of this file.
1 //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the generic RegisterCoalescer interface which
11 // is used as the common interface used by all clients and
12 // implementations of register coalescing.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "RegisterCoalescer.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/DenseSet.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
36 #include "llvm/CodeGen/Passes.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/MC/LaneBitmask.h"
45 #include "llvm/MC/MCInstrDesc.h"
46 #include "llvm/MC/MCRegisterInfo.h"
47 #include "llvm/Pass.h"
49 #include "llvm/Support/Compiler.h"
50 #include "llvm/Support/Debug.h"
53 #include <algorithm>
54 #include <cassert>
55 #include <iterator>
56 #include <limits>
57 #include <tuple>
58 #include <utility>
59 #include <vector>
60 
61 using namespace llvm;
62 
63 #define DEBUG_TYPE "regalloc"
64 
65 STATISTIC(numJoins , "Number of interval joins performed");
66 STATISTIC(numCrossRCs , "Number of cross class joins performed");
67 STATISTIC(numCommutes , "Number of instruction commuting performed");
68 STATISTIC(numExtends , "Number of copies extended");
69 STATISTIC(NumReMats , "Number of instructions re-materialized");
70 STATISTIC(NumInflated , "Number of register classes inflated");
71 STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
72 STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
73 STATISTIC(NumShrinkToUses, "Number of shrinkToUses called");
74 
75 static cl::opt<bool> EnableJoining("join-liveintervals",
76  cl::desc("Coalesce copies (default=true)"),
77  cl::init(true), cl::Hidden);
78 
79 static cl::opt<bool> UseTerminalRule("terminal-rule",
80  cl::desc("Apply the terminal rule"),
81  cl::init(false), cl::Hidden);
82 
83 /// Temporary flag to test critical edge unsplitting.
84 static cl::opt<bool>
85 EnableJoinSplits("join-splitedges",
86  cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
87 
88 /// Temporary flag to test global copy optimization.
90 EnableGlobalCopies("join-globalcopies",
91  cl::desc("Coalesce copies that span blocks (default=subtarget)"),
93 
94 static cl::opt<bool>
95 VerifyCoalescing("verify-coalescing",
96  cl::desc("Verify machine instrs before and after register coalescing"),
97  cl::Hidden);
98 
100  "late-remat-update-threshold", cl::Hidden,
101  cl::desc("During rematerialization for a copy, if the def instruction has "
102  "many other copy uses to be rematerialized, delay the multiple "
103  "separate live interval update work and do them all at once after "
104  "all those rematerialization are done. It will save a lot of "
105  "repeated work. "),
106  cl::init(100));
107 
108 namespace {
109 
110  class RegisterCoalescer : public MachineFunctionPass,
111  private LiveRangeEdit::Delegate {
112  MachineFunction* MF;
114  const TargetRegisterInfo* TRI;
115  const TargetInstrInfo* TII;
116  LiveIntervals *LIS;
117  const MachineLoopInfo* Loops;
118  AliasAnalysis *AA;
119  RegisterClassInfo RegClassInfo;
120 
121  /// A LaneMask to remember on which subregister live ranges we need to call
122  /// shrinkToUses() later.
123  LaneBitmask ShrinkMask;
124 
125  /// True if the main range of the currently coalesced intervals should be
126  /// checked for smaller live intervals.
127  bool ShrinkMainRange;
128 
129  /// True if the coalescer should aggressively coalesce global copies
130  /// in favor of keeping local copies.
131  bool JoinGlobalCopies;
132 
133  /// True if the coalescer should aggressively coalesce fall-thru
134  /// blocks exclusively containing copies.
135  bool JoinSplitEdges;
136 
137  /// Copy instructions yet to be coalesced.
139  SmallVector<MachineInstr*, 8> LocalWorkList;
140 
141  /// Set of instruction pointers that have been erased, and
142  /// that may be present in WorkList.
143  SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
144 
145  /// Dead instructions that are about to be deleted.
147 
148  /// Virtual registers to be considered for register class inflation.
149  SmallVector<unsigned, 8> InflateRegs;
150 
151  /// The collection of live intervals which should have been updated
152  /// immediately after rematerialiation but delayed until
153  /// lateLiveIntervalUpdate is called.
154  DenseSet<unsigned> ToBeUpdated;
155 
156  /// Recursively eliminate dead defs in DeadDefs.
157  void eliminateDeadDefs();
158 
159  /// LiveRangeEdit callback for eliminateDeadDefs().
160  void LRE_WillEraseInstruction(MachineInstr *MI) override;
161 
162  /// Coalesce the LocalWorkList.
163  void coalesceLocals();
164 
165  /// Join compatible live intervals
166  void joinAllIntervals();
167 
168  /// Coalesce copies in the specified MBB, putting
169  /// copies that cannot yet be coalesced into WorkList.
170  void copyCoalesceInMBB(MachineBasicBlock *MBB);
171 
172  /// Tries to coalesce all copies in CurrList. Returns true if any progress
173  /// was made.
174  bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
175 
176  /// If one def has many copy like uses, and those copy uses are all
177  /// rematerialized, the live interval update needed for those
178  /// rematerializations will be delayed and done all at once instead
179  /// of being done multiple times. This is to save compile cost becuase
180  /// live interval update is costly.
181  void lateLiveIntervalUpdate();
182 
183  /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
184  /// src/dst of the copy instruction CopyMI. This returns true if the copy
185  /// was successfully coalesced away. If it is not currently possible to
186  /// coalesce this interval, but it may be possible if other things get
187  /// coalesced, then it returns true by reference in 'Again'.
188  bool joinCopy(MachineInstr *CopyMI, bool &Again);
189 
190  /// Attempt to join these two intervals. On failure, this
191  /// returns false. The output "SrcInt" will not have been modified, so we
192  /// can use this information below to update aliases.
193  bool joinIntervals(CoalescerPair &CP);
194 
195  /// Attempt joining two virtual registers. Return true on success.
196  bool joinVirtRegs(CoalescerPair &CP);
197 
198  /// Attempt joining with a reserved physreg.
199  bool joinReservedPhysReg(CoalescerPair &CP);
200 
201  /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
202  /// Subranges in @p LI which only partially interfere with the desired
203  /// LaneMask are split as necessary. @p LaneMask are the lanes that
204  /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
205  /// lanemasks already adjusted to the coalesced register.
206  void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
207  LaneBitmask LaneMask, CoalescerPair &CP);
208 
209  /// Join the liveranges of two subregisters. Joins @p RRange into
210  /// @p LRange, @p RRange may be invalid afterwards.
211  void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
212  LaneBitmask LaneMask, const CoalescerPair &CP);
213 
214  /// We found a non-trivially-coalescable copy. If the source value number is
215  /// defined by a copy from the destination reg see if we can merge these two
216  /// destination reg valno# into a single value number, eliminating a copy.
217  /// This returns true if an interval was modified.
218  bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
219 
220  /// Return true if there are definitions of IntB
221  /// other than BValNo val# that can reach uses of AValno val# of IntA.
222  bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
223  VNInfo *AValNo, VNInfo *BValNo);
224 
225  /// We found a non-trivially-coalescable copy.
226  /// If the source value number is defined by a commutable instruction and
227  /// its other operand is coalesced to the copy dest register, see if we
228  /// can transform the copy into a noop by commuting the definition.
229  /// This returns true if an interval was modified.
230  bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
231 
232  /// We found a copy which can be moved to its less frequent predecessor.
233  bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
234 
235  /// If the source of a copy is defined by a
236  /// trivial computation, replace the copy by rematerialize the definition.
237  bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
238  bool &IsDefCopy);
239 
240  /// Return true if a copy involving a physreg should be joined.
241  bool canJoinPhys(const CoalescerPair &CP);
242 
243  /// Replace all defs and uses of SrcReg to DstReg and update the subregister
244  /// number if it is not zero. If DstReg is a physical register and the
245  /// existing subregister number of the def / use being updated is not zero,
246  /// make sure to set it to the correct physical subregister.
247  void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
248 
249  /// If the given machine operand reads only undefined lanes add an undef
250  /// flag.
251  /// This can happen when undef uses were previously concealed by a copy
252  /// which we coalesced. Example:
253  /// %0:sub0<def,read-undef> = ...
254  /// %1 = COPY %0 <-- Coalescing COPY reveals undef
255  /// = use %1:sub1 <-- hidden undef use
256  void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
257  MachineOperand &MO, unsigned SubRegIdx);
258 
259  /// Handle copies of undef values. If the undef value is an incoming
260  /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
261  /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
262  /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
263  MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);
264 
265  /// Check whether or not we should apply the terminal rule on the
266  /// destination (Dst) of \p Copy.
267  /// When the terminal rule applies, Copy is not profitable to
268  /// coalesce.
269  /// Dst is terminal if it has exactly one affinity (Dst, Src) and
270  /// at least one interference (Dst, Dst2). If Dst is terminal, the
271  /// terminal rule consists in checking that at least one of
272  /// interfering node, say Dst2, has an affinity of equal or greater
273  /// weight with Src.
274  /// In that case, Dst2 and Dst will not be able to be both coalesced
275  /// with Src. Since Dst2 exposes more coalescing opportunities than
276  /// Dst, we can drop \p Copy.
277  bool applyTerminalRule(const MachineInstr &Copy) const;
278 
279  /// Wrapper method for \see LiveIntervals::shrinkToUses.
280  /// This method does the proper fixing of the live-ranges when the afore
281  /// mentioned method returns true.
282  void shrinkToUses(LiveInterval *LI,
284  NumShrinkToUses++;
285  if (LIS->shrinkToUses(LI, Dead)) {
286  /// Check whether or not \p LI is composed by multiple connected
287  /// components and if that is the case, fix that.
289  LIS->splitSeparateComponents(*LI, SplitLIs);
290  }
291  }
292 
293  /// Wrapper Method to do all the necessary work when an Instruction is
294  /// deleted.
295  /// Optimizations should use this to make sure that deleted instructions
296  /// are always accounted for.
297  void deleteInstr(MachineInstr* MI) {
298  ErasedInstrs.insert(MI);
299  LIS->RemoveMachineInstrFromMaps(*MI);
300  MI->eraseFromParent();
301  }
302 
303  public:
304  static char ID; ///< Class identification, replacement for typeinfo
305 
306  RegisterCoalescer() : MachineFunctionPass(ID) {
308  }
309 
310  void getAnalysisUsage(AnalysisUsage &AU) const override;
311 
312  void releaseMemory() override;
313 
314  /// This is the pass entry point.
315  bool runOnMachineFunction(MachineFunction&) override;
316 
317  /// Implement the dump method.
318  void print(raw_ostream &O, const Module* = nullptr) const override;
319  };
320 
321 } // end anonymous namespace
322 
323 char RegisterCoalescer::ID = 0;
324 
326 
327 INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
328  "Simple Register Coalescing", false, false)
333 INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
334  "Simple Register Coalescing", false, false)
335 
337  unsigned &Src, unsigned &Dst,
338  unsigned &SrcSub, unsigned &DstSub) {
339  if (MI->isCopy()) {
340  Dst = MI->getOperand(0).getReg();
341  DstSub = MI->getOperand(0).getSubReg();
342  Src = MI->getOperand(1).getReg();
343  SrcSub = MI->getOperand(1).getSubReg();
344  } else if (MI->isSubregToReg()) {
345  Dst = MI->getOperand(0).getReg();
346  DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
347  MI->getOperand(3).getImm());
348  Src = MI->getOperand(2).getReg();
349  SrcSub = MI->getOperand(2).getSubReg();
350  } else
351  return false;
352  return true;
353 }
354 
355 /// Return true if this block should be vacated by the coalescer to eliminate
356 /// branches. The important cases to handle in the coalescer are critical edges
357 /// split during phi elimination which contain only copies. Simple blocks that
358 /// contain non-branches should also be vacated, but this can be handled by an
359 /// earlier pass similar to early if-conversion.
360 static bool isSplitEdge(const MachineBasicBlock *MBB) {
361  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
362  return false;
363 
364  for (const auto &MI : *MBB) {
365  if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
366  return false;
367  }
368  return true;
369 }
370 
372  SrcReg = DstReg = 0;
373  SrcIdx = DstIdx = 0;
374  NewRC = nullptr;
375  Flipped = CrossClass = false;
376 
377  unsigned Src, Dst, SrcSub, DstSub;
378  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
379  return false;
380  Partial = SrcSub || DstSub;
381 
382  // If one register is a physreg, it must be Dst.
385  return false;
386  std::swap(Src, Dst);
387  std::swap(SrcSub, DstSub);
388  Flipped = true;
389  }
390 
391  const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
392 
394  // Eliminate DstSub on a physreg.
395  if (DstSub) {
396  Dst = TRI.getSubReg(Dst, DstSub);
397  if (!Dst) return false;
398  DstSub = 0;
399  }
400 
401  // Eliminate SrcSub by picking a corresponding Dst superregister.
402  if (SrcSub) {
403  Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
404  if (!Dst) return false;
405  } else if (!MRI.getRegClass(Src)->contains(Dst)) {
406  return false;
407  }
408  } else {
409  // Both registers are virtual.
410  const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
411  const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
412 
413  // Both registers have subreg indices.
414  if (SrcSub && DstSub) {
415  // Copies between different sub-registers are never coalescable.
416  if (Src == Dst && SrcSub != DstSub)
417  return false;
418 
419  NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
420  SrcIdx, DstIdx);
421  if (!NewRC)
422  return false;
423  } else if (DstSub) {
424  // SrcReg will be merged with a sub-register of DstReg.
425  SrcIdx = DstSub;
426  NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
427  } else if (SrcSub) {
428  // DstReg will be merged with a sub-register of SrcReg.
429  DstIdx = SrcSub;
430  NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
431  } else {
432  // This is a straight copy without sub-registers.
433  NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
434  }
435 
436  // The combined constraint may be impossible to satisfy.
437  if (!NewRC)
438  return false;
439 
440  // Prefer SrcReg to be a sub-register of DstReg.
441  // FIXME: Coalescer should support subregs symmetrically.
442  if (DstIdx && !SrcIdx) {
443  std::swap(Src, Dst);
444  std::swap(SrcIdx, DstIdx);
445  Flipped = !Flipped;
446  }
447 
448  CrossClass = NewRC != DstRC || NewRC != SrcRC;
449  }
450  // Check our invariants
451  assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
453  "Cannot have a physical SubIdx");
454  SrcReg = Src;
455  DstReg = Dst;
456  return true;
457 }
458 
461  return false;
462  std::swap(SrcReg, DstReg);
463  std::swap(SrcIdx, DstIdx);
464  Flipped = !Flipped;
465  return true;
466 }
467 
469  if (!MI)
470  return false;
471  unsigned Src, Dst, SrcSub, DstSub;
472  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
473  return false;
474 
475  // Find the virtual register that is SrcReg.
476  if (Dst == SrcReg) {
477  std::swap(Src, Dst);
478  std::swap(SrcSub, DstSub);
479  } else if (Src != SrcReg) {
480  return false;
481  }
482 
483  // Now check that Dst matches DstReg.
486  return false;
487  assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
488  // DstSub could be set for a physreg from INSERT_SUBREG.
489  if (DstSub)
490  Dst = TRI.getSubReg(Dst, DstSub);
491  // Full copy of Src.
492  if (!SrcSub)
493  return DstReg == Dst;
494  // This is a partial register copy. Check that the parts match.
495  return TRI.getSubReg(DstReg, SrcSub) == Dst;
496  } else {
497  // DstReg is virtual.
498  if (DstReg != Dst)
499  return false;
500  // Registers match, do the subregisters line up?
501  return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
502  TRI.composeSubRegIndices(DstIdx, DstSub);
503  }
504 }
505 
506 void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
507  AU.setPreservesCFG();
516 }
517 
518 void RegisterCoalescer::eliminateDeadDefs() {
519  SmallVector<unsigned, 8> NewRegs;
520  LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
521  nullptr, this).eliminateDeadDefs(DeadDefs);
522 }
523 
524 void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
525  // MI may be in WorkList. Make sure we don't visit it.
526  ErasedInstrs.insert(MI);
527 }
528 
529 bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
530  MachineInstr *CopyMI) {
531  assert(!CP.isPartial() && "This doesn't work for partial copies.");
532  assert(!CP.isPhys() && "This doesn't work for physreg copies.");
533 
534  LiveInterval &IntA =
535  LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
536  LiveInterval &IntB =
537  LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
538  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
539 
540  // We have a non-trivially-coalescable copy with IntA being the source and
541  // IntB being the dest, thus this defines a value number in IntB. If the
542  // source value number (in IntA) is defined by a copy from B, see if we can
543  // merge these two pieces of B into a single value number, eliminating a copy.
544  // For example:
545  //
546  // A3 = B0
547  // ...
548  // B1 = A3 <- this copy
549  //
550  // In this case, B0 can be extended to where the B1 copy lives, allowing the
551  // B1 value number to be replaced with B0 (which simplifies the B
552  // liveinterval).
553 
554  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
555  // the example above.
557  if (BS == IntB.end()) return false;
558  VNInfo *BValNo = BS->valno;
559 
560  // Get the location that B is defined at. Two options: either this value has
561  // an unknown definition point or it is defined at CopyIdx. If unknown, we
562  // can't process it.
563  if (BValNo->def != CopyIdx) return false;
564 
565  // AValNo is the value number in A that defines the copy, A3 in the example.
566  SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
567  LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
568  // The live segment might not exist after fun with physreg coalescing.
569  if (AS == IntA.end()) return false;
570  VNInfo *AValNo = AS->valno;
571 
572  // If AValNo is defined as a copy from IntB, we can potentially process this.
573  // Get the instruction that defines this value number.
574  MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
575  // Don't allow any partial copies, even if isCoalescable() allows them.
576  if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
577  return false;
578 
579  // Get the Segment in IntB that this value number starts with.
581  IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
582  if (ValS == IntB.end())
583  return false;
584 
585  // Make sure that the end of the live segment is inside the same block as
586  // CopyMI.
587  MachineInstr *ValSEndInst =
588  LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
589  if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
590  return false;
591 
592  // Okay, we now know that ValS ends in the same block that the CopyMI
593  // live-range starts. If there are no intervening live segments between them
594  // in IntB, we can merge them.
595  if (ValS+1 != BS) return false;
596 
597  LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));
598 
599  SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
600  // We are about to delete CopyMI, so need to remove it as the 'instruction
601  // that defines this value #'. Update the valnum with the new defining
602  // instruction #.
603  BValNo->def = FillerStart;
604 
605  // Okay, we can merge them. We need to insert a new liverange:
606  // [ValS.end, BS.begin) of either value number, then we merge the
607  // two value numbers.
608  IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
609 
610  // Okay, merge "B1" into the same value number as "B0".
611  if (BValNo != ValS->valno)
612  IntB.MergeValueNumberInto(BValNo, ValS->valno);
613 
614  // Do the same for the subregister segments.
615  for (LiveInterval::SubRange &S : IntB.subranges()) {
616  // Check for SubRange Segments of the form [1234r,1234d:0) which can be
617  // removed to prevent creating bogus SubRange Segments.
618  LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
619  if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
620  S.removeSegment(*SS, true);
621  continue;
622  }
623  VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
624  S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
625  VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
626  if (SubBValNo != SubValSNo)
627  S.MergeValueNumberInto(SubBValNo, SubValSNo);
628  }
629 
630  LLVM_DEBUG(dbgs() << " result = " << IntB << '\n');
631 
632  // If the source instruction was killing the source register before the
633  // merge, unset the isKill marker given the live range has been extended.
634  int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
635  if (UIdx != -1) {
636  ValSEndInst->getOperand(UIdx).setIsKill(false);
637  }
638 
639  // Rewrite the copy.
640  CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
641  // If the copy instruction was killing the destination register or any
642  // subrange before the merge trim the live range.
643  bool RecomputeLiveRange = AS->end == CopyIdx;
644  if (!RecomputeLiveRange) {
645  for (LiveInterval::SubRange &S : IntA.subranges()) {
646  LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
647  if (SS != S.end() && SS->end == CopyIdx) {
648  RecomputeLiveRange = true;
649  break;
650  }
651  }
652  }
653  if (RecomputeLiveRange)
654  shrinkToUses(&IntA);
655 
656  ++numExtends;
657  return true;
658 }
659 
660 bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
661  LiveInterval &IntB,
662  VNInfo *AValNo,
663  VNInfo *BValNo) {
664  // If AValNo has PHI kills, conservatively assume that IntB defs can reach
665  // the PHI values.
666  if (LIS->hasPHIKill(IntA, AValNo))
667  return true;
668 
669  for (LiveRange::Segment &ASeg : IntA.segments) {
670  if (ASeg.valno != AValNo) continue;
672  std::upper_bound(IntB.begin(), IntB.end(), ASeg.start);
673  if (BI != IntB.begin())
674  --BI;
675  for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
676  if (BI->valno == BValNo)
677  continue;
678  if (BI->start <= ASeg.start && BI->end > ASeg.start)
679  return true;
680  if (BI->start > ASeg.start && BI->start < ASeg.end)
681  return true;
682  }
683  }
684  return false;
685 }
686 
687 /// Copy segments with value number @p SrcValNo from liverange @p Src to live
688 /// range @Dst and use value number @p DstValNo there.
689 static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
690  const LiveRange &Src, const VNInfo *SrcValNo) {
691  for (const LiveRange::Segment &S : Src.segments) {
692  if (S.valno != SrcValNo)
693  continue;
694  Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo));
695  }
696 }
697 
698 bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
699  MachineInstr *CopyMI) {
700  assert(!CP.isPhys());
701 
702  LiveInterval &IntA =
703  LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
704  LiveInterval &IntB =
705  LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
706 
707  // We found a non-trivially-coalescable copy with IntA being the source and
708  // IntB being the dest, thus this defines a value number in IntB. If the
709  // source value number (in IntA) is defined by a commutable instruction and
710  // its other operand is coalesced to the copy dest register, see if we can
711  // transform the copy into a noop by commuting the definition. For example,
712  //
713  // A3 = op A2 killed B0
714  // ...
715  // B1 = A3 <- this copy
716  // ...
717  // = op A3 <- more uses
718  //
719  // ==>
720  //
721  // B2 = op B0 killed A2
722  // ...
723  // B1 = B2 <- now an identity copy
724  // ...
725  // = op B2 <- more uses
726 
727  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
728  // the example above.
729  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
730  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
731  assert(BValNo != nullptr && BValNo->def == CopyIdx);
732 
733  // AValNo is the value number in A that defines the copy, A3 in the example.
734  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
735  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
736  if (AValNo->isPHIDef())
737  return false;
739  if (!DefMI)
740  return false;
741  if (!DefMI->isCommutable())
742  return false;
743  // If DefMI is a two-address instruction then commuting it will change the
744  // destination register.
745  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
746  assert(DefIdx != -1);
747  unsigned UseOpIdx;
748  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
749  return false;
750 
751  // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
752  // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
753  // passed to the method. That _other_ operand is chosen by
754  // the findCommutedOpIndices() method.
755  //
756  // That is obviously an area for improvement in case of instructions having
757  // more than 2 operands. For example, if some instruction has 3 commutable
758  // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
759  // op#2<->op#3) of commute transformation should be considered/tried here.
760  unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
761  if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
762  return false;
763 
764  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
765  unsigned NewReg = NewDstMO.getReg();
766  if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
767  return false;
768 
769  // Make sure there are no other definitions of IntB that would reach the
770  // uses which the new definition can reach.
771  if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
772  return false;
773 
774  // If some of the uses of IntA.reg is already coalesced away, return false.
775  // It's not possible to determine whether it's safe to perform the coalescing.
776  for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) {
777  MachineInstr *UseMI = MO.getParent();
778  unsigned OpNo = &MO - &UseMI->getOperand(0);
779  SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
780  LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
781  if (US == IntA.end() || US->valno != AValNo)
782  continue;
783  // If this use is tied to a def, we can't rewrite the register.
784  if (UseMI->isRegTiedToDefOperand(OpNo))
785  return false;
786  }
787 
788  LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
789  << *DefMI);
790 
791  // At this point we have decided that it is legal to do this
792  // transformation. Start by commuting the instruction.
793  MachineBasicBlock *MBB = DefMI->getParent();
794  MachineInstr *NewMI =
795  TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
796  if (!NewMI)
797  return false;
800  !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
801  return false;
802  if (NewMI != DefMI) {
803  LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
805  MBB->insert(Pos, NewMI);
806  MBB->erase(DefMI);
807  }
808 
809  // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
810  // A = or A, B
811  // ...
812  // B = A
813  // ...
814  // C = killed A
815  // ...
816  // = B
817 
818  // Update uses of IntA of the specific Val# with IntB.
819  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
820  UE = MRI->use_end();
821  UI != UE; /* ++UI is below because of possible MI removal */) {
822  MachineOperand &UseMO = *UI;
823  ++UI;
824  if (UseMO.isUndef())
825  continue;
826  MachineInstr *UseMI = UseMO.getParent();
827  if (UseMI->isDebugValue()) {
828  // FIXME These don't have an instruction index. Not clear we have enough
829  // info to decide whether to do this replacement or not. For now do it.
830  UseMO.setReg(NewReg);
831  continue;
832  }
833  SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
834  LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
835  assert(US != IntA.end() && "Use must be live");
836  if (US->valno != AValNo)
837  continue;
838  // Kill flags are no longer accurate. They are recomputed after RA.
839  UseMO.setIsKill(false);
841  UseMO.substPhysReg(NewReg, *TRI);
842  else
843  UseMO.setReg(NewReg);
844  if (UseMI == CopyMI)
845  continue;
846  if (!UseMI->isCopy())
847  continue;
848  if (UseMI->getOperand(0).getReg() != IntB.reg ||
849  UseMI->getOperand(0).getSubReg())
850  continue;
851 
852  // This copy will become a noop. If it's defining a new val#, merge it into
853  // BValNo.
854  SlotIndex DefIdx = UseIdx.getRegSlot();
855  VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
856  if (!DVNI)
857  continue;
858  LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
859  assert(DVNI->def == DefIdx);
860  BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
861  for (LiveInterval::SubRange &S : IntB.subranges()) {
862  VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
863  if (!SubDVNI)
864  continue;
865  VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
866  assert(SubBValNo->def == CopyIdx);
867  S.MergeValueNumberInto(SubDVNI, SubBValNo);
868  }
869 
870  deleteInstr(UseMI);
871  }
872 
873  // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
874  // is updated.
876  if (IntB.hasSubRanges()) {
877  if (!IntA.hasSubRanges()) {
878  LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
879  IntA.createSubRangeFrom(Allocator, Mask, IntA);
880  }
881  SlotIndex AIdx = CopyIdx.getRegSlot(true);
882  for (LiveInterval::SubRange &SA : IntA.subranges()) {
883  VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
884  assert(ASubValNo != nullptr);
885 
886  IntB.refineSubRanges(Allocator, SA.LaneMask,
887  [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) {
888  VNInfo *BSubValNo = SR.empty()
889  ? SR.getNextValue(CopyIdx, Allocator)
890  : SR.getVNInfoAt(CopyIdx);
891  assert(BSubValNo != nullptr);
892  addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
893  });
894  }
895  }
896 
897  BValNo->def = AValNo->def;
898  addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
899  LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
900 
901  LIS->removeVRegDefAt(IntA, AValNo->def);
902 
903  LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
904  ++numCommutes;
905  return true;
906 }
907 
908 /// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
909 /// predecessor of BB2, and if B is not redefined on the way from A = B
910 /// in BB2 to B = A in BB2, B = A in BB2 is partially redundant if the
911 /// execution goes through the path from BB0 to BB2. We may move B = A
912 /// to the predecessor without such reversed copy.
913 /// So we will transform the program from:
914 /// BB0:
915 /// A = B; BB1:
916 /// ... ...
917 /// / \ /
918 /// BB2:
919 /// ...
920 /// B = A;
921 ///
922 /// to:
923 ///
924 /// BB0: BB1:
925 /// A = B; ...
926 /// ... B = A;
927 /// / \ /
928 /// BB2:
929 /// ...
930 ///
931 /// A special case is when BB0 and BB2 are the same BB which is the only
932 /// BB in a loop:
933 /// BB1:
934 /// ...
935 /// BB0/BB2: ----
936 /// B = A; |
937 /// ... |
938 /// A = B; |
939 /// |-------
940 /// |
941 /// We may hoist B = A from BB0/BB2 to BB1.
942 ///
943 /// The major preconditions for correctness to remove such partial
944 /// redundancy include:
945 /// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
946 /// the PHI is defined by the reversed copy A = B in BB0.
947 /// 2. No B is referenced from the start of BB2 to B = A.
948 /// 3. No B is defined from A = B to the end of BB0.
949 /// 4. BB1 has only one successor.
950 ///
951 /// 2 and 4 implicitly ensure B is not live at the end of BB1.
952 /// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
953 /// colder place, which not only prevent endless loop, but also make sure
954 /// the movement of copy is beneficial.
955 bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
956  MachineInstr &CopyMI) {
957  assert(!CP.isPhys());
958  if (!CopyMI.isFullCopy())
959  return false;
960 
961  MachineBasicBlock &MBB = *CopyMI.getParent();
962  if (MBB.isEHPad())
963  return false;
964 
965  if (MBB.pred_size() != 2)
966  return false;
967 
968  LiveInterval &IntA =
969  LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
970  LiveInterval &IntB =
971  LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
972 
973  // A is defined by PHI at the entry of MBB.
974  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
975  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
976  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
977  if (!AValNo->isPHIDef())
978  return false;
979 
980  // No B is referenced before CopyMI in MBB.
981  if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
982  return false;
983 
984  // MBB has two predecessors: one contains A = B so no copy will be inserted
985  // for it. The other one will have a copy moved from MBB.
986  bool FoundReverseCopy = false;
987  MachineBasicBlock *CopyLeftBB = nullptr;
988  for (MachineBasicBlock *Pred : MBB.predecessors()) {
989  VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
991  if (!DefMI || !DefMI->isFullCopy()) {
992  CopyLeftBB = Pred;
993  continue;
994  }
995  // Check DefMI is a reverse copy and it is in BB Pred.
996  if (DefMI->getOperand(0).getReg() != IntA.reg ||
997  DefMI->getOperand(1).getReg() != IntB.reg ||
998  DefMI->getParent() != Pred) {
999  CopyLeftBB = Pred;
1000  continue;
1001  }
1002  // If there is any other def of B after DefMI and before the end of Pred,
1003  // we need to keep the copy of B = A at the end of Pred if we remove
1004  // B = A from MBB.
1005  bool ValB_Changed = false;
1006  for (auto VNI : IntB.valnos) {
1007  if (VNI->isUnused())
1008  continue;
1009  if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
1010  ValB_Changed = true;
1011  break;
1012  }
1013  }
1014  if (ValB_Changed) {
1015  CopyLeftBB = Pred;
1016  continue;
1017  }
1018  FoundReverseCopy = true;
1019  }
1020 
1021  // If no reverse copy is found in predecessors, nothing to do.
1022  if (!FoundReverseCopy)
1023  return false;
1024 
1025  // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
1026  // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
1027  // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
1028  // update IntA/IntB.
1029  //
1030  // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
1031  // MBB is hotter than CopyLeftBB.
1032  if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
1033  return false;
1034 
1035  // Now (almost sure it's) ok to move copy.
1036  if (CopyLeftBB) {
1037  // Position in CopyLeftBB where we should insert new copy.
1038  auto InsPos = CopyLeftBB->getFirstTerminator();
1039 
1040  // Make sure that B isn't referenced in the terminators (if any) at the end
1041  // of the predecessor since we're about to insert a new definition of B
1042  // before them.
1043  if (InsPos != CopyLeftBB->end()) {
1044  SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
1045  if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
1046  return false;
1047  }
1048 
1049  LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
1050  << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
1051 
1052  // Insert new copy to CopyLeftBB.
1053  MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
1054  TII->get(TargetOpcode::COPY), IntB.reg)
1055  .addReg(IntA.reg);
1056  SlotIndex NewCopyIdx =
1057  LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
1058  IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1059  for (LiveInterval::SubRange &SR : IntB.subranges())
1060  SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1061 
1062  // If the newly created Instruction has an address of an instruction that was
1063  // deleted before (object recycled by the allocator) it needs to be removed from
1064  // the deleted list.
1065  ErasedInstrs.erase(NewCopyMI);
1066  } else {
1067  LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
1068  << printMBBReference(MBB) << '\t' << CopyMI);
1069  }
1070 
1071  // Remove CopyMI.
1072  // Note: This is fine to remove the copy before updating the live-ranges.
1073  // While updating the live-ranges, we only look at slot indices and
1074  // never go back to the instruction.
1075  // Mark instructions as deleted.
1076  deleteInstr(&CopyMI);
1077 
1078  // Update the liveness.
1079  SmallVector<SlotIndex, 8> EndPoints;
1080  VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
1081  LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
1082  &EndPoints);
1083  BValNo->markUnused();
1084  // Extend IntB to the EndPoints of its original live interval.
1085  LIS->extendToIndices(IntB, EndPoints);
1086 
1087  // Now, do the same for its subranges.
1088  for (LiveInterval::SubRange &SR : IntB.subranges()) {
1089  EndPoints.clear();
1090  VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
1091  assert(BValNo && "All sublanes should be live");
1092  LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
1093  BValNo->markUnused();
1094  LIS->extendToIndices(SR, EndPoints);
1095  }
1096  // If any dead defs were extended, truncate them.
1097  shrinkToUses(&IntB);
1098 
1099  // Finally, update the live-range of IntA.
1100  shrinkToUses(&IntA);
1101  return true;
1102 }
1103 
1104 /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
1105 /// defining a subregister.
1106 static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
1108  "This code cannot handle physreg aliasing");
1109  for (const MachineOperand &Op : MI.operands()) {
1110  if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
1111  continue;
1112  // Return true if we define the full register or don't care about the value
1113  // inside other subregisters.
1114  if (Op.getSubReg() == 0 || Op.isUndef())
1115  return true;
1116  }
1117  return false;
1118 }
1119 
1120 bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1121  MachineInstr *CopyMI,
1122  bool &IsDefCopy) {
1123  IsDefCopy = false;
1124  unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
1125  unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
1126  unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
1127  unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
1129  return false;
1130 
1131  LiveInterval &SrcInt = LIS->getInterval(SrcReg);
1132  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1133  VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
1134  assert(ValNo && "CopyMI input register not live");
1135  if (ValNo->isPHIDef() || ValNo->isUnused())
1136  return false;
1138  if (!DefMI)
1139  return false;
1140  if (DefMI->isCopyLike()) {
1141  IsDefCopy = true;
1142  return false;
1143  }
1144  if (!TII->isAsCheapAsAMove(*DefMI))
1145  return false;
1146  if (!TII->isTriviallyReMaterializable(*DefMI, AA))
1147  return false;
1148  if (!definesFullReg(*DefMI, SrcReg))
1149  return false;
1150  bool SawStore = false;
1151  if (!DefMI->isSafeToMove(AA, SawStore))
1152  return false;
1153  const MCInstrDesc &MCID = DefMI->getDesc();
1154  if (MCID.getNumDefs() != 1)
1155  return false;
1156  // Only support subregister destinations when the def is read-undef.
1157  MachineOperand &DstOperand = CopyMI->getOperand(0);
1158  unsigned CopyDstReg = DstOperand.getReg();
1159  if (DstOperand.getSubReg() && !DstOperand.isUndef())
1160  return false;
1161 
1162  // If both SrcIdx and DstIdx are set, correct rematerialization would widen
1163  // the register substantially (beyond both source and dest size). This is bad
1164  // for performance since it can cascade through a function, introducing many
1165  // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
1166  // around after a few subreg copies).
1167  if (SrcIdx && DstIdx)
1168  return false;
1169 
1170  const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
1171  if (!DefMI->isImplicitDef()) {
1173  unsigned NewDstReg = DstReg;
1174 
1175  unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
1176  DefMI->getOperand(0).getSubReg());
1177  if (NewDstIdx)
1178  NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
1179 
1180  // Finally, make sure that the physical subregister that will be
1181  // constructed later is permitted for the instruction.
1182  if (!DefRC->contains(NewDstReg))
1183  return false;
1184  } else {
1185  // Theoretically, some stack frame reference could exist. Just make sure
1186  // it hasn't actually happened.
1188  "Only expect to deal with virtual or physical registers");
1189  }
1190  }
1191 
1192  DebugLoc DL = CopyMI->getDebugLoc();
1193  MachineBasicBlock *MBB = CopyMI->getParent();
1195  std::next(MachineBasicBlock::iterator(CopyMI));
1196  TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI);
1197  MachineInstr &NewMI = *std::prev(MII);
1198  NewMI.setDebugLoc(DL);
1199 
1200  // In a situation like the following:
1201  // %0:subreg = instr ; DefMI, subreg = DstIdx
1202  // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0
1203  // instead of widening %1 to the register class of %0 simply do:
1204  // %1 = instr
1205  const TargetRegisterClass *NewRC = CP.getNewRC();
1206  if (DstIdx != 0) {
1207  MachineOperand &DefMO = NewMI.getOperand(0);
1208  if (DefMO.getSubReg() == DstIdx) {
1209  assert(SrcIdx == 0 && CP.isFlipped()
1210  && "Shouldn't have SrcIdx+DstIdx at this point");
1211  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
1212  const TargetRegisterClass *CommonRC =
1213  TRI->getCommonSubClass(DefRC, DstRC);
1214  if (CommonRC != nullptr) {
1215  NewRC = CommonRC;
1216  DstIdx = 0;
1217  DefMO.setSubReg(0);
1218  DefMO.setIsUndef(false); // Only subregs can have def+undef.
1219  }
1220  }
1221  }
1222 
1223  // CopyMI may have implicit operands, save them so that we can transfer them
1224  // over to the newly materialized instruction after CopyMI is removed.
1225  SmallVector<MachineOperand, 4> ImplicitOps;
1226  ImplicitOps.reserve(CopyMI->getNumOperands() -
1227  CopyMI->getDesc().getNumOperands());
1228  for (unsigned I = CopyMI->getDesc().getNumOperands(),
1229  E = CopyMI->getNumOperands();
1230  I != E; ++I) {
1231  MachineOperand &MO = CopyMI->getOperand(I);
1232  if (MO.isReg()) {
1233  assert(MO.isImplicit() && "No explicit operands after implicit operands.");
1234  // Discard VReg implicit defs.
1236  ImplicitOps.push_back(MO);
1237  }
1238  }
1239 
1240  LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI);
1241  CopyMI->eraseFromParent();
1242  ErasedInstrs.insert(CopyMI);
1243 
1244  // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
1245  // We need to remember these so we can add intervals once we insert
1246  // NewMI into SlotIndexes.
1247  SmallVector<unsigned, 4> NewMIImplDefs;
1248  for (unsigned i = NewMI.getDesc().getNumOperands(),
1249  e = NewMI.getNumOperands();
1250  i != e; ++i) {
1251  MachineOperand &MO = NewMI.getOperand(i);
1252  if (MO.isReg() && MO.isDef()) {
1253  assert(MO.isImplicit() && MO.isDead() &&
1255  NewMIImplDefs.push_back(MO.getReg());
1256  }
1257  }
1258 
1260  unsigned NewIdx = NewMI.getOperand(0).getSubReg();
1261 
1262  if (DefRC != nullptr) {
1263  if (NewIdx)
1264  NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
1265  else
1266  NewRC = TRI->getCommonSubClass(NewRC, DefRC);
1267  assert(NewRC && "subreg chosen for remat incompatible with instruction");
1268  }
1269  // Remap subranges to new lanemask and change register class.
1270  LiveInterval &DstInt = LIS->getInterval(DstReg);
1271  for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1272  SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
1273  }
1274  MRI->setRegClass(DstReg, NewRC);
1275 
1276  // Update machine operands and add flags.
1277  updateRegDefsUses(DstReg, DstReg, DstIdx);
1278  NewMI.getOperand(0).setSubReg(NewIdx);
1279  // updateRegDefUses can add an "undef" flag to the definition, since
1280  // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
1281  // sure that "undef" is not set.
1282  if (NewIdx == 0)
1283  NewMI.getOperand(0).setIsUndef(false);
1284  // Add dead subregister definitions if we are defining the whole register
1285  // but only part of it is live.
1286  // This could happen if the rematerialization instruction is rematerializing
1287  // more than actually is used in the register.
1288  // An example would be:
1289  // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
1290  // ; Copying only part of the register here, but the rest is undef.
1291  // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
1292  // ==>
1293  // ; Materialize all the constants but only using one
1294  // %2 = LOAD_CONSTANTS 5, 8
1295  //
1296  // at this point for the part that wasn't defined before we could have
1297  // subranges missing the definition.
1298  if (NewIdx == 0 && DstInt.hasSubRanges()) {
1299  SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1300  SlotIndex DefIndex =
1301  CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1302  LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
1303  VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator();
1304  for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1305  if (!SR.liveAt(DefIndex))
1306  SR.createDeadDef(DefIndex, Alloc);
1307  MaxMask &= ~SR.LaneMask;
1308  }
1309  if (MaxMask.any()) {
1310  LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
1311  SR->createDeadDef(DefIndex, Alloc);
1312  }
1313  }
1314 
1315  // Make sure that the subrange for resultant undef is removed
1316  // For example:
1317  // %1:sub1<def,read-undef> = LOAD CONSTANT 1
1318  // %2 = COPY %1
1319  // ==>
1320  // %2:sub1<def, read-undef> = LOAD CONSTANT 1
1321  // ; Correct but need to remove the subrange for %2:sub0
1322  // ; as it is now undef
1323  if (NewIdx != 0 && DstInt.hasSubRanges()) {
1324  // The affected subregister segments can be removed.
1325  SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1326  LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
1327  bool UpdatedSubRanges = false;
1328  for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1329  if ((SR.LaneMask & DstMask).none()) {
1330  LLVM_DEBUG(dbgs()
1331  << "Removing undefined SubRange "
1332  << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
1333  // VNI is in ValNo - remove any segments in this SubRange that have this ValNo
1334  if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
1335  SR.removeValNo(RmValNo);
1336  UpdatedSubRanges = true;
1337  }
1338  }
1339  }
1340  if (UpdatedSubRanges)
1341  DstInt.removeEmptySubRanges();
1342  }
1343  } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
1344  // The New instruction may be defining a sub-register of what's actually
1345  // been asked for. If so it must implicitly define the whole thing.
1347  "Only expect virtual or physical registers in remat");
1348  NewMI.getOperand(0).setIsDead(true);
1350  CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
1351  // Record small dead def live-ranges for all the subregisters
1352  // of the destination register.
1353  // Otherwise, variables that live through may miss some
1354  // interferences, thus creating invalid allocation.
1355  // E.g., i386 code:
1356  // %1 = somedef ; %1 GR8
1357  // %2 = remat ; %2 GR32
1358  // CL = COPY %2.sub_8bit
1359  // = somedef %1 ; %1 GR8
1360  // =>
1361  // %1 = somedef ; %1 GR8
1362  // dead ECX = remat ; implicit-def CL
1363  // = somedef %1 ; %1 GR8
1364  // %1 will see the interferences with CL but not with CH since
1365  // no live-ranges would have been created for ECX.
1366  // Fix that!
1367  SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1368  for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
1369  Units.isValid(); ++Units)
1370  if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
1371  LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1372  }
1373 
1374  if (NewMI.getOperand(0).getSubReg())
1375  NewMI.getOperand(0).setIsUndef();
1376 
1377  // Transfer over implicit operands to the rematerialized instruction.
1378  for (MachineOperand &MO : ImplicitOps)
1379  NewMI.addOperand(MO);
1380 
1381  SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1382  for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
1383  unsigned Reg = NewMIImplDefs[i];
1384  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1385  if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
1386  LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1387  }
1388 
1389  LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
1390  ++NumReMats;
1391 
1392  // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
1393  // to describe DstReg instead.
1394  if (MRI->use_nodbg_empty(SrcReg)) {
1395  for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
1396  MachineInstr *UseMI = UseMO.getParent();
1397  if (UseMI->isDebugValue()) {
1398  UseMO.setReg(DstReg);
1399  // Move the debug value directly after the def of the rematerialized
1400  // value in DstReg.
1401  MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
1402  LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
1403  }
1404  }
1405  }
1406 
1407  if (ToBeUpdated.count(SrcReg))
1408  return true;
1409 
1410  unsigned NumCopyUses = 0;
1411  for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
1412  if (UseMO.getParent()->isCopyLike())
1413  NumCopyUses++;
1414  }
1415  if (NumCopyUses < LateRematUpdateThreshold) {
1416  // The source interval can become smaller because we removed a use.
1417  shrinkToUses(&SrcInt, &DeadDefs);
1418  if (!DeadDefs.empty())
1419  eliminateDeadDefs();
1420  } else {
1421  ToBeUpdated.insert(SrcReg);
1422  }
1423  return true;
1424 }
1425 
1426 MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
1427  // ProcessImplicitDefs may leave some copies of <undef> values, it only
1428  // removes local variables. When we have a copy like:
1429  //
1430  // %1 = COPY undef %2
1431  //
1432  // We delete the copy and remove the corresponding value number from %1.
1433  // Any uses of that value number are marked as <undef>.
1434 
1435  // Note that we do not query CoalescerPair here but redo isMoveInstr as the
1436  // CoalescerPair may have a new register class with adjusted subreg indices
1437  // at this point.
1438  unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
1439  isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
1440 
1441  SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
1442  const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
1443  // CopyMI is undef iff SrcReg is not live before the instruction.
1444  if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
1445  LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
1446  for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
1447  if ((SR.LaneMask & SrcMask).none())
1448  continue;
1449  if (SR.liveAt(Idx))
1450  return nullptr;
1451  }
1452  } else if (SrcLI.liveAt(Idx))
1453  return nullptr;
1454 
1455  // If the undef copy defines a live-out value (i.e. an input to a PHI def),
1456  // then replace it with an IMPLICIT_DEF.
1457  LiveInterval &DstLI = LIS->getInterval(DstReg);
1458  SlotIndex RegIndex = Idx.getRegSlot();
1459  LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
1460  assert(Seg != nullptr && "No segment for defining instruction");
1461  if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) {
1462  if (V->isPHIDef()) {
1463  CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
1464  for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
1465  MachineOperand &MO = CopyMI->getOperand(i-1);
1466  if (MO.isReg() && MO.isUse())
1467  CopyMI->RemoveOperand(i-1);
1468  }
1469  LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
1470  "implicit def\n");
1471  return CopyMI;
1472  }
1473  }
1474 
1475  // Remove any DstReg segments starting at the instruction.
1476  LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
1477 
1478  // Remove value or merge with previous one in case of a subregister def.
1479  if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
1480  VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
1481  DstLI.MergeValueNumberInto(VNI, PrevVNI);
1482 
1483  // The affected subregister segments can be removed.
1484  LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
1485  for (LiveInterval::SubRange &SR : DstLI.subranges()) {
1486  if ((SR.LaneMask & DstMask).none())
1487  continue;
1488 
1489  VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
1490  assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
1491  SR.removeValNo(SVNI);
1492  }
1493  DstLI.removeEmptySubRanges();
1494  } else
1495  LIS->removeVRegDefAt(DstLI, RegIndex);
1496 
1497  // Mark uses as undef.
1498  for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
1499  if (MO.isDef() /*|| MO.isUndef()*/)
1500  continue;
1501  const MachineInstr &MI = *MO.getParent();
1502  SlotIndex UseIdx = LIS->getInstructionIndex(MI);
1503  LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
1504  bool isLive;
1505  if (!UseMask.all() && DstLI.hasSubRanges()) {
1506  isLive = false;
1507  for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
1508  if ((SR.LaneMask & UseMask).none())
1509  continue;
1510  if (SR.liveAt(UseIdx)) {
1511  isLive = true;
1512  break;
1513  }
1514  }
1515  } else
1516  isLive = DstLI.liveAt(UseIdx);
1517  if (isLive)
1518  continue;
1519  MO.setIsUndef(true);
1520  LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
1521  }
1522 
1523  // A def of a subregister may be a use of the other subregisters, so
1524  // deleting a def of a subregister may also remove uses. Since CopyMI
1525  // is still part of the function (but about to be erased), mark all
1526  // defs of DstReg in it as <undef>, so that shrinkToUses would
1527  // ignore them.
1528  for (MachineOperand &MO : CopyMI->operands())
1529  if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
1530  MO.setIsUndef(true);
1531  LIS->shrinkToUses(&DstLI);
1532 
1533  return CopyMI;
1534 }
1535 
1536 void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
1537  MachineOperand &MO, unsigned SubRegIdx) {
1538  LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1539  if (MO.isDef())
1540  Mask = ~Mask;
1541  bool IsUndef = true;
1542  for (const LiveInterval::SubRange &S : Int.subranges()) {
1543  if ((S.LaneMask & Mask).none())
1544  continue;
1545  if (S.liveAt(UseIdx)) {
1546  IsUndef = false;
1547  break;
1548  }
1549  }
1550  if (IsUndef) {
1551  MO.setIsUndef(true);
1552  // We found out some subregister use is actually reading an undefined
1553  // value. In some cases the whole vreg has become undefined at this
1554  // point so we have to potentially shrink the main range if the
1555  // use was ending a live segment there.
1556  LiveQueryResult Q = Int.Query(UseIdx);
1557  if (Q.valueOut() == nullptr)
1558  ShrinkMainRange = true;
1559  }
1560 }
1561 
1562 void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
1563  unsigned DstReg,
1564  unsigned SubIdx) {
1565  bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
1566  LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
1567 
1568  if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
1569  for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
1570  unsigned SubReg = MO.getSubReg();
1571  if (SubReg == 0 || MO.isUndef())
1572  continue;
1573  MachineInstr &MI = *MO.getParent();
1574  if (MI.isDebugValue())
1575  continue;
1576  SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
1577  addUndefFlag(*DstInt, UseIdx, MO, SubReg);
1578  }
1579  }
1580 
1583  I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
1584  I != E; ) {
1585  MachineInstr *UseMI = &*(I++);
1586 
1587  // Each instruction can only be rewritten once because sub-register
1588  // composition is not always idempotent. When SrcReg != DstReg, rewriting
1589  // the UseMI operands removes them from the SrcReg use-def chain, but when
1590  // SrcReg is DstReg we could encounter UseMI twice if it has multiple
1591  // operands mentioning the virtual register.
1592  if (SrcReg == DstReg && !Visited.insert(UseMI).second)
1593  continue;
1594 
1596  bool Reads, Writes;
1597  std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
1598 
1599  // If SrcReg wasn't read, it may still be the case that DstReg is live-in
1600  // because SrcReg is a sub-register.
1601  if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue())
1602  Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
1603 
1604  // Replace SrcReg with DstReg in all UseMI operands.
1605  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1606  MachineOperand &MO = UseMI->getOperand(Ops[i]);
1607 
1608  // Adjust <undef> flags in case of sub-register joins. We don't want to
1609  // turn a full def into a read-modify-write sub-register def and vice
1610  // versa.
1611  if (SubIdx && MO.isDef())
1612  MO.setIsUndef(!Reads);
1613 
1614  // A subreg use of a partially undef (super) register may be a complete
1615  // undef use now and then has to be marked that way.
1616  if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
1617  if (!DstInt->hasSubRanges()) {
1619  LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
1620  DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
1621  }
1622  SlotIndex MIIdx = UseMI->isDebugValue()
1623  ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
1624  : LIS->getInstructionIndex(*UseMI);
1625  SlotIndex UseIdx = MIIdx.getRegSlot(true);
1626  addUndefFlag(*DstInt, UseIdx, MO, SubIdx);
1627  }
1628 
1629  if (DstIsPhys)
1630  MO.substPhysReg(DstReg, *TRI);
1631  else
1632  MO.substVirtReg(DstReg, SubIdx, *TRI);
1633  }
1634 
1635  LLVM_DEBUG({
1636  dbgs() << "\t\tupdated: ";
1637  if (!UseMI->isDebugValue())
1638  dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
1639  dbgs() << *UseMI;
1640  });
1641  }
1642 }
1643 
1644 bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
1645  // Always join simple intervals that are defined by a single copy from a
1646  // reserved register. This doesn't increase register pressure, so it is
1647  // always beneficial.
1648  if (!MRI->isReserved(CP.getDstReg())) {
1649  LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
1650  return false;
1651  }
1652 
1653  LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
1654  if (JoinVInt.containsOneValue())
1655  return true;
1656 
1657  LLVM_DEBUG(
1658  dbgs() << "\tCannot join complex intervals into reserved register.\n");
1659  return false;
1660 }
1661 
1662 bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
1663  Again = false;
1664  LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
1665 
1666  CoalescerPair CP(*TRI);
1667  if (!CP.setRegisters(CopyMI)) {
1668  LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
1669  return false;
1670  }
1671 
1672  if (CP.getNewRC()) {
1673  auto SrcRC = MRI->getRegClass(CP.getSrcReg());
1674  auto DstRC = MRI->getRegClass(CP.getDstReg());
1675  unsigned SrcIdx = CP.getSrcIdx();
1676  unsigned DstIdx = CP.getDstIdx();
1677  if (CP.isFlipped()) {
1678  std::swap(SrcIdx, DstIdx);
1679  std::swap(SrcRC, DstRC);
1680  }
1681  if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
1682  CP.getNewRC(), *LIS)) {
1683  LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
1684  return false;
1685  }
1686  }
1687 
1688  // Dead code elimination. This really should be handled by MachineDCE, but
1689  // sometimes dead copies slip through, and we can't generate invalid live
1690  // ranges.
1691  if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
1692  LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
1693  DeadDefs.push_back(CopyMI);
1694  eliminateDeadDefs();
1695  return true;
1696  }
1697 
1698  // Eliminate undefs.
1699  if (!CP.isPhys()) {
1700  // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
1701  if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
1702  if (UndefMI->isImplicitDef())
1703  return false;
1704  deleteInstr(CopyMI);
1705  return false; // Not coalescable.
1706  }
1707  }
1708 
1709  // Coalesced copies are normally removed immediately, but transformations
1710  // like removeCopyByCommutingDef() can inadvertently create identity copies.
1711  // When that happens, just join the values and remove the copy.
1712  if (CP.getSrcReg() == CP.getDstReg()) {
1713  LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
1714  LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
1715  const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1716  LiveQueryResult LRQ = LI.Query(CopyIdx);
1717  if (VNInfo *DefVNI = LRQ.valueDefined()) {
1718  VNInfo *ReadVNI = LRQ.valueIn();
1719  assert(ReadVNI && "No value before copy and no <undef> flag.");
1720  assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
1721  LI.MergeValueNumberInto(DefVNI, ReadVNI);
1722 
1723  // Process subregister liveranges.
1724  for (LiveInterval::SubRange &S : LI.subranges()) {
1725  LiveQueryResult SLRQ = S.Query(CopyIdx);
1726  if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
1727  VNInfo *SReadVNI = SLRQ.valueIn();
1728  S.MergeValueNumberInto(SDefVNI, SReadVNI);
1729  }
1730  }
1731  LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
1732  }
1733  deleteInstr(CopyMI);
1734  return true;
1735  }
1736 
1737  // Enforce policies.
1738  if (CP.isPhys()) {
1739  LLVM_DEBUG(dbgs() << "\tConsidering merging "
1740  << printReg(CP.getSrcReg(), TRI) << " with "
1741  << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
1742  if (!canJoinPhys(CP)) {
1743  // Before giving up coalescing, if definition of source is defined by
1744  // trivial computation, try rematerializing it.
1745  bool IsDefCopy;
1746  if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
1747  return true;
1748  if (IsDefCopy)
1749  Again = true; // May be possible to coalesce later.
1750  return false;
1751  }
1752  } else {
1753  // When possible, let DstReg be the larger interval.
1754  if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
1755  LIS->getInterval(CP.getDstReg()).size())
1756  CP.flip();
1757 
1758  LLVM_DEBUG({
1759  dbgs() << "\tConsidering merging to "
1760  << TRI->getRegClassName(CP.getNewRC()) << " with ";
1761  if (CP.getDstIdx() && CP.getSrcIdx())
1762  dbgs() << printReg(CP.getDstReg()) << " in "
1763  << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
1764  << printReg(CP.getSrcReg()) << " in "
1765  << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
1766  else
1767  dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
1768  << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
1769  });
1770  }
1771 
1772  ShrinkMask = LaneBitmask::getNone();
1773  ShrinkMainRange = false;
1774 
1775  // Okay, attempt to join these two intervals. On failure, this returns false.
1776  // Otherwise, if one of the intervals being joined is a physreg, this method
1777  // always canonicalizes DstInt to be it. The output "SrcInt" will not have
1778  // been modified, so we can use this information below to update aliases.
1779  if (!joinIntervals(CP)) {
1780  // Coalescing failed.
1781 
1782  // If definition of source is defined by trivial computation, try
1783  // rematerializing it.
1784  bool IsDefCopy;
1785  if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
1786  return true;
1787 
1788  // If we can eliminate the copy without merging the live segments, do so
1789  // now.
1790  if (!CP.isPartial() && !CP.isPhys()) {
1791  if (adjustCopiesBackFrom(CP, CopyMI) ||
1792  removeCopyByCommutingDef(CP, CopyMI)) {
1793  deleteInstr(CopyMI);
1794  LLVM_DEBUG(dbgs() << "\tTrivial!\n");
1795  return true;
1796  }
1797  }
1798 
1799  // Try and see if we can partially eliminate the copy by moving the copy to
1800  // its predecessor.
1801  if (!CP.isPartial() && !CP.isPhys())
1802  if (removePartialRedundancy(CP, *CopyMI))
1803  return true;
1804 
1805  // Otherwise, we are unable to join the intervals.
1806  LLVM_DEBUG(dbgs() << "\tInterference!\n");
1807  Again = true; // May be possible to coalesce later.
1808  return false;
1809  }
1810 
1811  // Coalescing to a virtual register that is of a sub-register class of the
1812  // other. Make sure the resulting register is set to the right register class.
1813  if (CP.isCrossClass()) {
1814  ++numCrossRCs;
1815  MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
1816  }
1817 
1818  // Removing sub-register copies can ease the register class constraints.
1819  // Make sure we attempt to inflate the register class of DstReg.
1820  if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
1821  InflateRegs.push_back(CP.getDstReg());
1822 
1823  // CopyMI has been erased by joinIntervals at this point. Remove it from
1824  // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
1825  // to the work list. This keeps ErasedInstrs from growing needlessly.
1826  ErasedInstrs.erase(CopyMI);
1827 
1828  // Rewrite all SrcReg operands to DstReg.
1829  // Also update DstReg operands to include DstIdx if it is set.
1830  if (CP.getDstIdx())
1831  updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
1832  updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
1833 
1834  // Shrink subregister ranges if necessary.
1835  if (ShrinkMask.any()) {
1836  LiveInterval &LI = LIS->getInterval(CP.getDstReg());
1837  for (LiveInterval::SubRange &S : LI.subranges()) {
1838  if ((S.LaneMask & ShrinkMask).none())
1839  continue;
1840  LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
1841  << ")\n");
1842  LIS->shrinkToUses(S, LI.reg);
1843  }
1844  LI.removeEmptySubRanges();
1845  }
1846  if (ShrinkMainRange) {
1847  LiveInterval &LI = LIS->getInterval(CP.getDstReg());
1848  shrinkToUses(&LI);
1849  }
1850 
1851  // SrcReg is guaranteed to be the register whose live interval that is
1852  // being merged.
1853  LIS->removeInterval(CP.getSrcReg());
1854 
1855  // Update regalloc hint.
1856  TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
1857 
1858  LLVM_DEBUG({
1859  dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
1860  << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
1861  dbgs() << "\tResult = ";
1862  if (CP.isPhys())
1863  dbgs() << printReg(CP.getDstReg(), TRI);
1864  else
1865  dbgs() << LIS->getInterval(CP.getDstReg());
1866  dbgs() << '\n';
1867  });
1868 
1869  ++numJoins;
1870  return true;
1871 }
1872 
1873 bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
1874  unsigned DstReg = CP.getDstReg();
1875  unsigned SrcReg = CP.getSrcReg();
1876  assert(CP.isPhys() && "Must be a physreg copy");
1877  assert(MRI->isReserved(DstReg) && "Not a reserved register");
1878  LiveInterval &RHS = LIS->getInterval(SrcReg);
1879  LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
1880 
1881  assert(RHS.containsOneValue() && "Invalid join with reserved register");
1882 
1883  // Optimization for reserved registers like ESP. We can only merge with a
1884  // reserved physreg if RHS has a single value that is a copy of DstReg.
1885  // The live range of the reserved register will look like a set of dead defs
1886  // - we don't properly track the live range of reserved registers.
1887 
1888  // Deny any overlapping intervals. This depends on all the reserved
1889  // register live ranges to look like dead defs.
1890  if (!MRI->isConstantPhysReg(DstReg)) {
1891  for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
1892  // Abort if not all the regunits are reserved.
1893  for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
1894  if (!MRI->isReserved(*RI))
1895  return false;
1896  }
1897  if (RHS.overlaps(LIS->getRegUnit(*UI))) {
1898  LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI)
1899  << '\n');
1900  return false;
1901  }
1902  }
1903 
1904  // We must also check for overlaps with regmask clobbers.
1905  BitVector RegMaskUsable;
1906  if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
1907  !RegMaskUsable.test(DstReg)) {
1908  LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
1909  return false;
1910  }
1911  }
1912 
1913  // Skip any value computations, we are not adding new values to the
1914  // reserved register. Also skip merging the live ranges, the reserved
1915  // register live range doesn't need to be accurate as long as all the
1916  // defs are there.
1917 
1918  // Delete the identity copy.
1919  MachineInstr *CopyMI;
1920  if (CP.isFlipped()) {
1921  // Physreg is copied into vreg
1922  // %y = COPY %physreg_x
1923  // ... //< no other def of %x here
1924  // use %y
1925  // =>
1926  // ...
1927  // use %x
1928  CopyMI = MRI->getVRegDef(SrcReg);
1929  } else {
1930  // VReg is copied into physreg:
1931  // %y = def
1932  // ... //< no other def or use of %y here
1933  // %y = COPY %physreg_x
1934  // =>
1935  // %y = def
1936  // ...
1937  if (!MRI->hasOneNonDBGUse(SrcReg)) {
1938  LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
1939  return false;
1940  }
1941 
1942  if (!LIS->intervalIsInOneMBB(RHS)) {
1943  LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
1944  return false;
1945  }
1946 
1947  MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
1948  CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
1949  SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
1950  SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
1951 
1952  if (!MRI->isConstantPhysReg(DstReg)) {
1953  // We checked above that there are no interfering defs of the physical
1954  // register. However, for this case, where we intend to move up the def of
1955  // the physical register, we also need to check for interfering uses.
1956  SlotIndexes *Indexes = LIS->getSlotIndexes();
1957  for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
1958  SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
1960  if (MI->readsRegister(DstReg, TRI)) {
1961  LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
1962  return false;
1963  }
1964  }
1965  }
1966 
1967  // We're going to remove the copy which defines a physical reserved
1968  // register, so remove its valno, etc.
1969  LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
1970  << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
1971 
1972  LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
1973  // Create a new dead def at the new def location.
1974  for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
1975  LiveRange &LR = LIS->getRegUnit(*UI);
1976  LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
1977  }
1978  }
1979 
1980  deleteInstr(CopyMI);
1981 
1982  // We don't track kills for reserved registers.
1983  MRI->clearKillFlags(CP.getSrcReg());
1984 
1985  return true;
1986 }
1987 
1988 //===----------------------------------------------------------------------===//
1989 // Interference checking and interval joining
1990 //===----------------------------------------------------------------------===//
1991 //
1992 // In the easiest case, the two live ranges being joined are disjoint, and
1993 // there is no interference to consider. It is quite common, though, to have
1994 // overlapping live ranges, and we need to check if the interference can be
1995 // resolved.
1996 //
1997 // The live range of a single SSA value forms a sub-tree of the dominator tree.
1998 // This means that two SSA values overlap if and only if the def of one value
1999 // is contained in the live range of the other value. As a special case, the
2000 // overlapping values can be defined at the same index.
2001 //
2002 // The interference from an overlapping def can be resolved in these cases:
2003 //
2004 // 1. Coalescable copies. The value is defined by a copy that would become an
2005 // identity copy after joining SrcReg and DstReg. The copy instruction will
2006 // be removed, and the value will be merged with the source value.
2007 //
2008 // There can be several copies back and forth, causing many values to be
2009 // merged into one. We compute a list of ultimate values in the joined live
2010 // range as well as a mappings from the old value numbers.
2011 //
2012 // 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
2013 // predecessors have a live out value. It doesn't cause real interference,
2014 // and can be merged into the value it overlaps. Like a coalescable copy, it
2015 // can be erased after joining.
2016 //
2017 // 3. Copy of external value. The overlapping def may be a copy of a value that
2018 // is already in the other register. This is like a coalescable copy, but
2019 // the live range of the source register must be trimmed after erasing the
2020 // copy instruction:
2021 //
2022 // %src = COPY %ext
2023 // %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
2024 //
2025 // 4. Clobbering undefined lanes. Vector registers are sometimes built by
2026 // defining one lane at a time:
2027 //
2028 // %dst:ssub0<def,read-undef> = FOO
2029 // %src = BAR
2030 // %dst:ssub1 = COPY %src
2031 //
2032 // The live range of %src overlaps the %dst value defined by FOO, but
2033 // merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
2034 // which was undef anyway.
2035 //
2036 // The value mapping is more complicated in this case. The final live range
2037 // will have different value numbers for both FOO and BAR, but there is no
2038 // simple mapping from old to new values. It may even be necessary to add
2039 // new PHI values.
2040 //
2041 // 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
2042 // is live, but never read. This can happen because we don't compute
2043 // individual live ranges per lane.
2044 //
2045 // %dst = FOO
2046 // %src = BAR
2047 // %dst:ssub1 = COPY %src
2048 //
2049 // This kind of interference is only resolved locally. If the clobbered
2050 // lane value escapes the block, the join is aborted.
2051 
2052 namespace {
2053 
2054 /// Track information about values in a single virtual register about to be
2055 /// joined. Objects of this class are always created in pairs - one for each
2056 /// side of the CoalescerPair (or one for each lane of a side of the coalescer
2057 /// pair)
2058 class JoinVals {
2059  /// Live range we work on.
2060  LiveRange &LR;
2061 
2062  /// (Main) register we work on.
2063  const unsigned Reg;
2064 
2065  /// Reg (and therefore the values in this liverange) will end up as
2066  /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
2067  /// CP.SrcIdx.
2068  const unsigned SubIdx;
2069 
2070  /// The LaneMask that this liverange will occupy the coalesced register. May
2071  /// be smaller than the lanemask produced by SubIdx when merging subranges.
2072  const LaneBitmask LaneMask;
2073 
2074  /// This is true when joining sub register ranges, false when joining main
2075  /// ranges.
2076  const bool SubRangeJoin;
2077 
2078  /// Whether the current LiveInterval tracks subregister liveness.
2079  const bool TrackSubRegLiveness;
2080 
2081  /// Values that will be present in the final live range.
2082  SmallVectorImpl<VNInfo*> &NewVNInfo;
2083 
2084  const CoalescerPair &CP;
2085  LiveIntervals *LIS;
2086  SlotIndexes *Indexes;
2087  const TargetRegisterInfo *TRI;
2088 
2089  /// Value number assignments. Maps value numbers in LI to entries in
2090  /// NewVNInfo. This is suitable for passing to LiveInterval::join().
2091  SmallVector<int, 8> Assignments;
2092 
2093  /// Conflict resolution for overlapping values.
2094  enum ConflictResolution {
2095  /// No overlap, simply keep this value.
2096  CR_Keep,
2097 
2098  /// Merge this value into OtherVNI and erase the defining instruction.
2099  /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
2100  /// values.
2101  CR_Erase,
2102 
2103  /// Merge this value into OtherVNI but keep the defining instruction.
2104  /// This is for the special case where OtherVNI is defined by the same
2105  /// instruction.
2106  CR_Merge,
2107 
2108  /// Keep this value, and have it replace OtherVNI where possible. This
2109  /// complicates value mapping since OtherVNI maps to two different values
2110  /// before and after this def.
2111  /// Used when clobbering undefined or dead lanes.
2112  CR_Replace,
2113 
2114  /// Unresolved conflict. Visit later when all values have been mapped.
2115  CR_Unresolved,
2116 
2117  /// Unresolvable conflict. Abort the join.
2118  CR_Impossible
2119  };
2120 
2121  /// Per-value info for LI. The lane bit masks are all relative to the final
2122  /// joined register, so they can be compared directly between SrcReg and
2123  /// DstReg.
2124  struct Val {
2125  ConflictResolution Resolution = CR_Keep;
2126 
2127  /// Lanes written by this def, 0 for unanalyzed values.
2128  LaneBitmask WriteLanes;
2129 
2130  /// Lanes with defined values in this register. Other lanes are undef and
2131  /// safe to clobber.
2132  LaneBitmask ValidLanes;
2133 
2134  /// Value in LI being redefined by this def.
2135  VNInfo *RedefVNI = nullptr;
2136 
2137  /// Value in the other live range that overlaps this def, if any.
2138  VNInfo *OtherVNI = nullptr;
2139 
2140  /// Is this value an IMPLICIT_DEF that can be erased?
2141  ///
2142  /// IMPLICIT_DEF values should only exist at the end of a basic block that
2143  /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
2144  /// safely erased if they are overlapping a live value in the other live
2145  /// interval.
2146  ///
2147  /// Weird control flow graphs and incomplete PHI handling in
2148  /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
2149  /// longer live ranges. Such IMPLICIT_DEF values should be treated like
2150  /// normal values.
2151  bool ErasableImplicitDef = false;
2152 
2153  /// True when the live range of this value will be pruned because of an
2154  /// overlapping CR_Replace value in the other live range.
2155  bool Pruned = false;
2156 
2157  /// True once Pruned above has been computed.
2158  bool PrunedComputed = false;
2159 
2160  /// True if this value is determined to be identical to OtherVNI
2161  /// (in valuesIdentical). This is used with CR_Erase where the erased
2162  /// copy is redundant, i.e. the source value is already the same as
2163  /// the destination. In such cases the subranges need to be updated
2164  /// properly. See comment at pruneSubRegValues for more info.
2165  bool Identical = false;
2166 
2167  Val() = default;
2168 
2169  bool isAnalyzed() const { return WriteLanes.any(); }
2170  };
2171 
2172  /// One entry per value number in LI.
2173  SmallVector<Val, 8> Vals;
2174 
2175  /// Compute the bitmask of lanes actually written by DefMI.
2176  /// Set Redef if there are any partial register definitions that depend on the
2177  /// previous value of the register.
2178  LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
2179 
2180  /// Find the ultimate value that VNI was copied from.
2181  std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
2182 
2183  bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
2184 
2185  /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
2186  /// Return a conflict resolution when possible, but leave the hard cases as
2187  /// CR_Unresolved.
2188  /// Recursively calls computeAssignment() on this and Other, guaranteeing that
2189  /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
2190  /// The recursion always goes upwards in the dominator tree, making loops
2191  /// impossible.
2192  ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
2193 
2194  /// Compute the value assignment for ValNo in RI.
2195  /// This may be called recursively by analyzeValue(), but never for a ValNo on
2196  /// the stack.
2197  void computeAssignment(unsigned ValNo, JoinVals &Other);
2198 
2199  /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
2200  /// the extent of the tainted lanes in the block.
2201  ///
2202  /// Multiple values in Other.LR can be affected since partial redefinitions
2203  /// can preserve previously tainted lanes.
2204  ///
2205  /// 1 %dst = VLOAD <-- Define all lanes in %dst
2206  /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
2207  /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
2208  /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
2209  ///
2210  /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
2211  /// entry to TaintedVals.
2212  ///
2213  /// Returns false if the tainted lanes extend beyond the basic block.
2214  bool
2215  taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2216  SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
2217 
2218  /// Return true if MI uses any of the given Lanes from Reg.
2219  /// This does not include partial redefinitions of Reg.
2220  bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const;
2221 
2222  /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
2223  /// be pruned:
2224  ///
2225  /// %dst = COPY %src
2226  /// %src = COPY %dst <-- This value to be pruned.
2227  /// %dst = COPY %src <-- This value is a copy of a pruned value.
2228  bool isPrunedValue(unsigned ValNo, JoinVals &Other);
2229 
2230 public:
2231  JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
2232  SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
2233  LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
2234  bool TrackSubRegLiveness)
2235  : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
2236  SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
2237  NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
2238  TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {}
2239 
2240  /// Analyze defs in LR and compute a value mapping in NewVNInfo.
2241  /// Returns false if any conflicts were impossible to resolve.
2242  bool mapValues(JoinVals &Other);
2243 
2244  /// Try to resolve conflicts that require all values to be mapped.
2245  /// Returns false if any conflicts were impossible to resolve.
2246  bool resolveConflicts(JoinVals &Other);
2247 
2248  /// Prune the live range of values in Other.LR where they would conflict with
2249  /// CR_Replace values in LR. Collect end points for restoring the live range
2250  /// after joining.
2251  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
2252  bool changeInstrs);
2253 
2254  /// Removes subranges starting at copies that get removed. This sometimes
2255  /// happens when undefined subranges are copied around. These ranges contain
2256  /// no useful information and can be removed.
2257  void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
2258 
2259  /// Pruning values in subranges can lead to removing segments in these
2260  /// subranges started by IMPLICIT_DEFs. The corresponding segments in
2261  /// the main range also need to be removed. This function will mark
2262  /// the corresponding values in the main range as pruned, so that
2263  /// eraseInstrs can do the final cleanup.
2264  /// The parameter @p LI must be the interval whose main range is the
2265  /// live range LR.
2266  void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
2267 
2268  /// Erase any machine instructions that have been coalesced away.
2269  /// Add erased instructions to ErasedInstrs.
2270  /// Add foreign virtual registers to ShrinkRegs if their live range ended at
2271  /// the erased instrs.
2272  void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
2273  SmallVectorImpl<unsigned> &ShrinkRegs,
2274  LiveInterval *LI = nullptr);
2275 
2276  /// Remove liverange defs at places where implicit defs will be removed.
2277  void removeImplicitDefs();
2278 
2279  /// Get the value assignments suitable for passing to LiveInterval::join.
2280  const int *getAssignments() const { return Assignments.data(); }
2281 };
2282 
2283 } // end anonymous namespace
2284 
2285 LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
2286  const {
2287  LaneBitmask L;
2288  for (const MachineOperand &MO : DefMI->operands()) {
2289  if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
2290  continue;
2291  L |= TRI->getSubRegIndexLaneMask(
2292  TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
2293  if (MO.readsReg())
2294  Redef = true;
2295  }
2296  return L;
2297 }
2298 
2299 std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
2300  const VNInfo *VNI) const {
2301  unsigned TrackReg = Reg;
2302 
2303  while (!VNI->isPHIDef()) {
2304  SlotIndex Def = VNI->def;
2305  MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
2306  assert(MI && "No defining instruction");
2307  if (!MI->isFullCopy())
2308  return std::make_pair(VNI, TrackReg);
2309  unsigned SrcReg = MI->getOperand(1).getReg();
2311  return std::make_pair(VNI, TrackReg);
2312 
2313  const LiveInterval &LI = LIS->getInterval(SrcReg);
2314  const VNInfo *ValueIn;
2315  // No subrange involved.
2316  if (!SubRangeJoin || !LI.hasSubRanges()) {
2317  LiveQueryResult LRQ = LI.Query(Def);
2318  ValueIn = LRQ.valueIn();
2319  } else {
2320  // Query subranges. Ensure that all matching ones take us to the same def
2321  // (allowing some of them to be undef).
2322  ValueIn = nullptr;
2323  for (const LiveInterval::SubRange &S : LI.subranges()) {
2324  // Transform lanemask to a mask in the joined live interval.
2325  LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
2326  if ((SMask & LaneMask).none())
2327  continue;
2328  LiveQueryResult LRQ = S.Query(Def);
2329  if (!ValueIn) {
2330  ValueIn = LRQ.valueIn();
2331  continue;
2332  }
2333  if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
2334  return std::make_pair(VNI, TrackReg);
2335  }
2336  }
2337  if (ValueIn == nullptr) {
2338  // Reaching an undefined value is legitimate, for example:
2339  //
2340  // 1 undef %0.sub1 = ... ;; %0.sub0 == undef
2341  // 2 %1 = COPY %0 ;; %1 is defined here.
2342  // 3 %0 = COPY %1 ;; Now %0.sub0 has a definition,
2343  // ;; but it's equivalent to "undef".
2344  return std::make_pair(nullptr, SrcReg);
2345  }
2346  VNI = ValueIn;
2347  TrackReg = SrcReg;
2348  }
2349  return std::make_pair(VNI, TrackReg);
2350 }
2351 
2352 bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
2353  const JoinVals &Other) const {
2354  const VNInfo *Orig0;
2355  unsigned Reg0;
2356  std::tie(Orig0, Reg0) = followCopyChain(Value0);
2357  if (Orig0 == Value1 && Reg0 == Other.Reg)
2358  return true;
2359 
2360  const VNInfo *Orig1;
2361  unsigned Reg1;
2362  std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
2363  // If both values are undefined, and the source registers are the same
2364  // register, the values are identical. Filter out cases where only one
2365  // value is defined.
2366  if (Orig0 == nullptr || Orig1 == nullptr)
2367  return Orig0 == Orig1 && Reg0 == Reg1;
2368 
2369  // The values are equal if they are defined at the same place and use the
2370  // same register. Note that we cannot compare VNInfos directly as some of
2371  // them might be from a copy created in mergeSubRangeInto() while the other
2372  // is from the original LiveInterval.
2373  return Orig0->def == Orig1->def && Reg0 == Reg1;
2374 }
2375 
2376 JoinVals::ConflictResolution
2377 JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
2378  Val &V = Vals[ValNo];
2379  assert(!V.isAnalyzed() && "Value has already been analyzed!");
2380  VNInfo *VNI = LR.getValNumInfo(ValNo);
2381  if (VNI->isUnused()) {
2382  V.WriteLanes = LaneBitmask::getAll();
2383  return CR_Keep;
2384  }
2385 
2386  // Get the instruction defining this value, compute the lanes written.
2387  const MachineInstr *DefMI = nullptr;
2388  if (VNI->isPHIDef()) {
2389  // Conservatively assume that all lanes in a PHI are valid.
2390  LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
2391  : TRI->getSubRegIndexLaneMask(SubIdx);
2392  V.ValidLanes = V.WriteLanes = Lanes;
2393  } else {
2394  DefMI = Indexes->getInstructionFromIndex(VNI->def);
2395  assert(DefMI != nullptr);
2396  if (SubRangeJoin) {
2397  // We don't care about the lanes when joining subregister ranges.
2398  V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
2399  if (DefMI->isImplicitDef()) {
2400  V.ValidLanes = LaneBitmask::getNone();
2401  V.ErasableImplicitDef = true;
2402  }
2403  } else {
2404  bool Redef = false;
2405  V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
2406 
2407  // If this is a read-modify-write instruction, there may be more valid
2408  // lanes than the ones written by this instruction.
2409  // This only covers partial redef operands. DefMI may have normal use
2410  // operands reading the register. They don't contribute valid lanes.
2411  //
2412  // This adds ssub1 to the set of valid lanes in %src:
2413  //
2414  // %src:ssub1 = FOO
2415  //
2416  // This leaves only ssub1 valid, making any other lanes undef:
2417  //
2418  // %src:ssub1<def,read-undef> = FOO %src:ssub2
2419  //
2420  // The <read-undef> flag on the def operand means that old lane values are
2421  // not important.
2422  if (Redef) {
2423  V.RedefVNI = LR.Query(VNI->def).valueIn();
2424  assert((TrackSubRegLiveness || V.RedefVNI) &&
2425  "Instruction is reading nonexistent value");
2426  if (V.RedefVNI != nullptr) {
2427  computeAssignment(V.RedefVNI->id, Other);
2428  V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
2429  }
2430  }
2431 
2432  // An IMPLICIT_DEF writes undef values.
2433  if (DefMI->isImplicitDef()) {
2434  // We normally expect IMPLICIT_DEF values to be live only until the end
2435  // of their block. If the value is really live longer and gets pruned in
2436  // another block, this flag is cleared again.
2437  V.ErasableImplicitDef = true;
2438  V.ValidLanes &= ~V.WriteLanes;
2439  }
2440  }
2441  }
2442 
2443  // Find the value in Other that overlaps VNI->def, if any.
2444  LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
2445 
2446  // It is possible that both values are defined by the same instruction, or
2447  // the values are PHIs defined in the same block. When that happens, the two
2448  // values should be merged into one, but not into any preceding value.
2449  // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
2450  if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
2451  assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
2452 
2453  // One value stays, the other is merged. Keep the earlier one, or the first
2454  // one we see.
2455  if (OtherVNI->def < VNI->def)
2456  Other.computeAssignment(OtherVNI->id, *this);
2457  else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
2458  // This is an early-clobber def overlapping a live-in value in the other
2459  // register. Not mergeable.
2460  V.OtherVNI = OtherLRQ.valueIn();
2461  return CR_Impossible;
2462  }
2463  V.OtherVNI = OtherVNI;
2464  Val &OtherV = Other.Vals[OtherVNI->id];
2465  // Keep this value, check for conflicts when analyzing OtherVNI.
2466  if (!OtherV.isAnalyzed())
2467  return CR_Keep;
2468  // Both sides have been analyzed now.
2469  // Allow overlapping PHI values. Any real interference would show up in a
2470  // predecessor, the PHI itself can't introduce any conflicts.
2471  if (VNI->isPHIDef())
2472  return CR_Merge;
2473  if ((V.ValidLanes & OtherV.ValidLanes).any())
2474  // Overlapping lanes can't be resolved.
2475  return CR_Impossible;
2476  else
2477  return CR_Merge;
2478  }
2479 
2480  // No simultaneous def. Is Other live at the def?
2481  V.OtherVNI = OtherLRQ.valueIn();
2482  if (!V.OtherVNI)
2483  // No overlap, no conflict.
2484  return CR_Keep;
2485 
2486  assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
2487 
2488  // We have overlapping values, or possibly a kill of Other.
2489  // Recursively compute assignments up the dominator tree.
2490  Other.computeAssignment(V.OtherVNI->id, *this);
2491  Val &OtherV = Other.Vals[V.OtherVNI->id];
2492 
2493  // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
2494  // This shouldn't normally happen, but ProcessImplicitDefs can leave such
2495  // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
2496  // technically.
2497  //
2498  // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
2499  // to erase the IMPLICIT_DEF instruction.
2500  if (OtherV.ErasableImplicitDef && DefMI &&
2501  DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
2502  LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2503  << " extends into "
2504  << printMBBReference(*DefMI->getParent())
2505  << ", keeping it.\n");
2506  OtherV.ErasableImplicitDef = false;
2507  }
2508 
2509  // Allow overlapping PHI values. Any real interference would show up in a
2510  // predecessor, the PHI itself can't introduce any conflicts.
2511  if (VNI->isPHIDef())
2512  return CR_Replace;
2513 
2514  // Check for simple erasable conflicts.
2515  if (DefMI->isImplicitDef()) {
2516  // We need the def for the subregister if there is nothing else live at the
2517  // subrange at this point.
2518  if (TrackSubRegLiveness
2519  && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none())
2520  return CR_Replace;
2521  return CR_Erase;
2522  }
2523 
2524  // Include the non-conflict where DefMI is a coalescable copy that kills
2525  // OtherVNI. We still want the copy erased and value numbers merged.
2526  if (CP.isCoalescable(DefMI)) {
2527  // Some of the lanes copied from OtherVNI may be undef, making them undef
2528  // here too.
2529  V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
2530  return CR_Erase;
2531  }
2532 
2533  // This may not be a real conflict if DefMI simply kills Other and defines
2534  // VNI.
2535  if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
2536  return CR_Keep;
2537 
2538  // Handle the case where VNI and OtherVNI can be proven to be identical:
2539  //
2540  // %other = COPY %ext
2541  // %this = COPY %ext <-- Erase this copy
2542  //
2543  if (DefMI->isFullCopy() && !CP.isPartial() &&
2544  valuesIdentical(VNI, V.OtherVNI, Other)) {
2545  V.Identical = true;
2546  return CR_Erase;
2547  }
2548 
2549  // If the lanes written by this instruction were all undef in OtherVNI, it is
2550  // still safe to join the live ranges. This can't be done with a simple value
2551  // mapping, though - OtherVNI will map to multiple values:
2552  //
2553  // 1 %dst:ssub0 = FOO <-- OtherVNI
2554  // 2 %src = BAR <-- VNI
2555  // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy.
2556  // 4 BAZ killed %dst
2557  // 5 QUUX killed %src
2558  //
2559  // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
2560  // handles this complex value mapping.
2561  if ((V.WriteLanes & OtherV.ValidLanes).none())
2562  return CR_Replace;
2563 
2564  // If the other live range is killed by DefMI and the live ranges are still
2565  // overlapping, it must be because we're looking at an early clobber def:
2566  //
2567  // %dst<def,early-clobber> = ASM killed %src
2568  //
2569  // In this case, it is illegal to merge the two live ranges since the early
2570  // clobber def would clobber %src before it was read.
2571  if (OtherLRQ.isKill()) {
2572  // This case where the def doesn't overlap the kill is handled above.
2573  assert(VNI->def.isEarlyClobber() &&
2574  "Only early clobber defs can overlap a kill");
2575  return CR_Impossible;
2576  }
2577 
2578  // VNI is clobbering live lanes in OtherVNI, but there is still the
2579  // possibility that no instructions actually read the clobbered lanes.
2580  // If we're clobbering all the lanes in OtherVNI, at least one must be read.
2581  // Otherwise Other.RI wouldn't be live here.
2582  if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
2583  return CR_Impossible;
2584 
2585  // We need to verify that no instructions are reading the clobbered lanes. To
2586  // save compile time, we'll only check that locally. Don't allow the tainted
2587  // value to escape the basic block.
2588  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2589  if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
2590  return CR_Impossible;
2591 
2592  // There are still some things that could go wrong besides clobbered lanes
2593  // being read, for example OtherVNI may be only partially redefined in MBB,
2594  // and some clobbered lanes could escape the block. Save this analysis for
2595  // resolveConflicts() when all values have been mapped. We need to know
2596  // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
2597  // that now - the recursive analyzeValue() calls must go upwards in the
2598  // dominator tree.
2599  return CR_Unresolved;
2600 }
2601 
2602 void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
2603  Val &V = Vals[ValNo];
2604  if (V.isAnalyzed()) {
2605  // Recursion should always move up the dominator tree, so ValNo is not
2606  // supposed to reappear before it has been assigned.
2607  assert(Assignments[ValNo] != -1 && "Bad recursion?");
2608  return;
2609  }
2610  switch ((V.Resolution = analyzeValue(ValNo, Other))) {
2611  case CR_Erase:
2612  case CR_Merge:
2613  // Merge this ValNo into OtherVNI.
2614  assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
2615  assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
2616  Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
2617  LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
2618  << LR.getValNumInfo(ValNo)->def << " into "
2619  << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
2620  << V.OtherVNI->def << " --> @"
2621  << NewVNInfo[Assignments[ValNo]]->def << '\n');
2622  break;
2623  case CR_Replace:
2624  case CR_Unresolved: {
2625  // The other value is going to be pruned if this join is successful.
2626  assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
2627  Val &OtherV = Other.Vals[V.OtherVNI->id];
2628  // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
2629  // its lanes.
2630  if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness)
2631  OtherV.ErasableImplicitDef = false;
2632  OtherV.Pruned = true;
2634  }
2635  default:
2636  // This value number needs to go in the final joined live range.
2637  Assignments[ValNo] = NewVNInfo.size();
2638  NewVNInfo.push_back(LR.getValNumInfo(ValNo));
2639  break;
2640  }
2641 }
2642 
2643 bool JoinVals::mapValues(JoinVals &Other) {
2644  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2645  computeAssignment(i, Other);
2646  if (Vals[i].Resolution == CR_Impossible) {
2647  LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
2648  << '@' << LR.getValNumInfo(i)->def << '\n');
2649  return false;
2650  }
2651  }
2652  return true;
2653 }
2654 
2655 bool JoinVals::
2656 taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2657  SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
2658  VNInfo *VNI = LR.getValNumInfo(ValNo);
2659  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2660  SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
2661 
2662  // Scan Other.LR from VNI.def to MBBEnd.
2663  LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
2664  assert(OtherI != Other.LR.end() && "No conflict?");
2665  do {
2666  // OtherI is pointing to a tainted value. Abort the join if the tainted
2667  // lanes escape the block.
2668  SlotIndex End = OtherI->end;
2669  if (End >= MBBEnd) {
2670  LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
2671  << OtherI->valno->id << '@' << OtherI->start << '\n');
2672  return false;
2673  }
2674  LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
2675  << OtherI->valno->id << '@' << OtherI->start << " to "
2676  << End << '\n');
2677  // A dead def is not a problem.
2678  if (End.isDead())
2679  break;
2680  TaintExtent.push_back(std::make_pair(End, TaintedLanes));
2681 
2682  // Check for another def in the MBB.
2683  if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
2684  break;
2685 
2686  // Lanes written by the new def are no longer tainted.
2687  const Val &OV = Other.Vals[OtherI->valno->id];
2688  TaintedLanes &= ~OV.WriteLanes;
2689  if (!OV.RedefVNI)
2690  break;
2691  } while (TaintedLanes.any());
2692  return true;
2693 }
2694 
2695 bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
2696  LaneBitmask Lanes) const {
2697  if (MI.isDebugInstr())
2698  return false;
2699  for (const MachineOperand &MO : MI.operands()) {
2700  if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
2701  continue;
2702  if (!MO.readsReg())
2703  continue;
2704  unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
2705  if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
2706  return true;
2707  }
2708  return false;
2709 }
2710 
2711 bool JoinVals::resolveConflicts(JoinVals &Other) {
2712  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2713  Val &V = Vals[i];
2714  assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
2715  if (V.Resolution != CR_Unresolved)
2716  continue;
2717  LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
2718  << LR.getValNumInfo(i)->def << '\n');
2719  if (SubRangeJoin)
2720  return false;
2721 
2722  ++NumLaneConflicts;
2723  assert(V.OtherVNI && "Inconsistent conflict resolution.");
2724  VNInfo *VNI = LR.getValNumInfo(i);
2725  const Val &OtherV = Other.Vals[V.OtherVNI->id];
2726 
2727  // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
2728  // join, those lanes will be tainted with a wrong value. Get the extent of
2729  // the tainted lanes.
2730  LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
2732  if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
2733  // Tainted lanes would extend beyond the basic block.
2734  return false;
2735 
2736  assert(!TaintExtent.empty() && "There should be at least one conflict.");
2737 
2738  // Now look at the instructions from VNI->def to TaintExtent (inclusive).
2739  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2740  MachineBasicBlock::iterator MI = MBB->begin();
2741  if (!VNI->isPHIDef()) {
2742  MI = Indexes->getInstructionFromIndex(VNI->def);
2743  // No need to check the instruction defining VNI for reads.
2744  ++MI;
2745  }
2746  assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
2747  "Interference ends on VNI->def. Should have been handled earlier");
2748  MachineInstr *LastMI =
2749  Indexes->getInstructionFromIndex(TaintExtent.front().first);
2750  assert(LastMI && "Range must end at a proper instruction");
2751  unsigned TaintNum = 0;
2752  while (true) {
2753  assert(MI != MBB->end() && "Bad LastMI");
2754  if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
2755  LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
2756  return false;
2757  }
2758  // LastMI is the last instruction to use the current value.
2759  if (&*MI == LastMI) {
2760  if (++TaintNum == TaintExtent.size())
2761  break;
2762  LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
2763  assert(LastMI && "Range must end at a proper instruction");
2764  TaintedLanes = TaintExtent[TaintNum].second;
2765  }
2766  ++MI;
2767  }
2768 
2769  // The tainted lanes are unused.
2770  V.Resolution = CR_Replace;
2771  ++NumLaneResolves;
2772  }
2773  return true;
2774 }
2775 
2776 bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
2777  Val &V = Vals[ValNo];
2778  if (V.Pruned || V.PrunedComputed)
2779  return V.Pruned;
2780 
2781  if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
2782  return V.Pruned;
2783 
2784  // Follow copies up the dominator tree and check if any intermediate value
2785  // has been pruned.
2786  V.PrunedComputed = true;
2787  V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
2788  return V.Pruned;
2789 }
2790 
2791 void JoinVals::pruneValues(JoinVals &Other,
2792  SmallVectorImpl<SlotIndex> &EndPoints,
2793  bool changeInstrs) {
2794  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2795  SlotIndex Def = LR.getValNumInfo(i)->def;
2796  switch (Vals[i].Resolution) {
2797  case CR_Keep:
2798  break;
2799  case CR_Replace: {
2800  // This value takes precedence over the value in Other.LR.
2801  LIS->pruneValue(Other.LR, Def, &EndPoints);
2802  // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
2803  // instructions are only inserted to provide a live-out value for PHI
2804  // predecessors, so the instruction should simply go away once its value
2805  // has been replaced.
2806  Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
2807  bool EraseImpDef = OtherV.ErasableImplicitDef &&
2808  OtherV.Resolution == CR_Keep;
2809  if (!Def.isBlock()) {
2810  if (changeInstrs) {
2811  // Remove <def,read-undef> flags. This def is now a partial redef.
2812  // Also remove dead flags since the joined live range will
2813  // continue past this instruction.
2814  for (MachineOperand &MO :
2815  Indexes->getInstructionFromIndex(Def)->operands()) {
2816  if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
2817  if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
2818  MO.setIsUndef(false);
2819  MO.setIsDead(false);
2820  }
2821  }
2822  }
2823  // This value will reach instructions below, but we need to make sure
2824  // the live range also reaches the instruction at Def.
2825  if (!EraseImpDef)
2826  EndPoints.push_back(Def);
2827  }
2828  LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
2829  << ": " << Other.LR << '\n');
2830  break;
2831  }
2832  case CR_Erase:
2833  case CR_Merge:
2834  if (isPrunedValue(i, Other)) {
2835  // This value is ultimately a copy of a pruned value in LR or Other.LR.
2836  // We can no longer trust the value mapping computed by
2837  // computeAssignment(), the value that was originally copied could have
2838  // been replaced.
2839  LIS->pruneValue(LR, Def, &EndPoints);
2840  LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
2841  << Def << ": " << LR << '\n');
2842  }
2843  break;
2844  case CR_Unresolved:
2845  case CR_Impossible:
2846  llvm_unreachable("Unresolved conflicts");
2847  }
2848  }
2849 }
2850 
2851 /// Consider the following situation when coalescing the copy between
2852 /// %31 and %45 at 800. (The vertical lines represent live range segments.)
2853 ///
2854 /// Main range Subrange 0004 (sub2)
2855 /// %31 %45 %31 %45
2856 /// 544 %45 = COPY %28 + +
2857 /// | v1 | v1
2858 /// 560B bb.1: + +
2859 /// 624 = %45.sub2 | v2 | v2
2860 /// 800 %31 = COPY %45 + + + +
2861 /// | v0 | v0
2862 /// 816 %31.sub1 = ... + |
2863 /// 880 %30 = COPY %31 | v1 +
2864 /// 928 %45 = COPY %30 | + +
2865 /// | | v0 | v0 <--+
2866 /// 992B ; backedge -> bb.1 | + + |
2867 /// 1040 = %31.sub0 + |
2868 /// This value must remain
2869 /// live-out!
2870 ///
2871 /// Assuming that %31 is coalesced into %45, the copy at 928 becomes
2872 /// redundant, since it copies the value from %45 back into it. The
2873 /// conflict resolution for the main range determines that %45.v0 is
2874 /// to be erased, which is ok since %31.v1 is identical to it.
2875 /// The problem happens with the subrange for sub2: it has to be live
2876 /// on exit from the block, but since 928 was actually a point of
2877 /// definition of %45.sub2, %45.sub2 was not live immediately prior
2878 /// to that definition. As a result, when 928 was erased, the value v0
2879 /// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
2880 /// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
2881 /// providing an incorrect value to the use at 624.
2882 ///
2883 /// Since the main-range values %31.v1 and %45.v0 were proved to be
2884 /// identical, the corresponding values in subranges must also be the
2885 /// same. A redundant copy is removed because it's not needed, and not
2886 /// because it copied an undefined value, so any liveness that originated
2887 /// from that copy cannot disappear. When pruning a value that started
2888 /// at the removed copy, the corresponding identical value must be
2889 /// extended to replace it.
2890 void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
2891  // Look for values being erased.
2892  bool DidPrune = false;
2893  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2894  Val &V = Vals[i];
2895  // We should trigger in all cases in which eraseInstrs() does something.
2896  // match what eraseInstrs() is doing, print a message so
2897  if (V.Resolution != CR_Erase &&
2898  (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
2899  continue;
2900 
2901  // Check subranges at the point where the copy will be removed.
2902  SlotIndex Def = LR.getValNumInfo(i)->def;
2903  SlotIndex OtherDef;
2904  if (V.Identical)
2905  OtherDef = V.OtherVNI->def;
2906 
2907  // Print message so mismatches with eraseInstrs() can be diagnosed.
2908  LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
2909  << '\n');
2910  for (LiveInterval::SubRange &S : LI.subranges()) {
2911  LiveQueryResult Q = S.Query(Def);
2912 
2913  // If a subrange starts at the copy then an undefined value has been
2914  // copied and we must remove that subrange value as well.
2915  VNInfo *ValueOut = Q.valueOutOrDead();
2916  if (ValueOut != nullptr && Q.valueIn() == nullptr) {
2917  LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
2918  << " at " << Def << "\n");
2919  SmallVector<SlotIndex,8> EndPoints;
2920  LIS->pruneValue(S, Def, &EndPoints);
2921  DidPrune = true;
2922  // Mark value number as unused.
2923  ValueOut->markUnused();
2924 
2925  if (V.Identical && S.Query(OtherDef).valueOut()) {
2926  // If V is identical to V.OtherVNI (and S was live at OtherDef),
2927  // then we can't simply prune V from S. V needs to be replaced
2928  // with V.OtherVNI.
2929  LIS->extendToIndices(S, EndPoints);
2930  }
2931  continue;
2932  }
2933  // If a subrange ends at the copy, then a value was copied but only
2934  // partially used later. Shrink the subregister range appropriately.
2935  if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
2936  LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
2937  << PrintLaneMask(S.LaneMask) << " at " << Def
2938  << "\n");
2939  ShrinkMask |= S.LaneMask;
2940  }
2941  }
2942  }
2943  if (DidPrune)
2944  LI.removeEmptySubRanges();
2945 }
2946 
2947 /// Check if any of the subranges of @p LI contain a definition at @p Def.
2949  for (LiveInterval::SubRange &SR : LI.subranges()) {
2950  if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
2951  if (VNI->def == Def)
2952  return true;
2953  }
2954  return false;
2955 }
2956 
2957 void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
2958  assert(&static_cast<LiveRange&>(LI) == &LR);
2959 
2960  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2961  if (Vals[i].Resolution != CR_Keep)
2962  continue;
2963  VNInfo *VNI = LR.getValNumInfo(i);
2964  if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
2965  continue;
2966  Vals[i].Pruned = true;
2967  ShrinkMainRange = true;
2968  }
2969 }
2970 
2971 void JoinVals::removeImplicitDefs() {
2972  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2973  Val &V = Vals[i];
2974  if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
2975  continue;
2976 
2977  VNInfo *VNI = LR.getValNumInfo(i);
2978  VNI->markUnused();
2979  LR.removeValNo(VNI);
2980  }
2981 }
2982 
2983 void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
2984  SmallVectorImpl<unsigned> &ShrinkRegs,
2985  LiveInterval *LI) {
2986  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2987  // Get the def location before markUnused() below invalidates it.
2988  SlotIndex Def = LR.getValNumInfo(i)->def;
2989  switch (Vals[i].Resolution) {
2990  case CR_Keep: {
2991  // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
2992  // longer. The IMPLICIT_DEF instructions are only inserted by
2993  // PHIElimination to guarantee that all PHI predecessors have a value.
2994  if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
2995  break;
2996  // Remove value number i from LR.
2997  // For intervals with subranges, removing a segment from the main range
2998  // may require extending the previous segment: for each definition of
2999  // a subregister, there will be a corresponding def in the main range.
3000  // That def may fall in the middle of a segment from another subrange.
3001  // In such cases, removing this def from the main range must be
3002  // complemented by extending the main range to account for the liveness
3003  // of the other subrange.
3004  VNInfo *VNI = LR.getValNumInfo(i);
3005  SlotIndex Def = VNI->def;
3006  // The new end point of the main range segment to be extended.
3007  SlotIndex NewEnd;
3008  if (LI != nullptr) {
3009  LiveRange::iterator I = LR.FindSegmentContaining(Def);
3010  assert(I != LR.end());
3011  // Do not extend beyond the end of the segment being removed.
3012  // The segment may have been pruned in preparation for joining
3013  // live ranges.
3014  NewEnd = I->end;
3015  }
3016 
3017  LR.removeValNo(VNI);
3018  // Note that this VNInfo is reused and still referenced in NewVNInfo,
3019  // make it appear like an unused value number.
3020  VNI->markUnused();
3021 
3022  if (LI != nullptr && LI->hasSubRanges()) {
3023  assert(static_cast<LiveRange*>(LI) == &LR);
3024  // Determine the end point based on the subrange information:
3025  // minimum of (earliest def of next segment,
3026  // latest end point of containing segment)
3027  SlotIndex ED, LE;
3028  for (LiveInterval::SubRange &SR : LI->subranges()) {
3029  LiveRange::iterator I = SR.find(Def);
3030  if (I == SR.end())
3031  continue;
3032  if (I->start > Def)
3033  ED = ED.isValid() ? std::min(ED, I->start) : I->start;
3034  else
3035  LE = LE.isValid() ? std::max(LE, I->end) : I->end;
3036  }
3037  if (LE.isValid())
3038  NewEnd = std::min(NewEnd, LE);
3039  if (ED.isValid())
3040  NewEnd = std::min(NewEnd, ED);
3041 
3042  // We only want to do the extension if there was a subrange that
3043  // was live across Def.
3044  if (LE.isValid()) {
3045  LiveRange::iterator S = LR.find(Def);
3046  if (S != LR.begin())
3047  std::prev(S)->end = NewEnd;
3048  }
3049  }
3050  LLVM_DEBUG({
3051  dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
3052  if (LI != nullptr)
3053  dbgs() << "\t\t LHS = " << *LI << '\n';
3054  });
3056  }
3057 
3058  case CR_Erase: {
3059  MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
3060  assert(MI && "No instruction to erase");
3061  if (MI->isCopy()) {
3062  unsigned Reg = MI->getOperand(1).getReg();
3064  Reg != CP.getSrcReg() && Reg != CP.getDstReg())
3065  ShrinkRegs.push_back(Reg);
3066  }
3067  ErasedInstrs.insert(MI);
3068  LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
3069  LIS->RemoveMachineInstrFromMaps(*MI);
3070  MI->eraseFromParent();
3071  break;
3072  }
3073  default:
3074  break;
3075  }
3076  }
3077 }
3078 
3079 void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
3080  LaneBitmask LaneMask,
3081  const CoalescerPair &CP) {
3082  SmallVector<VNInfo*, 16> NewVNInfo;
3083  JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
3084  NewVNInfo, CP, LIS, TRI, true, true);
3085  JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
3086  NewVNInfo, CP, LIS, TRI, true, true);
3087 
3088  // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
3089  // We should be able to resolve all conflicts here as we could successfully do
3090  // it on the mainrange already. There is however a problem when multiple
3091  // ranges get mapped to the "overflow" lane mask bit which creates unexpected
3092  // interferences.
3093  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
3094  // We already determined that it is legal to merge the intervals, so this
3095  // should never fail.
3096  llvm_unreachable("*** Couldn't join subrange!\n");
3097  }
3098  if (!LHSVals.resolveConflicts(RHSVals) ||
3099  !RHSVals.resolveConflicts(LHSVals)) {
3100  // We already determined that it is legal to merge the intervals, so this
3101  // should never fail.
3102  llvm_unreachable("*** Couldn't join subrange!\n");
3103  }
3104 
3105  // The merging algorithm in LiveInterval::join() can't handle conflicting
3106  // value mappings, so we need to remove any live ranges that overlap a
3107  // CR_Replace resolution. Collect a set of end points that can be used to
3108  // restore the live range after joining.
3109  SmallVector<SlotIndex, 8> EndPoints;
3110  LHSVals.pruneValues(RHSVals, EndPoints, false);
3111  RHSVals.pruneValues(LHSVals, EndPoints, false);
3112 
3113  LHSVals.removeImplicitDefs();
3114  RHSVals.removeImplicitDefs();
3115 
3116  LRange.verify();
3117  RRange.verify();
3118 
3119  // Join RRange into LHS.
3120  LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
3121  NewVNInfo);
3122 
3123  LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
3124  << ' ' << LRange << "\n");
3125  if (EndPoints.empty())
3126  return;
3127 
3128  // Recompute the parts of the live range we had to remove because of
3129  // CR_Replace conflicts.
3130  LLVM_DEBUG({
3131  dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3132  for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3133  dbgs() << EndPoints[i];
3134  if (i != n-1)
3135  dbgs() << ',';
3136  }
3137  dbgs() << ": " << LRange << '\n';
3138  });
3139  LIS->extendToIndices(LRange, EndPoints);
3140 }
3141 
3142 void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
3143  const LiveRange &ToMerge,
3144  LaneBitmask LaneMask,
3145  CoalescerPair &CP) {
3147  LI.refineSubRanges(Allocator, LaneMask,
3148  [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) {
3149  if (SR.empty()) {
3150  SR.assign(ToMerge, Allocator);
3151  } else {
3152  // joinSubRegRange() destroys the merged range, so we need a copy.
3153  LiveRange RangeCopy(ToMerge, Allocator);
3154  joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
3155  }
3156  });
3157 }
3158 
3159 bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
3160  SmallVector<VNInfo*, 16> NewVNInfo;
3161  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
3162  LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
3163  bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
3164  JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
3165  NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3166  JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
3167  NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3168 
3169  LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
3170 
3171  // First compute NewVNInfo and the simple value mappings.
3172  // Detect impossible conflicts early.
3173  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
3174  return false;
3175 
3176  // Some conflicts can only be resolved after all values have been mapped.
3177  if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
3178  return false;
3179 
3180  // All clear, the live ranges can be merged.
3181  if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
3182  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
3183 
3184  // Transform lanemasks from the LHS to masks in the coalesced register and
3185  // create initial subranges if necessary.
3186  unsigned DstIdx = CP.getDstIdx();
3187  if (!LHS.hasSubRanges()) {
3188  LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
3189  : TRI->getSubRegIndexLaneMask(DstIdx);
3190  // LHS must support subregs or we wouldn't be in this codepath.
3191  assert(Mask.any());
3192  LHS.createSubRangeFrom(Allocator, Mask, LHS);
3193  } else if (DstIdx != 0) {
3194  // Transform LHS lanemasks to new register class if necessary.
3195  for (LiveInterval::SubRange &R : LHS.subranges()) {
3196  LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
3197  R.LaneMask = Mask;
3198  }
3199  }
3200  LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
3201  << '\n');
3202 
3203  // Determine lanemasks of RHS in the coalesced register and merge subranges.
3204  unsigned SrcIdx = CP.getSrcIdx();
3205  if (!RHS.hasSubRanges()) {
3206  LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
3207  : TRI->getSubRegIndexLaneMask(SrcIdx);
3208  mergeSubRangeInto(LHS, RHS, Mask, CP);
3209  } else {
3210  // Pair up subranges and merge.
3211  for (LiveInterval::SubRange &R : RHS.subranges()) {
3212  LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
3213  mergeSubRangeInto(LHS, R, Mask, CP);
3214  }
3215  }
3216  LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
3217 
3218  // Pruning implicit defs from subranges may result in the main range
3219  // having stale segments.
3220  LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
3221 
3222  LHSVals.pruneSubRegValues(LHS, ShrinkMask);
3223  RHSVals.pruneSubRegValues(LHS, ShrinkMask);
3224  }
3225 
3226  // The merging algorithm in LiveInterval::join() can't handle conflicting
3227  // value mappings, so we need to remove any live ranges that overlap a
3228  // CR_Replace resolution. Collect a set of end points that can be used to
3229  // restore the live range after joining.
3230  SmallVector<SlotIndex, 8> EndPoints;
3231  LHSVals.pruneValues(RHSVals, EndPoints, true);
3232  RHSVals.pruneValues(LHSVals, EndPoints, true);
3233 
3234  // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
3235  // registers to require trimming.
3236  SmallVector<unsigned, 8> ShrinkRegs;
3237  LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
3238  RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
3239  while (!ShrinkRegs.empty())
3240  shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
3241 
3242  // Join RHS into LHS.
3243  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
3244 
3245  // Kill flags are going to be wrong if the live ranges were overlapping.
3246  // Eventually, we should simply clear all kill flags when computing live
3247  // ranges. They are reinserted after register allocation.
3248  MRI->clearKillFlags(LHS.reg);
3249  MRI->clearKillFlags(RHS.reg);
3250 
3251  if (!EndPoints.empty()) {
3252  // Recompute the parts of the live range we had to remove because of
3253  // CR_Replace conflicts.
3254  LLVM_DEBUG({
3255  dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3256  for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3257  dbgs() << EndPoints[i];
3258  if (i != n-1)
3259  dbgs() << ',';
3260  }
3261  dbgs() << ": " << LHS << '\n';
3262  });
3263  LIS->extendToIndices((LiveRange&)LHS, EndPoints);
3264  }
3265 
3266  return true;
3267 }
3268 
3269 bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
3270  return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
3271 }
3272 
3273 namespace {
3274 
3275 /// Information concerning MBB coalescing priority.
3276 struct MBBPriorityInfo {
3277  MachineBasicBlock *MBB;
3278  unsigned Depth;
3279  bool IsSplit;
3280 
3281  MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
3282  : MBB(mbb), Depth(depth), IsSplit(issplit) {}
3283 };
3284 
3285 } // end anonymous namespace
3286 
3287 /// C-style comparator that sorts first based on the loop depth of the basic
3288 /// block (the unsigned), and then on the MBB number.
3289 ///
3290 /// EnableGlobalCopies assumes that the primary sort key is loop depth.
3291 static int compareMBBPriority(const MBBPriorityInfo *LHS,
3292  const MBBPriorityInfo *RHS) {
3293  // Deeper loops first
3294  if (LHS->Depth != RHS->Depth)
3295  return LHS->Depth > RHS->Depth ? -1 : 1;
3296 
3297  // Try to unsplit critical edges next.
3298  if (LHS->IsSplit != RHS->IsSplit)
3299  return LHS->IsSplit ? -1 : 1;
3300 
3301  // Prefer blocks that are more connected in the CFG. This takes care of
3302  // the most difficult copies first while intervals are short.
3303  unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
3304  unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
3305  if (cl != cr)
3306  return cl > cr ? -1 : 1;
3307 
3308  // As a last resort, sort by block number.
3309  return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
3310 }
3311 
3312 /// \returns true if the given copy uses or defines a local live range.
3313 static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
3314  if (!Copy->isCopy())
3315  return false;
3316 
3317  if (Copy->getOperand(1).isUndef())
3318  return false;
3319 
3320  unsigned SrcReg = Copy->getOperand(1).getReg();
3321  unsigned DstReg = Copy->getOperand(0).getReg();
3324  return false;
3325 
3326  return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
3327  || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
3328 }
3329 
3330 void RegisterCoalescer::lateLiveIntervalUpdate() {
3331  for (unsigned reg : ToBeUpdated) {
3332  if (!LIS->hasInterval(reg))
3333  continue;
3334  LiveInterval &LI = LIS->getInterval(reg);
3335  shrinkToUses(&LI, &DeadDefs);
3336  if (!DeadDefs.empty())
3337  eliminateDeadDefs();
3338  }
3339  ToBeUpdated.clear();
3340 }
3341 
3342 bool RegisterCoalescer::
3343 copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
3344  bool Progress = false;
3345  for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
3346  if (!CurrList[i])
3347  continue;
3348  // Skip instruction pointers that have already been erased, for example by
3349  // dead code elimination.
3350  if (ErasedInstrs.count(CurrList[i])) {
3351  CurrList[i] = nullptr;
3352  continue;
3353  }
3354  bool Again = false;
3355  bool Success = joinCopy(CurrList[i], Again);
3356  Progress |= Success;
3357  if (Success || !Again)
3358  CurrList[i] = nullptr;
3359  }
3360  return Progress;
3361 }
3362 
3363 /// Check if DstReg is a terminal node.
3364 /// I.e., it does not have any affinity other than \p Copy.
3365 static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
3366  const MachineRegisterInfo *MRI) {
3367  assert(Copy.isCopyLike());
3368  // Check if the destination of this copy as any other affinity.
3369  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
3370  if (&MI != &Copy && MI.isCopyLike())
3371  return false;
3372  return true;
3373 }
3374 
3375 bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
3376  assert(Copy.isCopyLike());
3377  if (!UseTerminalRule)
3378  return false;
3379  unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
3380  isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
3381  // Check if the destination of this copy has any other affinity.
3383  // If SrcReg is a physical register, the copy won't be coalesced.
3384  // Ignoring it may have other side effect (like missing
3385  // rematerialization). So keep it.
3387  !isTerminalReg(DstReg, Copy, MRI))
3388  return false;
3389 
3390  // DstReg is a terminal node. Check if it interferes with any other
3391  // copy involving SrcReg.
3392  const MachineBasicBlock *OrigBB = Copy.getParent();
3393  const LiveInterval &DstLI = LIS->getInterval(DstReg);
3394  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
3395  // Technically we should check if the weight of the new copy is
3396  // interesting compared to the other one and update the weight
3397  // of the copies accordingly. However, this would only work if
3398  // we would gather all the copies first then coalesce, whereas
3399  // right now we interleave both actions.
3400  // For now, just consider the copies that are in the same block.
3401  if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
3402  continue;
3403  unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
3404  isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
3405  OtherSubReg);
3406  if (OtherReg == SrcReg)
3407  OtherReg = OtherSrcReg;
3408  // Check if OtherReg is a non-terminal.
3410  isTerminalReg(OtherReg, MI, MRI))
3411  continue;
3412  // Check that OtherReg interfere with DstReg.
3413  if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
3414  LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
3415  << '\n');
3416  return true;
3417  }
3418  }
3419  return false;
3420 }
3421 
3422 void
3423 RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
3424  LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");
3425 
3426  // Collect all copy-like instructions in MBB. Don't start coalescing anything
3427  // yet, it might invalidate the iterator.
3428  const unsigned PrevSize = WorkList.size();
3429  if (JoinGlobalCopies) {
3430  SmallVector<MachineInstr*, 2> LocalTerminals;
3431  SmallVector<MachineInstr*, 2> GlobalTerminals;
3432  // Coalesce copies bottom-up to coalesce local defs before local uses. They
3433  // are not inherently easier to resolve, but slightly preferable until we
3434  // have local live range splitting. In particular this is required by
3435  // cmp+jmp macro fusion.
3436  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
3437  MII != E; ++MII) {
3438  if (!MII->isCopyLike())
3439  continue;
3440  bool ApplyTerminalRule = applyTerminalRule(*MII);
3441  if (isLocalCopy(&(*MII), LIS)) {
3442  if (ApplyTerminalRule)
3443  LocalTerminals.push_back(&(*MII));
3444  else
3445  LocalWorkList.push_back(&(*MII));
3446  } else {
3447  if (ApplyTerminalRule)
3448  GlobalTerminals.push_back(&(*MII));
3449  else
3450  WorkList.push_back(&(*MII));
3451  }
3452  }
3453  // Append the copies evicted by the terminal rule at the end of the list.
3454  LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
3455  WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
3456  }
3457  else {
3459  for (MachineInstr &MII : *MBB)
3460  if (MII.isCopyLike()) {
3461  if (applyTerminalRule(MII))
3462  Terminals.push_back(&MII);
3463  else
3464  WorkList.push_back(&MII);
3465  }
3466  // Append the copies evicted by the terminal rule at the end of the list.
3467  WorkList.append(Terminals.begin(), Terminals.end());
3468  }
3469  // Try coalescing the collected copies immediately, and remove the nulls.
3470  // This prevents the WorkList from getting too large since most copies are
3471  // joinable on the first attempt.
3473  CurrList(WorkList.begin() + PrevSize, WorkList.end());
3474  if (copyCoalesceWorkList(CurrList))
3475  WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
3476  nullptr), WorkList.end());
3477 }
3478 
3479 void RegisterCoalescer::coalesceLocals() {
3480  copyCoalesceWorkList(LocalWorkList);
3481  for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
3482  if (LocalWorkList[j])
3483  WorkList.push_back(LocalWorkList[j]);
3484  }
3485  LocalWorkList.clear();
3486 }
3487 
3488 void RegisterCoalescer::joinAllIntervals() {
3489  LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
3490  assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
3491 
3492  std::vector<MBBPriorityInfo> MBBs;
3493  MBBs.reserve(MF->size());
3494  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
3495  MachineBasicBlock *MBB = &*I;
3496  MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
3497  JoinSplitEdges && isSplitEdge(MBB)));
3498  }
3499  array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
3500 
3501  // Coalesce intervals in MBB priority order.
3502  unsigned CurrDepth = std::numeric_limits<unsigned>::max();
3503  for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
3504  // Try coalescing the collected local copies for deeper loops.
3505  if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
3506  coalesceLocals();
3507  CurrDepth = MBBs[i].Depth;
3508  }
3509  copyCoalesceInMBB(MBBs[i].MBB);
3510  }
3511  lateLiveIntervalUpdate();
3512  coalesceLocals();
3513 
3514  // Joining intervals can allow other intervals to be joined. Iteratively join
3515  // until we make no progress.
3516  while (copyCoalesceWorkList(WorkList))
3517  /* empty */ ;
3518  lateLiveIntervalUpdate();
3519 }
3520 
3521 void RegisterCoalescer::releaseMemory() {
3522  ErasedInstrs.clear();
3523  WorkList.clear();
3524  DeadDefs.clear();
3525  InflateRegs.clear();
3526 }
3527 
3528 bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
3529  MF = &fn;
3530  MRI = &fn.getRegInfo();
3531  const TargetSubtargetInfo &STI = fn.getSubtarget();
3532  TRI = STI.getRegisterInfo();
3533  TII = STI.getInstrInfo();
3534  LIS = &getAnalysis<LiveIntervals>();
3535  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3536  Loops = &getAnalysis<MachineLoopInfo>();
3538  JoinGlobalCopies = STI.enableJoinGlobalCopies();
3539  else
3540  JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
3541 
3542  // The MachineScheduler does not currently require JoinSplitEdges. This will
3543  // either be enabled unconditionally or replaced by a more general live range
3544  // splitting optimization.
3545  JoinSplitEdges = EnableJoinSplits;
3546 
3547  LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
3548  << "********** Function: " << MF->getName() << '\n');
3549 
3550  if (VerifyCoalescing)
3551  MF->verify(this, "Before register coalescing");
3552 
3553  RegClassInfo.runOnMachineFunction(fn);
3554 
3555  // Join (coalesce) intervals if requested.
3556  if (EnableJoining)
3557  joinAllIntervals();
3558 
3559  // After deleting a lot of copies, register classes may be less constrained.
3560  // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
3561  // DPR inflation.
3562  array_pod_sort(InflateRegs.begin(), InflateRegs.end());
3563  InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
3564  InflateRegs.end());
3565  LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
3566  << " regs.\n");
3567  for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
3568  unsigned Reg = InflateRegs[i];
3569  if (MRI->reg_nodbg_empty(Reg))
3570  continue;
3571  if (MRI->recomputeRegClass(Reg)) {
3572  LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
3573  << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
3574  ++NumInflated;
3575 
3576  LiveInterval &LI = LIS->getInterval(Reg);
3577  if (LI.hasSubRanges()) {
3578  // If the inflated register class does not support subregisters anymore
3579  // remove the subranges.
3580  if (!MRI->shouldTrackSubRegLiveness(Reg)) {
3581  LI.clearSubRanges();
3582  } else {
3583 #ifndef NDEBUG
3584  LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
3585  // If subranges are still supported, then the same subregs
3586  // should still be supported.
3587  for (LiveInterval::SubRange &S : LI.subranges()) {
3588  assert((S.LaneMask & ~MaxMask).none());
3589  }
3590 #endif
3591  }
3592  }
3593  }
3594  }
3595 
3596  LLVM_DEBUG(dump());
3597  if (VerifyCoalescing)
3598  MF->verify(this, "After register coalescing");
3599  return true;
3600 }
3601 
3602 void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
3603  LIS->print(O, m);
3604 }
bool reg_nodbg_empty(unsigned RegNo) const
reg_nodbg_empty - Return true if the only instructions using or defining Reg are Debug instructions...
const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, const TargetRegisterClass *B, const MVT::SimpleValueType SVT=MVT::SimpleValueType::Any) const
Find the largest common subclass of A and B.
bool empty() const
Definition: LiveInterval.h:370
static bool isSplitEdge(const MachineBasicBlock *MBB)
Return true if this block should be vacated by the coalescer to eliminate branches.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:78
iterator_range< reg_instr_nodbg_iterator > reg_nodbg_instructions(unsigned Reg) const
A common definition of LaneBitmask for use in TableGen and CodeGen.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
const unsigned reg
Definition: LiveInterval.h:667
bool contains(unsigned Reg) const
Return true if the specified register is included in this register class.
virtual bool enableJoinGlobalCopies() const
True if the subtarget should enable joining global copies.
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
void RemoveMachineInstrFromMaps(MachineInstr &MI)
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
LaneBitmask getMaxLaneMaskForVReg(unsigned Reg) const
Returns a mask covering all bits that can appear in lane masks of subregisters of the virtual registe...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const
Returns true if VNI is killed by any PHI-def values in LI.
void removePhysRegDefAt(unsigned Reg, SlotIndex Pos)
Remove value numbers and related live segments starting at position Pos that are part of any liverang...
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
bool isCoalescable(const MachineInstr *) const
Return true if MI is a copy instruction that will become an identity copy after coalescing.
static cl::opt< bool > VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), cl::Hidden)
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
Segments::iterator iterator
Definition: LiveInterval.h:208
unsigned size() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:285
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:638
iterator_range< use_nodbg_iterator > use_nodbg_operands(unsigned Reg) const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:162
unsigned getReg() const
getReg - Returns the register number.
void setIsUndef(bool Val=true)
void eliminateDeadDefs(SmallVectorImpl< MachineInstr *> &Dead, ArrayRef< unsigned > RegsBeingSpilled=None, AliasAnalysis *AA=nullptr)
eliminateDeadDefs - Try to delete machine instructions that are now dead (allDefsAreDead returns true...
static cl::opt< bool > EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true), cl::Hidden)
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
iterator_range< reg_nodbg_iterator > reg_nodbg_operands(unsigned Reg) const
unsigned Reg
static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS)
MachineInstr * commuteInstruction(MachineInstr &MI, bool NewMI=false, unsigned OpIdx1=CommuteAnyOperandIndex, unsigned OpIdx2=CommuteAnyOperandIndex) const
This method commutes the operands of the given machine instruction MI.
unsigned getSubReg() const
bool test(unsigned Idx) const
Definition: BitVector.h:502
static bool definesFullReg(const MachineInstr &MI, unsigned Reg)
Returns true if MI defines the full vreg Reg, as opposed to just defining a subregister.
iterator_range< reg_iterator > reg_operands(unsigned Reg) const
char & RegisterCoalescerID
RegisterCoalescer - This pass merges live ranges to eliminate copies.
bool isDead() const
isDead - Returns true if this is a dead def kill slot.
Definition: SlotIndexes.h:237
unsigned getLoopDepth(const MachineBasicBlock *BB) const
Return the loop nesting level of the specified block.
A live range for subregisters.
Definition: LiveInterval.h:645
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:152
This represents a simple continuous liveness interval for a value.
Definition: LiveInterval.h:162
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
void setIsDead(bool Val=true)
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:94
bool isPhys() const
Return true if DstReg is a physical register.
void markUnused()
Mark this value as unused.
Definition: LiveInterval.h:84
void reserve(size_type N)
Definition: SmallVector.h:376
SubRange * createSubRangeFrom(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, const LiveRange &CopyFrom)
Like createSubRange() but the new range is filled with a copy of the liveness information in CopyFrom...
Definition: LiveInterval.h:742
VNInfo * valueOut() const
Return the value leaving the instruction, if any.
Definition: LiveInterval.h:123
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
void print(raw_ostream &O, const Module *=nullptr) const override
Implement the dump method.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:361
void substVirtReg(unsigned Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
bool isCopyLike() const
Return true if the instruction behaves like a copy.
Definition: MachineInstr.h:906
bool checkRegMaskInterference(LiveInterval &LI, BitVector &UsableRegs)
Test if LI is live across any register mask instructions, and compute a bit mask of physical register...
Callback methods for LiveRangeEdit owners.
Definition: LiveRangeEdit.h:49
void initializeRegisterCoalescerPass(PassRegistry &)
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def)
Check if any of the subranges of LI contain a definition at Def.
bool isUnused() const
Returns true if this value is unused.
Definition: LiveInterval.h:81
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
unsigned getDstReg() const
Return the register (virtual or physical) that will remain after coalescing.
bool isEarlyClobber() const
const char * getRegClassName(const TargetRegisterClass *Class) const
Returns the name of the register class.
bool isPartial() const
Return true if the original copy instruction did not copy the full register, but was a subreg operati...
void removeEmptySubRanges()
Removes all subranges without any segments (subranges without segments are not considered valid and s...
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool isBlock() const
isBlock - Returns true if this is a block boundary slot.
Definition: SlotIndexes.h:227
Hexagon Hardware Loops
void pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl< SlotIndex > *EndPoints)
If LR has a live value at Kill, prune its live range by removing any liveness reachable from Kill...
static use_iterator use_end()
void verify() const
Walk the range and assert if any invariants fail to hold.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified &#39;original&#39; instruction at the specific location targeting a new destination re...
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:209
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:314
const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum...
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
iterator end()
Definition: LiveInterval.h:212
VNInfo::Allocator & getVNInfoAllocator()
A helper class for register coalescers.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:723
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
unsigned SubReg
Result of a LiveRange query.
Definition: LiveInterval.h:90
static cl::opt< bool > EnableJoinSplits("join-splitedges", cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden)
Temporary flag to test critical edge unsplitting.
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:84
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:751
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:773
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
static reg_instr_iterator reg_instr_end()
SlotIndex getNextNonNullIndex(SlotIndex Index)
Returns the next non-null index, if one exists.
Definition: SlotIndexes.h:430
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
void extendToIndices(LiveRange &LR, ArrayRef< SlotIndex > Indices, ArrayRef< SlotIndex > Undefs)
Extend the live range LR to reach all points in Indices.
const TargetRegisterClass * getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, const TargetRegisterClass *RCB, unsigned SubB, unsigned &PreA, unsigned &PreB) const
Find a common super-register class if it exists.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:308
bool isFullCopy() const
Definition: MachineInstr.h:896
SlotIndexes pass.
Definition: SlotIndexes.h:331
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def...
Definition: SlotIndexes.h:255
void clearKillFlags(unsigned Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
iterator addSegment(Segment S)
Add the specified Segment to this range, merging segments as appropriate.
MCRegUnitRootIterator enumerates the root registers of a register unit.
Segments segments
Definition: LiveInterval.h:199
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
VNInfo * MergeValueNumberInto(VNInfo *V1, VNInfo *V2)
MergeValueNumberInto - This method is called when two value numbers are found to be equivalent...
AnalysisUsage & addPreservedID(const void *ID)
const char * getSubRegIndexName(unsigned SubIdx) const
Return the human-readable symbolic target-specific name for the specified SubRegIndex.
virtual const TargetInstrInfo * getInstrInfo() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
bool isKill() const
Return true if the live-in value is killed by this instruction.
Definition: LiveInterval.h:112
bool setRegisters(const MachineInstr *)
Set registers to match the copy instruction MI.
VNInfo * createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc)
createDeadDef - Make sure the range has a value defined at Def.
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
bool allDefsAreDead() const
Return true if all the defs of this instruction are dead.
LiveRange * getCachedRegUnit(unsigned Unit)
Return the live range for register unit Unit if it has already been computed, or nullptr if it hasn&#39;t...
static constexpr LaneBitmask getLane(unsigned Lane)
Definition: LaneBitmask.h:85
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
static cl::opt< unsigned > LateRematUpdateThreshold("late-remat-update-threshold", cl::Hidden, cl::desc("During rematerialization for a copy, if the def instruction has " "many other copy uses to be rematerialized, delay the multiple " "separate live interval update work and do them all at once after " "all those rematerialization are done. It will save a lot of " "repeated work. "), cl::init(100))
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:529
bool containsOneValue() const
Definition: LiveInterval.h:299
TargetInstrInfo - Interface to description of machine instruction set.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:409
static const unsigned CommuteAnyOperandIndex
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:936
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
SubRange * createSubRange(BumpPtrAllocator &Allocator, LaneBitmask LaneMask)
Creates a new empty subregister live range.
Definition: LiveInterval.h:733
unsigned const MachineRegisterInfo * MRI
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
bool hasInterval(unsigned Reg) const
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:291
static int compareMBBPriority(const MBBPriorityInfo *LHS, const MBBPriorityInfo *RHS)
C-style comparator that sorts first based on the loop depth of the basic block (the unsigned)...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:140
void removeInterval(unsigned Reg)
Interval removal.
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:389
MachineInstrBuilder & UseMI
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src, const VNInfo *SrcValNo)
Copy segments with value number SrcValNo from liverange Src to live range and use value number DstVa...
void splitSeparateComponents(LiveInterval &LI, SmallVectorImpl< LiveInterval *> &SplitLIs)
Split separate components in LiveInterval LI into separate intervals.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
unsigned composeSubRegIndices(unsigned a, unsigned b) const
Return the subregister index you get from composing two subregister indices.
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:187
SlotIndex getIndexBefore(const MachineInstr &MI) const
getIndexBefore - Returns the index of the last indexed instruction before MI, or the start index of i...
Definition: SlotIndexes.h:443
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
unsigned getSubReg(unsigned Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo...
Represent the analysis usage information of a pass.
bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr *> *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses...
void substPhysReg(unsigned Reg, const TargetRegisterInfo &)
substPhysReg - Substitute the current register with the physical register Reg, taking any existing Su...
bool isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx=nullptr) const
Return true if the use operand of the specified index is tied to a def operand.
virtual const TargetRegisterClass * getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const
Return a subclass of the specified register class A so that each register in it has a sub-register of...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy, const MachineRegisterInfo *MRI)
Check if DstReg is a terminal node.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
self_iterator getIterator()
Definition: ilist_node.h:82
constexpr bool all() const
Definition: LaneBitmask.h:54
void join(LiveRange &Other, const int *ValNoAssignments, const int *RHSValNoAssignments, SmallVectorImpl< VNInfo *> &NewVNInfo)
join - Join two live ranges (this, and other) together.
std::pair< bool, bool > readsWritesVirtualRegister(unsigned Reg, SmallVectorImpl< unsigned > *Ops=nullptr) const
Return a pair of bools (reads, writes) indicating if this instruction reads or writes Reg...
iterator_range< pred_iterator > predecessors()
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
bool isFlipped() const
Return true when getSrcReg is the register being defined by the original copy instruction.
void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, std::function< void(LiveInterval::SubRange &)> Apply)
Refines the subranges to support LaneMask.
SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const
Return the first index in the given basic block.
unsigned getSrcReg() const
Return the virtual register that will be coalesced away.
virtual void updateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const
A callback to allow target a chance to update register allocation hints when a register is "changed" ...
bool isCopy() const
Definition: MachineInstr.h:892
bool isImplicitDef() const
Definition: MachineInstr.h:866
const AMDGPUAS & AS
iterator erase(const_iterator CI)
Definition: SmallVector.h:445
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
size_t size() const
Definition: SmallVector.h:53
bool isDebugInstr() const
Definition: MachineInstr.h:851
SlotIndex endPoint() const
Return the end point of the last live range segment to interact with the instruction, if any.
Definition: LiveInterval.h:147
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isConstantPhysReg(unsigned PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) INITIALIZE_PASS_END(RegisterCoalescer
static cl::opt< cl::boolOrDefault > EnableGlobalCopies("join-globalcopies", cl::desc("Coalesce copies that span blocks (default=subtarget)"), cl::init(cl::BOU_UNSET), cl::Hidden)
Temporary flag to test global copy optimization.
unsigned first
unsigned getSrcIdx() const
Return the subregister index that SrcReg will be coalesced into, or 0.
void setIsKill(bool Val=true)
virtual bool isAsCheapAsAMove(const MachineInstr &MI) const
Return true if the instruction is as cheap as a move instruction.
Basic Register Allocator
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
virtual bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const
Subtarget Hooks.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1142
void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:198
unsigned getDstIdx() const
Return the subregister index that DstReg will be coalesced into, or 0.
iterator_range< use_iterator > use_operands(unsigned Reg) const
simple register coalescing
bool isDebugValue() const
Definition: MachineInstr.h:849
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
MachineInstrBuilder MachineInstrBuilder & DefMI
Promote Memory to Register
Definition: Mem2Reg.cpp:110
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx. ...
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:224
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
LiveInterval & getInterval(unsigned Reg)
bool recomputeRegClass(unsigned Reg)
recomputeRegClass - Try to find a legal super-class of Reg&#39;s register class that still satisfies the ...
unsigned pred_size() const
virtual bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void clearSubRanges()
Removes all subregister liveness information.
LaneBitmask composeSubRegIndexLaneMask(unsigned IdxA, LaneBitmask Mask) const
Transforms a LaneMask computed for one subregister to the lanemask that would have been computed when...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx...
Definition: LiveInterval.h:417
void setDebugLoc(DebugLoc dl)
Replace current source information with new such.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
int findRegisterDefOperandIdx(unsigned Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found...
VNInfo * valueOutOrDead() const
Returns the value alive at the end of the instruction, if any.
Definition: LiveInterval.h:129
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
Definition: MachineInstr.h:965
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
VNInfoList valnos
Definition: LiveInterval.h:200
bool isUnconditionalBranch(QueryType Type=AnyInBundle) const
Return true if this is a branch which always transfers control flow to some other block...
Definition: MachineInstr.h:531
unsigned succ_size() const
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:394
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:156
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
#define Success
unsigned getNumValNums() const
Definition: LiveInterval.h:301
Representation of each machine instruction.
Definition: MachineInstr.h:60
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
static cl::opt< bool > UseTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"), cl::init(false), cl::Hidden)
pointer data()
Return a pointer to the vector&#39;s buffer, even if empty().
Definition: SmallVector.h:149
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
bool overlaps(const LiveRange &other) const
overlaps - Return true if the intersection of the two live ranges is not empty.
Definition: LiveInterval.h:436
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
void assign(const LiveRange &Other, BumpPtrAllocator &Allocator)
Copies values numbers and live segments from Other into this range.
Definition: LiveInterval.h:240
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
Definition: SlotIndexes.h:290
bool isEHPad() const
Returns true if the block is a landing pad.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use...
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
simple register Simple Register static false bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, unsigned &Src, unsigned &Dst, unsigned &SrcSub, unsigned &DstSub)
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:58
constexpr bool any() const
Definition: LaneBitmask.h:53
void setSubReg(unsigned subReg)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
bool flip()
Swap SrcReg and DstReg.
bool hasOneNonDBGUse(unsigned RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug instruction using the specified regis...
bool shouldTrackSubRegLiveness(const TargetRegisterClass &RC) const
Returns true if liveness for register class RC should be tracked at the subregister level...
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
SlotIndexes * getSlotIndexes() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
Definition: LiveInterval.h:396
MachineBasicBlock * intervalIsInOneMBB(const LiveInterval &LI) const
If LI is confined to a single basic block, return a pointer to that block.
iterator begin()
Definition: LiveInterval.h:211
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const TargetRegisterClass * getNewRC() const
Return the register class of the coalesced register.
aarch64 promote const
use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const
simple register Simple Register Coalescing
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
IRTranslator LLVM IR MI
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:238
void setRegClass(unsigned Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
iterator FindSegmentContaining(SlotIndex Idx)
Return an iterator to the segment that contains the specified index, or end() if there is none...
Definition: LiveInterval.h:424
bool isValid() const
Check if the iterator is at the end of the list.
VNInfo * valueDefined() const
Return the value defined by this instruction, if any.
Definition: LiveInterval.h:135
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
#define LLVM_DEBUG(X)
Definition: Debug.h:119
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:316
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:84
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
bool isTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA=nullptr) const
Return true if the instruction is trivially rematerializable, meaning it has no side effects and requ...
int findRegisterUseOperandIdx(unsigned Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found...
bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
reg_instr_iterator reg_instr_begin(unsigned RegNo) const
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
Definition: MachineInstr.h:700
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
bool isProperSubClass(const TargetRegisterClass *RC) const
isProperSubClass - Returns true if RC has a legal super-class with more allocatable registers...
bool isReserved(unsigned PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isImplicit() const
void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos)
Remove value number and related live segments of LI and its subranges that start at position Pos...