LLVM 18.0.0git
RegisterCoalescer.cpp
Go to the documentation of this file.
1//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the generic RegisterCoalescer interface which
10// is used as the common interface used by all clients and
11// implementations of register coalescing.
12//
13//===----------------------------------------------------------------------===//
14
15#include "RegisterCoalescer.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/BitVector.h"
18#include "llvm/ADT/DenseSet.h"
19#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Statistic.h"
35#include "llvm/CodeGen/Passes.h"
42#include "llvm/IR/DebugLoc.h"
44#include "llvm/MC/LaneBitmask.h"
45#include "llvm/MC/MCInstrDesc.h"
47#include "llvm/Pass.h"
50#include "llvm/Support/Debug.h"
53#include <algorithm>
54#include <cassert>
55#include <iterator>
56#include <limits>
57#include <tuple>
58#include <utility>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "regalloc"
64
65STATISTIC(numJoins , "Number of interval joins performed");
66STATISTIC(numCrossRCs , "Number of cross class joins performed");
67STATISTIC(numCommutes , "Number of instruction commuting performed");
68STATISTIC(numExtends , "Number of copies extended");
69STATISTIC(NumReMats , "Number of instructions re-materialized");
70STATISTIC(NumInflated , "Number of register classes inflated");
71STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
72STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
73STATISTIC(NumShrinkToUses, "Number of shrinkToUses called");
74
75static cl::opt<bool> EnableJoining("join-liveintervals",
76 cl::desc("Coalesce copies (default=true)"),
77 cl::init(true), cl::Hidden);
78
79static cl::opt<bool> UseTerminalRule("terminal-rule",
80 cl::desc("Apply the terminal rule"),
81 cl::init(false), cl::Hidden);
82
83/// Temporary flag to test critical edge unsplitting.
84static cl::opt<bool>
85EnableJoinSplits("join-splitedges",
86 cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
87
88/// Temporary flag to test global copy optimization.
90EnableGlobalCopies("join-globalcopies",
91 cl::desc("Coalesce copies that span blocks (default=subtarget)"),
93
94static cl::opt<bool>
95VerifyCoalescing("verify-coalescing",
96 cl::desc("Verify machine instrs before and after register coalescing"),
98
100 "late-remat-update-threshold", cl::Hidden,
101 cl::desc("During rematerialization for a copy, if the def instruction has "
102 "many other copy uses to be rematerialized, delay the multiple "
103 "separate live interval update work and do them all at once after "
104 "all those rematerialization are done. It will save a lot of "
105 "repeated work. "),
106 cl::init(100));
107
109 "large-interval-size-threshold", cl::Hidden,
110 cl::desc("If the valnos size of an interval is larger than the threshold, "
111 "it is regarded as a large interval. "),
112 cl::init(100));
113
115 "large-interval-freq-threshold", cl::Hidden,
116 cl::desc("For a large interval, if it is coalesed with other live "
117 "intervals many times more than the threshold, stop its "
118 "coalescing to control the compile time. "),
119 cl::init(256));
120
121namespace {
122
123 class JoinVals;
124
125 class RegisterCoalescer : public MachineFunctionPass,
127 MachineFunction* MF = nullptr;
128 MachineRegisterInfo* MRI = nullptr;
129 const TargetRegisterInfo* TRI = nullptr;
130 const TargetInstrInfo* TII = nullptr;
131 LiveIntervals *LIS = nullptr;
132 const MachineLoopInfo* Loops = nullptr;
133 AliasAnalysis *AA = nullptr;
134 RegisterClassInfo RegClassInfo;
135
136 /// Position and VReg of a PHI instruction during coalescing.
137 struct PHIValPos {
138 SlotIndex SI; ///< Slot where this PHI occurs.
139 Register Reg; ///< VReg the PHI occurs in.
140 unsigned SubReg; ///< Qualifying subregister for Reg.
141 };
142
143 /// Map from debug instruction number to PHI position during coalescing.
145 /// Index of, for each VReg, which debug instruction numbers and
146 /// corresponding PHIs are sensitive to coalescing. Each VReg may have
147 /// multiple PHI defs, at different positions.
149
150 /// Debug variable location tracking -- for each VReg, maintain an
151 /// ordered-by-slot-index set of DBG_VALUEs, to help quick
152 /// identification of whether coalescing may change location validity.
153 using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
155
156 /// A LaneMask to remember on which subregister live ranges we need to call
157 /// shrinkToUses() later.
158 LaneBitmask ShrinkMask;
159
160 /// True if the main range of the currently coalesced intervals should be
161 /// checked for smaller live intervals.
162 bool ShrinkMainRange = false;
163
164 /// True if the coalescer should aggressively coalesce global copies
165 /// in favor of keeping local copies.
166 bool JoinGlobalCopies = false;
167
168 /// True if the coalescer should aggressively coalesce fall-thru
169 /// blocks exclusively containing copies.
170 bool JoinSplitEdges = false;
171
172 /// Copy instructions yet to be coalesced.
174 SmallVector<MachineInstr*, 8> LocalWorkList;
175
176 /// Set of instruction pointers that have been erased, and
177 /// that may be present in WorkList.
179
180 /// Dead instructions that are about to be deleted.
182
183 /// Virtual registers to be considered for register class inflation.
184 SmallVector<Register, 8> InflateRegs;
185
186 /// The collection of live intervals which should have been updated
187 /// immediately after rematerialiation but delayed until
188 /// lateLiveIntervalUpdate is called.
189 DenseSet<Register> ToBeUpdated;
190
191 /// Record how many times the large live interval with many valnos
192 /// has been tried to join with other live interval.
193 DenseMap<Register, unsigned long> LargeLIVisitCounter;
194
195 /// Recursively eliminate dead defs in DeadDefs.
196 void eliminateDeadDefs(LiveRangeEdit *Edit = nullptr);
197
198 /// LiveRangeEdit callback for eliminateDeadDefs().
200
201 /// Coalesce the LocalWorkList.
202 void coalesceLocals();
203
204 /// Join compatible live intervals
205 void joinAllIntervals();
206
207 /// Coalesce copies in the specified MBB, putting
208 /// copies that cannot yet be coalesced into WorkList.
209 void copyCoalesceInMBB(MachineBasicBlock *MBB);
210
211 /// Tries to coalesce all copies in CurrList. Returns true if any progress
212 /// was made.
213 bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
214
215 /// If one def has many copy like uses, and those copy uses are all
216 /// rematerialized, the live interval update needed for those
217 /// rematerializations will be delayed and done all at once instead
218 /// of being done multiple times. This is to save compile cost because
219 /// live interval update is costly.
220 void lateLiveIntervalUpdate();
221
222 /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
223 /// has no value defined in the predecessors. If the incoming value is the
224 /// same as defined by the copy itself, the value is considered undefined.
225 bool copyValueUndefInPredecessors(LiveRange &S,
226 const MachineBasicBlock *MBB,
227 LiveQueryResult SLRQ);
228
229 /// Set necessary undef flags on subregister uses after pruning out undef
230 /// lane segments from the subrange.
231 void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
232 LaneBitmask PrunedLanes);
233
234 /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
235 /// src/dst of the copy instruction CopyMI. This returns true if the copy
236 /// was successfully coalesced away. If it is not currently possible to
237 /// coalesce this interval, but it may be possible if other things get
238 /// coalesced, then it returns true by reference in 'Again'.
239 bool joinCopy(MachineInstr *CopyMI, bool &Again);
240
241 /// Attempt to join these two intervals. On failure, this
242 /// returns false. The output "SrcInt" will not have been modified, so we
243 /// can use this information below to update aliases.
244 bool joinIntervals(CoalescerPair &CP);
245
246 /// Attempt joining two virtual registers. Return true on success.
247 bool joinVirtRegs(CoalescerPair &CP);
248
249 /// If a live interval has many valnos and is coalesced with other
250 /// live intervals many times, we regard such live interval as having
251 /// high compile time cost.
252 bool isHighCostLiveInterval(LiveInterval &LI);
253
254 /// Attempt joining with a reserved physreg.
255 bool joinReservedPhysReg(CoalescerPair &CP);
256
257 /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
258 /// Subranges in @p LI which only partially interfere with the desired
259 /// LaneMask are split as necessary. @p LaneMask are the lanes that
260 /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
261 /// lanemasks already adjusted to the coalesced register.
262 void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
263 LaneBitmask LaneMask, CoalescerPair &CP,
264 unsigned DstIdx);
265
266 /// Join the liveranges of two subregisters. Joins @p RRange into
267 /// @p LRange, @p RRange may be invalid afterwards.
268 void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
269 LaneBitmask LaneMask, const CoalescerPair &CP);
270
271 /// We found a non-trivially-coalescable copy. If the source value number is
272 /// defined by a copy from the destination reg see if we can merge these two
273 /// destination reg valno# into a single value number, eliminating a copy.
274 /// This returns true if an interval was modified.
275 bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
276
277 /// Return true if there are definitions of IntB
278 /// other than BValNo val# that can reach uses of AValno val# of IntA.
279 bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
280 VNInfo *AValNo, VNInfo *BValNo);
281
282 /// We found a non-trivially-coalescable copy.
283 /// If the source value number is defined by a commutable instruction and
284 /// its other operand is coalesced to the copy dest register, see if we
285 /// can transform the copy into a noop by commuting the definition.
286 /// This returns a pair of two flags:
287 /// - the first element is true if an interval was modified,
288 /// - the second element is true if the destination interval needs
289 /// to be shrunk after deleting the copy.
290 std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
291 MachineInstr *CopyMI);
292
293 /// We found a copy which can be moved to its less frequent predecessor.
294 bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
295
296 /// If the source of a copy is defined by a
297 /// trivial computation, replace the copy by rematerialize the definition.
298 bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
299 bool &IsDefCopy);
300
301 /// Return true if a copy involving a physreg should be joined.
302 bool canJoinPhys(const CoalescerPair &CP);
303
304 /// Replace all defs and uses of SrcReg to DstReg and update the subregister
305 /// number if it is not zero. If DstReg is a physical register and the
306 /// existing subregister number of the def / use being updated is not zero,
307 /// make sure to set it to the correct physical subregister.
308 void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
309
310 /// If the given machine operand reads only undefined lanes add an undef
311 /// flag.
312 /// This can happen when undef uses were previously concealed by a copy
313 /// which we coalesced. Example:
314 /// %0:sub0<def,read-undef> = ...
315 /// %1 = COPY %0 <-- Coalescing COPY reveals undef
316 /// = use %1:sub1 <-- hidden undef use
317 void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
318 MachineOperand &MO, unsigned SubRegIdx);
319
320 /// Handle copies of undef values. If the undef value is an incoming
321 /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
322 /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
323 /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
324 MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);
325
326 /// Check whether or not we should apply the terminal rule on the
327 /// destination (Dst) of \p Copy.
328 /// When the terminal rule applies, Copy is not profitable to
329 /// coalesce.
330 /// Dst is terminal if it has exactly one affinity (Dst, Src) and
331 /// at least one interference (Dst, Dst2). If Dst is terminal, the
332 /// terminal rule consists in checking that at least one of
333 /// interfering node, say Dst2, has an affinity of equal or greater
334 /// weight with Src.
335 /// In that case, Dst2 and Dst will not be able to be both coalesced
336 /// with Src. Since Dst2 exposes more coalescing opportunities than
337 /// Dst, we can drop \p Copy.
338 bool applyTerminalRule(const MachineInstr &Copy) const;
339
340 /// Wrapper method for \see LiveIntervals::shrinkToUses.
341 /// This method does the proper fixing of the live-ranges when the afore
342 /// mentioned method returns true.
343 void shrinkToUses(LiveInterval *LI,
344 SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
345 NumShrinkToUses++;
346 if (LIS->shrinkToUses(LI, Dead)) {
347 /// Check whether or not \p LI is composed by multiple connected
348 /// components and if that is the case, fix that.
350 LIS->splitSeparateComponents(*LI, SplitLIs);
351 }
352 }
353
354 /// Wrapper Method to do all the necessary work when an Instruction is
355 /// deleted.
356 /// Optimizations should use this to make sure that deleted instructions
357 /// are always accounted for.
358 void deleteInstr(MachineInstr* MI) {
359 ErasedInstrs.insert(MI);
361 MI->eraseFromParent();
362 }
363
364 /// Walk over function and initialize the DbgVRegToValues map.
366
367 /// Test whether, after merging, any DBG_VALUEs would refer to a
368 /// different value number than before merging, and whether this can
369 /// be resolved. If not, mark the DBG_VALUE as being undef.
370 void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS,
371 JoinVals &LHSVals, LiveRange &RHS,
372 JoinVals &RHSVals);
373
374 void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange,
375 LiveRange &RegRange, JoinVals &Vals2);
376
377 public:
378 static char ID; ///< Class identification, replacement for typeinfo
379
380 RegisterCoalescer() : MachineFunctionPass(ID) {
382 }
383
384 void getAnalysisUsage(AnalysisUsage &AU) const override;
385
386 void releaseMemory() override;
387
388 /// This is the pass entry point.
390
391 /// Implement the dump method.
392 void print(raw_ostream &O, const Module* = nullptr) const override;
393 };
394
395} // end anonymous namespace
396
397char RegisterCoalescer::ID = 0;
398
399char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
400
401INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer",
402 "Register Coalescer", false, false)
407INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer",
409
410[[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri,
412 Register &Dst, unsigned &SrcSub,
413 unsigned &DstSub) {
414 if (MI->isCopy()) {
415 Dst = MI->getOperand(0).getReg();
416 DstSub = MI->getOperand(0).getSubReg();
417 Src = MI->getOperand(1).getReg();
418 SrcSub = MI->getOperand(1).getSubReg();
419 } else if (MI->isSubregToReg()) {
420 Dst = MI->getOperand(0).getReg();
421 DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
422 MI->getOperand(3).getImm());
423 Src = MI->getOperand(2).getReg();
424 SrcSub = MI->getOperand(2).getSubReg();
425 } else
426 return false;
427 return true;
428}
429
430/// Return true if this block should be vacated by the coalescer to eliminate
431/// branches. The important cases to handle in the coalescer are critical edges
432/// split during phi elimination which contain only copies. Simple blocks that
433/// contain non-branches should also be vacated, but this can be handled by an
434/// earlier pass similar to early if-conversion.
435static bool isSplitEdge(const MachineBasicBlock *MBB) {
436 if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
437 return false;
438
439 for (const auto &MI : *MBB) {
440 if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
441 return false;
442 }
443 return true;
444}
445
447 SrcReg = DstReg = Register();
448 SrcIdx = DstIdx = 0;
449 NewRC = nullptr;
450 Flipped = CrossClass = false;
451
452 Register Src, Dst;
453 unsigned SrcSub = 0, DstSub = 0;
454 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
455 return false;
456 Partial = SrcSub || DstSub;
457
458 // If one register is a physreg, it must be Dst.
459 if (Src.isPhysical()) {
460 if (Dst.isPhysical())
461 return false;
462 std::swap(Src, Dst);
463 std::swap(SrcSub, DstSub);
464 Flipped = true;
465 }
466
467 const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
468
469 if (Dst.isPhysical()) {
470 // Eliminate DstSub on a physreg.
471 if (DstSub) {
472 Dst = TRI.getSubReg(Dst, DstSub);
473 if (!Dst) return false;
474 DstSub = 0;
475 }
476
477 // Eliminate SrcSub by picking a corresponding Dst superregister.
478 if (SrcSub) {
479 Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
480 if (!Dst) return false;
481 } else if (!MRI.getRegClass(Src)->contains(Dst)) {
482 return false;
483 }
484 } else {
485 // Both registers are virtual.
486 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
487 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
488
489 // Both registers have subreg indices.
490 if (SrcSub && DstSub) {
491 // Copies between different sub-registers are never coalescable.
492 if (Src == Dst && SrcSub != DstSub)
493 return false;
494
495 NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
496 SrcIdx, DstIdx);
497 if (!NewRC)
498 return false;
499 } else if (DstSub) {
500 // SrcReg will be merged with a sub-register of DstReg.
501 SrcIdx = DstSub;
502 NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
503 } else if (SrcSub) {
504 // DstReg will be merged with a sub-register of SrcReg.
505 DstIdx = SrcSub;
506 NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
507 } else {
508 // This is a straight copy without sub-registers.
509 NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
510 }
511
512 // The combined constraint may be impossible to satisfy.
513 if (!NewRC)
514 return false;
515
516 // Prefer SrcReg to be a sub-register of DstReg.
517 // FIXME: Coalescer should support subregs symmetrically.
518 if (DstIdx && !SrcIdx) {
519 std::swap(Src, Dst);
520 std::swap(SrcIdx, DstIdx);
521 Flipped = !Flipped;
522 }
523
524 CrossClass = NewRC != DstRC || NewRC != SrcRC;
525 }
526 // Check our invariants
527 assert(Src.isVirtual() && "Src must be virtual");
528 assert(!(Dst.isPhysical() && DstSub) && "Cannot have a physical SubIdx");
529 SrcReg = Src;
530 DstReg = Dst;
531 return true;
532}
533
535 if (DstReg.isPhysical())
536 return false;
537 std::swap(SrcReg, DstReg);
538 std::swap(SrcIdx, DstIdx);
539 Flipped = !Flipped;
540 return true;
541}
542
544 if (!MI)
545 return false;
546 Register Src, Dst;
547 unsigned SrcSub = 0, DstSub = 0;
548 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
549 return false;
550
551 // Find the virtual register that is SrcReg.
552 if (Dst == SrcReg) {
553 std::swap(Src, Dst);
554 std::swap(SrcSub, DstSub);
555 } else if (Src != SrcReg) {
556 return false;
557 }
558
559 // Now check that Dst matches DstReg.
560 if (DstReg.isPhysical()) {
561 if (!Dst.isPhysical())
562 return false;
563 assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
564 // DstSub could be set for a physreg from INSERT_SUBREG.
565 if (DstSub)
566 Dst = TRI.getSubReg(Dst, DstSub);
567 // Full copy of Src.
568 if (!SrcSub)
569 return DstReg == Dst;
570 // This is a partial register copy. Check that the parts match.
571 return Register(TRI.getSubReg(DstReg, SrcSub)) == Dst;
572 } else {
573 // DstReg is virtual.
574 if (DstReg != Dst)
575 return false;
576 // Registers match, do the subregisters line up?
577 return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
578 TRI.composeSubRegIndices(DstIdx, DstSub);
579 }
580}
581
582void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
583 AU.setPreservesCFG();
592}
593
594void RegisterCoalescer::eliminateDeadDefs(LiveRangeEdit *Edit) {
595 if (Edit) {
596 Edit->eliminateDeadDefs(DeadDefs);
597 return;
598 }
600 LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
601 nullptr, this).eliminateDeadDefs(DeadDefs);
602}
603
604void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
605 // MI may be in WorkList. Make sure we don't visit it.
606 ErasedInstrs.insert(MI);
607}
608
609bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
610 MachineInstr *CopyMI) {
611 assert(!CP.isPartial() && "This doesn't work for partial copies.");
612 assert(!CP.isPhys() && "This doesn't work for physreg copies.");
613
614 LiveInterval &IntA =
615 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
616 LiveInterval &IntB =
617 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
618 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
619
620 // We have a non-trivially-coalescable copy with IntA being the source and
621 // IntB being the dest, thus this defines a value number in IntB. If the
622 // source value number (in IntA) is defined by a copy from B, see if we can
623 // merge these two pieces of B into a single value number, eliminating a copy.
624 // For example:
625 //
626 // A3 = B0
627 // ...
628 // B1 = A3 <- this copy
629 //
630 // In this case, B0 can be extended to where the B1 copy lives, allowing the
631 // B1 value number to be replaced with B0 (which simplifies the B
632 // liveinterval).
633
634 // BValNo is a value number in B that is defined by a copy from A. 'B1' in
635 // the example above.
637 if (BS == IntB.end()) return false;
638 VNInfo *BValNo = BS->valno;
639
640 // Get the location that B is defined at. Two options: either this value has
641 // an unknown definition point or it is defined at CopyIdx. If unknown, we
642 // can't process it.
643 if (BValNo->def != CopyIdx) return false;
644
645 // AValNo is the value number in A that defines the copy, A3 in the example.
646 SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
647 LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
648 // The live segment might not exist after fun with physreg coalescing.
649 if (AS == IntA.end()) return false;
650 VNInfo *AValNo = AS->valno;
651
652 // If AValNo is defined as a copy from IntB, we can potentially process this.
653 // Get the instruction that defines this value number.
654 MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
655 // Don't allow any partial copies, even if isCoalescable() allows them.
656 if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
657 return false;
658
659 // Get the Segment in IntB that this value number starts with.
661 IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
662 if (ValS == IntB.end())
663 return false;
664
665 // Make sure that the end of the live segment is inside the same block as
666 // CopyMI.
667 MachineInstr *ValSEndInst =
668 LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
669 if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
670 return false;
671
672 // Okay, we now know that ValS ends in the same block that the CopyMI
673 // live-range starts. If there are no intervening live segments between them
674 // in IntB, we can merge them.
675 if (ValS+1 != BS) return false;
676
677 LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI));
678
679 SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
680 // We are about to delete CopyMI, so need to remove it as the 'instruction
681 // that defines this value #'. Update the valnum with the new defining
682 // instruction #.
683 BValNo->def = FillerStart;
684
685 // Okay, we can merge them. We need to insert a new liverange:
686 // [ValS.end, BS.begin) of either value number, then we merge the
687 // two value numbers.
688 IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
689
690 // Okay, merge "B1" into the same value number as "B0".
691 if (BValNo != ValS->valno)
692 IntB.MergeValueNumberInto(BValNo, ValS->valno);
693
694 // Do the same for the subregister segments.
695 for (LiveInterval::SubRange &S : IntB.subranges()) {
696 // Check for SubRange Segments of the form [1234r,1234d:0) which can be
697 // removed to prevent creating bogus SubRange Segments.
698 LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
699 if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
700 S.removeSegment(*SS, true);
701 continue;
702 }
703 // The subrange may have ended before FillerStart. If so, extend it.
704 if (!S.getVNInfoAt(FillerStart)) {
705 SlotIndex BBStart =
706 LIS->getMBBStartIdx(LIS->getMBBFromIndex(FillerStart));
707 S.extendInBlock(BBStart, FillerStart);
708 }
709 VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
710 S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
711 VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
712 if (SubBValNo != SubValSNo)
713 S.MergeValueNumberInto(SubBValNo, SubValSNo);
714 }
715
716 LLVM_DEBUG(dbgs() << " result = " << IntB << '\n');
717
718 // If the source instruction was killing the source register before the
719 // merge, unset the isKill marker given the live range has been extended.
720 int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true);
721 if (UIdx != -1) {
722 ValSEndInst->getOperand(UIdx).setIsKill(false);
723 }
724
725 // Rewrite the copy.
726 CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI);
727 // If the copy instruction was killing the destination register or any
728 // subrange before the merge trim the live range.
729 bool RecomputeLiveRange = AS->end == CopyIdx;
730 if (!RecomputeLiveRange) {
731 for (LiveInterval::SubRange &S : IntA.subranges()) {
732 LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
733 if (SS != S.end() && SS->end == CopyIdx) {
734 RecomputeLiveRange = true;
735 break;
736 }
737 }
738 }
739 if (RecomputeLiveRange)
740 shrinkToUses(&IntA);
741
742 ++numExtends;
743 return true;
744}
745
746bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
747 LiveInterval &IntB,
748 VNInfo *AValNo,
749 VNInfo *BValNo) {
750 // If AValNo has PHI kills, conservatively assume that IntB defs can reach
751 // the PHI values.
752 if (LIS->hasPHIKill(IntA, AValNo))
753 return true;
754
755 for (LiveRange::Segment &ASeg : IntA.segments) {
756 if (ASeg.valno != AValNo) continue;
758 if (BI != IntB.begin())
759 --BI;
760 for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
761 if (BI->valno == BValNo)
762 continue;
763 if (BI->start <= ASeg.start && BI->end > ASeg.start)
764 return true;
765 if (BI->start > ASeg.start && BI->start < ASeg.end)
766 return true;
767 }
768 }
769 return false;
770}
771
772/// Copy segments with value number @p SrcValNo from liverange @p Src to live
773/// range @Dst and use value number @p DstValNo there.
774static std::pair<bool,bool>
775addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
776 const VNInfo *SrcValNo) {
777 bool Changed = false;
778 bool MergedWithDead = false;
779 for (const LiveRange::Segment &S : Src.segments) {
780 if (S.valno != SrcValNo)
781 continue;
782 // This is adding a segment from Src that ends in a copy that is about
783 // to be removed. This segment is going to be merged with a pre-existing
784 // segment in Dst. This works, except in cases when the corresponding
785 // segment in Dst is dead. For example: adding [192r,208r:1) from Src
786 // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
787 // Recognized such cases, so that the segments can be shrunk.
788 LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
789 LiveRange::Segment &Merged = *Dst.addSegment(Added);
790 if (Merged.end.isDead())
791 MergedWithDead = true;
792 Changed = true;
793 }
794 return std::make_pair(Changed, MergedWithDead);
795}
796
797std::pair<bool,bool>
798RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
799 MachineInstr *CopyMI) {
800 assert(!CP.isPhys());
801
802 LiveInterval &IntA =
803 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
804 LiveInterval &IntB =
805 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
806
807 // We found a non-trivially-coalescable copy with IntA being the source and
808 // IntB being the dest, thus this defines a value number in IntB. If the
809 // source value number (in IntA) is defined by a commutable instruction and
810 // its other operand is coalesced to the copy dest register, see if we can
811 // transform the copy into a noop by commuting the definition. For example,
812 //
813 // A3 = op A2 killed B0
814 // ...
815 // B1 = A3 <- this copy
816 // ...
817 // = op A3 <- more uses
818 //
819 // ==>
820 //
821 // B2 = op B0 killed A2
822 // ...
823 // B1 = B2 <- now an identity copy
824 // ...
825 // = op B2 <- more uses
826
827 // BValNo is a value number in B that is defined by a copy from A. 'B1' in
828 // the example above.
829 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
830 VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
831 assert(BValNo != nullptr && BValNo->def == CopyIdx);
832
833 // AValNo is the value number in A that defines the copy, A3 in the example.
834 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
835 assert(AValNo && !AValNo->isUnused() && "COPY source not live");
836 if (AValNo->isPHIDef())
837 return { false, false };
839 if (!DefMI)
840 return { false, false };
841 if (!DefMI->isCommutable())
842 return { false, false };
843 // If DefMI is a two-address instruction then commuting it will change the
844 // destination register.
845 int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg());
846 assert(DefIdx != -1);
847 unsigned UseOpIdx;
848 if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
849 return { false, false };
850
851 // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
852 // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
853 // passed to the method. That _other_ operand is chosen by
854 // the findCommutedOpIndices() method.
855 //
856 // That is obviously an area for improvement in case of instructions having
857 // more than 2 operands. For example, if some instruction has 3 commutable
858 // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
859 // op#2<->op#3) of commute transformation should be considered/tried here.
860 unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
861 if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
862 return { false, false };
863
864 MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
865 Register NewReg = NewDstMO.getReg();
866 if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill())
867 return { false, false };
868
869 // Make sure there are no other definitions of IntB that would reach the
870 // uses which the new definition can reach.
871 if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
872 return { false, false };
873
874 // If some of the uses of IntA.reg is already coalesced away, return false.
875 // It's not possible to determine whether it's safe to perform the coalescing.
876 for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) {
877 MachineInstr *UseMI = MO.getParent();
878 unsigned OpNo = &MO - &UseMI->getOperand(0);
879 SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
881 if (US == IntA.end() || US->valno != AValNo)
882 continue;
883 // If this use is tied to a def, we can't rewrite the register.
884 if (UseMI->isRegTiedToDefOperand(OpNo))
885 return { false, false };
886 }
887
888 LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
889 << *DefMI);
890
891 // At this point we have decided that it is legal to do this
892 // transformation. Start by commuting the instruction.
894 MachineInstr *NewMI =
895 TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
896 if (!NewMI)
897 return { false, false };
898 if (IntA.reg().isVirtual() && IntB.reg().isVirtual() &&
899 !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg())))
900 return { false, false };
901 if (NewMI != DefMI) {
902 LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
904 MBB->insert(Pos, NewMI);
905 MBB->erase(DefMI);
906 }
907
908 // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
909 // A = or A, B
910 // ...
911 // B = A
912 // ...
913 // C = killed A
914 // ...
915 // = B
916
917 // Update uses of IntA of the specific Val# with IntB.
918 for (MachineOperand &UseMO :
919 llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) {
920 if (UseMO.isUndef())
921 continue;
922 MachineInstr *UseMI = UseMO.getParent();
923 if (UseMI->isDebugInstr()) {
924 // FIXME These don't have an instruction index. Not clear we have enough
925 // info to decide whether to do this replacement or not. For now do it.
926 UseMO.setReg(NewReg);
927 continue;
928 }
929 SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
931 assert(US != IntA.end() && "Use must be live");
932 if (US->valno != AValNo)
933 continue;
934 // Kill flags are no longer accurate. They are recomputed after RA.
935 UseMO.setIsKill(false);
936 if (NewReg.isPhysical())
937 UseMO.substPhysReg(NewReg, *TRI);
938 else
939 UseMO.setReg(NewReg);
940 if (UseMI == CopyMI)
941 continue;
942 if (!UseMI->isCopy())
943 continue;
944 if (UseMI->getOperand(0).getReg() != IntB.reg() ||
946 continue;
947
948 // This copy will become a noop. If it's defining a new val#, merge it into
949 // BValNo.
950 SlotIndex DefIdx = UseIdx.getRegSlot();
951 VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
952 if (!DVNI)
953 continue;
954 LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
955 assert(DVNI->def == DefIdx);
956 BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
957 for (LiveInterval::SubRange &S : IntB.subranges()) {
958 VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
959 if (!SubDVNI)
960 continue;
961 VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
962 assert(SubBValNo->def == CopyIdx);
963 S.MergeValueNumberInto(SubDVNI, SubBValNo);
964 }
965
966 deleteInstr(UseMI);
967 }
968
969 // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
970 // is updated.
971 bool ShrinkB = false;
973 if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
974 if (!IntA.hasSubRanges()) {
975 LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg());
976 IntA.createSubRangeFrom(Allocator, Mask, IntA);
977 } else if (!IntB.hasSubRanges()) {
978 LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg());
979 IntB.createSubRangeFrom(Allocator, Mask, IntB);
980 }
981 SlotIndex AIdx = CopyIdx.getRegSlot(true);
982 LaneBitmask MaskA;
983 const SlotIndexes &Indexes = *LIS->getSlotIndexes();
984 for (LiveInterval::SubRange &SA : IntA.subranges()) {
985 VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
986 // Even if we are dealing with a full copy, some lanes can
987 // still be undefined.
988 // E.g.,
989 // undef A.subLow = ...
990 // B = COPY A <== A.subHigh is undefined here and does
991 // not have a value number.
992 if (!ASubValNo)
993 continue;
994 MaskA |= SA.LaneMask;
995
996 IntB.refineSubRanges(
997 Allocator, SA.LaneMask,
998 [&Allocator, &SA, CopyIdx, ASubValNo,
999 &ShrinkB](LiveInterval::SubRange &SR) {
1000 VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator)
1001 : SR.getVNInfoAt(CopyIdx);
1002 assert(BSubValNo != nullptr);
1003 auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
1004 ShrinkB |= P.second;
1005 if (P.first)
1006 BSubValNo->def = ASubValNo->def;
1007 },
1008 Indexes, *TRI);
1009 }
1010 // Go over all subranges of IntB that have not been covered by IntA,
1011 // and delete the segments starting at CopyIdx. This can happen if
1012 // IntA has undef lanes that are defined in IntB.
1013 for (LiveInterval::SubRange &SB : IntB.subranges()) {
1014 if ((SB.LaneMask & MaskA).any())
1015 continue;
1016 if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
1017 if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
1018 SB.removeSegment(*S, true);
1019 }
1020 }
1021
1022 BValNo->def = AValNo->def;
1023 auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
1024 ShrinkB |= P.second;
1025 LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
1026
1027 LIS->removeVRegDefAt(IntA, AValNo->def);
1028
1029 LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
1030 ++numCommutes;
1031 return { true, ShrinkB };
1032}
1033
1034/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
1035/// predecessor of BB2, and if B is not redefined on the way from A = B
1036/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
1037/// execution goes through the path from BB0 to BB2. We may move B = A
1038/// to the predecessor without such reversed copy.
1039/// So we will transform the program from:
1040/// BB0:
1041/// A = B; BB1:
1042/// ... ...
1043/// / \ /
1044/// BB2:
1045/// ...
1046/// B = A;
1047///
1048/// to:
1049///
1050/// BB0: BB1:
1051/// A = B; ...
1052/// ... B = A;
1053/// / \ /
1054/// BB2:
1055/// ...
1056///
1057/// A special case is when BB0 and BB2 are the same BB which is the only
1058/// BB in a loop:
1059/// BB1:
1060/// ...
1061/// BB0/BB2: ----
1062/// B = A; |
1063/// ... |
1064/// A = B; |
1065/// |-------
1066/// |
1067/// We may hoist B = A from BB0/BB2 to BB1.
1068///
1069/// The major preconditions for correctness to remove such partial
1070/// redundancy include:
1071/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
1072/// the PHI is defined by the reversed copy A = B in BB0.
1073/// 2. No B is referenced from the start of BB2 to B = A.
1074/// 3. No B is defined from A = B to the end of BB0.
1075/// 4. BB1 has only one successor.
1076///
1077/// 2 and 4 implicitly ensure B is not live at the end of BB1.
1078/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
1079/// colder place, which not only prevents an endless loop but also makes
1080/// sure the movement of the copy is beneficial.
1081bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
1082 MachineInstr &CopyMI) {
1083 assert(!CP.isPhys());
1084 if (!CopyMI.isFullCopy())
1085 return false;
1086
1087 MachineBasicBlock &MBB = *CopyMI.getParent();
1088 // If this block is the target of an invoke/inlineasm_br, moving the copy into
1089 // the predecessor is tricker, and we don't handle it.
1091 return false;
1092
1093 if (MBB.pred_size() != 2)
1094 return false;
1095
1096 LiveInterval &IntA =
1097 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
1098 LiveInterval &IntB =
1099 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
1100
1101 // A is defined by PHI at the entry of MBB.
1102 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
1103 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
1104 assert(AValNo && !AValNo->isUnused() && "COPY source not live");
1105 if (!AValNo->isPHIDef())
1106 return false;
1107
1108 // No B is referenced before CopyMI in MBB.
1109 if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
1110 return false;
1111
1112 // MBB has two predecessors: one contains A = B so no copy will be inserted
1113 // for it. The other one will have a copy moved from MBB.
1114 bool FoundReverseCopy = false;
1115 MachineBasicBlock *CopyLeftBB = nullptr;
1116 for (MachineBasicBlock *Pred : MBB.predecessors()) {
1117 VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
1119 if (!DefMI || !DefMI->isFullCopy()) {
1120 CopyLeftBB = Pred;
1121 continue;
1122 }
1123 // Check DefMI is a reverse copy and it is in BB Pred.
1124 if (DefMI->getOperand(0).getReg() != IntA.reg() ||
1125 DefMI->getOperand(1).getReg() != IntB.reg() ||
1126 DefMI->getParent() != Pred) {
1127 CopyLeftBB = Pred;
1128 continue;
1129 }
1130 // If there is any other def of B after DefMI and before the end of Pred,
1131 // we need to keep the copy of B = A at the end of Pred if we remove
1132 // B = A from MBB.
1133 bool ValB_Changed = false;
1134 for (auto *VNI : IntB.valnos) {
1135 if (VNI->isUnused())
1136 continue;
1137 if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
1138 ValB_Changed = true;
1139 break;
1140 }
1141 }
1142 if (ValB_Changed) {
1143 CopyLeftBB = Pred;
1144 continue;
1145 }
1146 FoundReverseCopy = true;
1147 }
1148
1149 // If no reverse copy is found in predecessors, nothing to do.
1150 if (!FoundReverseCopy)
1151 return false;
1152
1153 // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
1154 // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
1155 // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
1156 // update IntA/IntB.
1157 //
1158 // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
1159 // MBB is hotter than CopyLeftBB.
1160 if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
1161 return false;
1162
1163 // Now (almost sure it's) ok to move copy.
1164 if (CopyLeftBB) {
1165 // Position in CopyLeftBB where we should insert new copy.
1166 auto InsPos = CopyLeftBB->getFirstTerminator();
1167
1168 // Make sure that B isn't referenced in the terminators (if any) at the end
1169 // of the predecessor since we're about to insert a new definition of B
1170 // before them.
1171 if (InsPos != CopyLeftBB->end()) {
1172 SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
1173 if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
1174 return false;
1175 }
1176
1177 LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
1178 << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
1179
1180 // Insert new copy to CopyLeftBB.
1181 MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
1182 TII->get(TargetOpcode::COPY), IntB.reg())
1183 .addReg(IntA.reg());
1184 SlotIndex NewCopyIdx =
1185 LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
1186 IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1187 for (LiveInterval::SubRange &SR : IntB.subranges())
1188 SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1189
1190 // If the newly created Instruction has an address of an instruction that was
1191 // deleted before (object recycled by the allocator) it needs to be removed from
1192 // the deleted list.
1193 ErasedInstrs.erase(NewCopyMI);
1194 } else {
1195 LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
1196 << printMBBReference(MBB) << '\t' << CopyMI);
1197 }
1198
1199 // Remove CopyMI.
1200 // Note: This is fine to remove the copy before updating the live-ranges.
1201 // While updating the live-ranges, we only look at slot indices and
1202 // never go back to the instruction.
1203 // Mark instructions as deleted.
1204 deleteInstr(&CopyMI);
1205
1206 // Update the liveness.
1207 SmallVector<SlotIndex, 8> EndPoints;
1208 VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
1209 LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
1210 &EndPoints);
1211 BValNo->markUnused();
1212 // Extend IntB to the EndPoints of its original live interval.
1213 LIS->extendToIndices(IntB, EndPoints);
1214
1215 // Now, do the same for its subranges.
1216 for (LiveInterval::SubRange &SR : IntB.subranges()) {
1217 EndPoints.clear();
1218 VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
1219 assert(BValNo && "All sublanes should be live");
1220 LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
1221 BValNo->markUnused();
1222 // We can have a situation where the result of the original copy is live,
1223 // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
1224 // the copy appear as an endpoint from pruneValue(), but we don't want it
1225 // to because the copy has been removed. We can go ahead and remove that
1226 // endpoint; there is no other situation here that there could be a use at
1227 // the same place as we know that the copy is a full copy.
1228 for (unsigned I = 0; I != EndPoints.size(); ) {
1229 if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
1230 EndPoints[I] = EndPoints.back();
1231 EndPoints.pop_back();
1232 continue;
1233 }
1234 ++I;
1235 }
1237 IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI,
1238 *LIS->getSlotIndexes());
1239 LIS->extendToIndices(SR, EndPoints, Undefs);
1240 }
1241 // If any dead defs were extended, truncate them.
1242 shrinkToUses(&IntB);
1243
1244 // Finally, update the live-range of IntA.
1245 shrinkToUses(&IntA);
1246 return true;
1247}
1248
1249/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
1250/// defining a subregister.
1251static bool definesFullReg(const MachineInstr &MI, Register Reg) {
1252 assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
1253
1254 for (const MachineOperand &Op : MI.all_defs()) {
1255 if (Op.getReg() != Reg)
1256 continue;
1257 // Return true if we define the full register or don't care about the value
1258 // inside other subregisters.
1259 if (Op.getSubReg() == 0 || Op.isUndef())
1260 return true;
1261 }
1262 return false;
1263}
1264
1265bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1266 MachineInstr *CopyMI,
1267 bool &IsDefCopy) {
1268 IsDefCopy = false;
1269 Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
1270 unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
1271 Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
1272 unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
1273 if (SrcReg.isPhysical())
1274 return false;
1275
1276 LiveInterval &SrcInt = LIS->getInterval(SrcReg);
1277 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1278 VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
1279 if (!ValNo)
1280 return false;
1281 if (ValNo->isPHIDef() || ValNo->isUnused())
1282 return false;
1284 if (!DefMI)
1285 return false;
1286 if (DefMI->isCopyLike()) {
1287 IsDefCopy = true;
1288 return false;
1289 }
1290 if (!TII->isAsCheapAsAMove(*DefMI))
1291 return false;
1292
1294 LiveRangeEdit Edit(&SrcInt, NewRegs, *MF, *LIS, nullptr, this);
1295 if (!Edit.checkRematerializable(ValNo, DefMI))
1296 return false;
1297
1298 if (!definesFullReg(*DefMI, SrcReg))
1299 return false;
1300 bool SawStore = false;
1301 if (!DefMI->isSafeToMove(AA, SawStore))
1302 return false;
1303 const MCInstrDesc &MCID = DefMI->getDesc();
1304 if (MCID.getNumDefs() != 1)
1305 return false;
1306 // Only support subregister destinations when the def is read-undef.
1307 MachineOperand &DstOperand = CopyMI->getOperand(0);
1308 Register CopyDstReg = DstOperand.getReg();
1309 if (DstOperand.getSubReg() && !DstOperand.isUndef())
1310 return false;
1311
1312 // If both SrcIdx and DstIdx are set, correct rematerialization would widen
1313 // the register substantially (beyond both source and dest size). This is bad
1314 // for performance since it can cascade through a function, introducing many
1315 // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
1316 // around after a few subreg copies).
1317 if (SrcIdx && DstIdx)
1318 return false;
1319
1320 const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
1321 if (!DefMI->isImplicitDef()) {
1322 if (DstReg.isPhysical()) {
1323 Register NewDstReg = DstReg;
1324
1325 unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
1326 DefMI->getOperand(0).getSubReg());
1327 if (NewDstIdx)
1328 NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
1329
1330 // Finally, make sure that the physical subregister that will be
1331 // constructed later is permitted for the instruction.
1332 if (!DefRC->contains(NewDstReg))
1333 return false;
1334 } else {
1335 // Theoretically, some stack frame reference could exist. Just make sure
1336 // it hasn't actually happened.
1337 assert(DstReg.isVirtual() &&
1338 "Only expect to deal with virtual or physical registers");
1339 }
1340 }
1341
1342 LiveRangeEdit::Remat RM(ValNo);
1343 RM.OrigMI = DefMI;
1344 if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx, true))
1345 return false;
1346
1347 DebugLoc DL = CopyMI->getDebugLoc();
1348 MachineBasicBlock *MBB = CopyMI->getParent();
1350 std::next(MachineBasicBlock::iterator(CopyMI));
1351 Edit.rematerializeAt(*MBB, MII, DstReg, RM, *TRI, false, SrcIdx, CopyMI);
1352 MachineInstr &NewMI = *std::prev(MII);
1353 NewMI.setDebugLoc(DL);
1354
1355 // In a situation like the following:
1356 // %0:subreg = instr ; DefMI, subreg = DstIdx
1357 // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0
1358 // instead of widening %1 to the register class of %0 simply do:
1359 // %1 = instr
1360 const TargetRegisterClass *NewRC = CP.getNewRC();
1361 if (DstIdx != 0) {
1362 MachineOperand &DefMO = NewMI.getOperand(0);
1363 if (DefMO.getSubReg() == DstIdx) {
1364 assert(SrcIdx == 0 && CP.isFlipped()
1365 && "Shouldn't have SrcIdx+DstIdx at this point");
1366 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
1367 const TargetRegisterClass *CommonRC =
1368 TRI->getCommonSubClass(DefRC, DstRC);
1369 if (CommonRC != nullptr) {
1370 NewRC = CommonRC;
1371
1372 // Instruction might contain "undef %0:subreg" as use operand:
1373 // %0:subreg = instr op_1, ..., op_N, undef %0:subreg, op_N+2, ...
1374 //
1375 // Need to check all operands.
1376 for (MachineOperand &MO : NewMI.operands()) {
1377 if (MO.isReg() && MO.getReg() == DstReg && MO.getSubReg() == DstIdx) {
1378 MO.setSubReg(0);
1379 }
1380 }
1381
1382 DstIdx = 0;
1383 DefMO.setIsUndef(false); // Only subregs can have def+undef.
1384 }
1385 }
1386 }
1387
1388 // CopyMI may have implicit operands, save them so that we can transfer them
1389 // over to the newly materialized instruction after CopyMI is removed.
1391 ImplicitOps.reserve(CopyMI->getNumOperands() -
1392 CopyMI->getDesc().getNumOperands());
1393 for (unsigned I = CopyMI->getDesc().getNumOperands(),
1394 E = CopyMI->getNumOperands();
1395 I != E; ++I) {
1396 MachineOperand &MO = CopyMI->getOperand(I);
1397 if (MO.isReg()) {
1398 assert(MO.isImplicit() && "No explicit operands after implicit operands.");
1399 assert(MO.getReg().isPhysical() && "unexpected implicit virtual register def");
1400 ImplicitOps.push_back(MO);
1401 }
1402 }
1403
1404 CopyMI->eraseFromParent();
1405 ErasedInstrs.insert(CopyMI);
1406
1407 // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
1408 // We need to remember these so we can add intervals once we insert
1409 // NewMI into SlotIndexes.
1410 SmallVector<MCRegister, 4> NewMIImplDefs;
1411 for (unsigned i = NewMI.getDesc().getNumOperands(),
1412 e = NewMI.getNumOperands();
1413 i != e; ++i) {
1414 MachineOperand &MO = NewMI.getOperand(i);
1415 if (MO.isReg() && MO.isDef()) {
1416 assert(MO.isImplicit() && MO.isDead() && MO.getReg().isPhysical());
1417 NewMIImplDefs.push_back(MO.getReg().asMCReg());
1418 }
1419 }
1420
1421 if (DstReg.isVirtual()) {
1422 unsigned NewIdx = NewMI.getOperand(0).getSubReg();
1423
1424 if (DefRC != nullptr) {
1425 if (NewIdx)
1426 NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
1427 else
1428 NewRC = TRI->getCommonSubClass(NewRC, DefRC);
1429 assert(NewRC && "subreg chosen for remat incompatible with instruction");
1430 }
1431 // Remap subranges to new lanemask and change register class.
1432 LiveInterval &DstInt = LIS->getInterval(DstReg);
1433 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1434 SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
1435 }
1436 MRI->setRegClass(DstReg, NewRC);
1437
1438 // Update machine operands and add flags.
1439 updateRegDefsUses(DstReg, DstReg, DstIdx);
1440 NewMI.getOperand(0).setSubReg(NewIdx);
1441 // updateRegDefUses can add an "undef" flag to the definition, since
1442 // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
1443 // sure that "undef" is not set.
1444 if (NewIdx == 0)
1445 NewMI.getOperand(0).setIsUndef(false);
1446 // Add dead subregister definitions if we are defining the whole register
1447 // but only part of it is live.
1448 // This could happen if the rematerialization instruction is rematerializing
1449 // more than actually is used in the register.
1450 // An example would be:
1451 // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
1452 // ; Copying only part of the register here, but the rest is undef.
1453 // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
1454 // ==>
1455 // ; Materialize all the constants but only using one
1456 // %2 = LOAD_CONSTANTS 5, 8
1457 //
1458 // at this point for the part that wasn't defined before we could have
1459 // subranges missing the definition.
1460 if (NewIdx == 0 && DstInt.hasSubRanges()) {
1461 SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1462 SlotIndex DefIndex =
1463 CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1464 LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
1466 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1467 if (!SR.liveAt(DefIndex))
1468 SR.createDeadDef(DefIndex, Alloc);
1469 MaxMask &= ~SR.LaneMask;
1470 }
1471 if (MaxMask.any()) {
1472 LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
1473 SR->createDeadDef(DefIndex, Alloc);
1474 }
1475 }
1476
1477 // Make sure that the subrange for resultant undef is removed
1478 // For example:
1479 // %1:sub1<def,read-undef> = LOAD CONSTANT 1
1480 // %2 = COPY %1
1481 // ==>
1482 // %2:sub1<def, read-undef> = LOAD CONSTANT 1
1483 // ; Correct but need to remove the subrange for %2:sub0
1484 // ; as it is now undef
1485 if (NewIdx != 0 && DstInt.hasSubRanges()) {
1486 // The affected subregister segments can be removed.
1487 SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1488 LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
1489 bool UpdatedSubRanges = false;
1490 SlotIndex DefIndex =
1491 CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1493 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1494 if ((SR.LaneMask & DstMask).none()) {
1496 << "Removing undefined SubRange "
1497 << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
1498
1499 if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
1500 // VNI is in ValNo - remove any segments in this SubRange that have
1501 // this ValNo
1502 SR.removeValNo(RmValNo);
1503 }
1504
1505 // We may not have a defined value at this point, but still need to
1506 // clear out any empty subranges tentatively created by
1507 // updateRegDefUses. The original subrange def may have only undefed
1508 // some lanes.
1509 UpdatedSubRanges = true;
1510 } else {
1511 // We know that this lane is defined by this instruction,
1512 // but at this point it may be empty because it is not used by
1513 // anything. This happens when updateRegDefUses adds the missing
1514 // lanes. Assign that lane a dead def so that the interferences
1515 // are properly modeled.
1516 if (SR.empty())
1517 SR.createDeadDef(DefIndex, Alloc);
1518 }
1519 }
1520 if (UpdatedSubRanges)
1521 DstInt.removeEmptySubRanges();
1522 }
1523 } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
1524 // The New instruction may be defining a sub-register of what's actually
1525 // been asked for. If so it must implicitly define the whole thing.
1526 assert(DstReg.isPhysical() &&
1527 "Only expect virtual or physical registers in remat");
1528 NewMI.getOperand(0).setIsDead(true);
1530 CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
1531 // Record small dead def live-ranges for all the subregisters
1532 // of the destination register.
1533 // Otherwise, variables that live through may miss some
1534 // interferences, thus creating invalid allocation.
1535 // E.g., i386 code:
1536 // %1 = somedef ; %1 GR8
1537 // %2 = remat ; %2 GR32
1538 // CL = COPY %2.sub_8bit
1539 // = somedef %1 ; %1 GR8
1540 // =>
1541 // %1 = somedef ; %1 GR8
1542 // dead ECX = remat ; implicit-def CL
1543 // = somedef %1 ; %1 GR8
1544 // %1 will see the interferences with CL but not with CH since
1545 // no live-ranges would have been created for ECX.
1546 // Fix that!
1547 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1548 for (MCRegUnit Unit : TRI->regunits(NewMI.getOperand(0).getReg()))
1549 if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
1550 LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1551 }
1552
1553 if (NewMI.getOperand(0).getSubReg())
1554 NewMI.getOperand(0).setIsUndef();
1555
1556 // Transfer over implicit operands to the rematerialized instruction.
1557 for (MachineOperand &MO : ImplicitOps)
1558 NewMI.addOperand(MO);
1559
1560 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1561 for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
1562 MCRegister Reg = NewMIImplDefs[i];
1563 for (MCRegUnit Unit : TRI->regunits(Reg))
1564 if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
1565 LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1566 }
1567
1568 LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
1569 ++NumReMats;
1570
1571 // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
1572 // to describe DstReg instead.
1573 if (MRI->use_nodbg_empty(SrcReg)) {
1574 for (MachineOperand &UseMO :
1575 llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {
1576 MachineInstr *UseMI = UseMO.getParent();
1577 if (UseMI->isDebugInstr()) {
1578 if (DstReg.isPhysical())
1579 UseMO.substPhysReg(DstReg, *TRI);
1580 else
1581 UseMO.setReg(DstReg);
1582 // Move the debug value directly after the def of the rematerialized
1583 // value in DstReg.
1584 MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
1585 LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
1586 }
1587 }
1588 }
1589
1590 if (ToBeUpdated.count(SrcReg))
1591 return true;
1592
1593 unsigned NumCopyUses = 0;
1594 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
1595 if (UseMO.getParent()->isCopyLike())
1596 NumCopyUses++;
1597 }
1598 if (NumCopyUses < LateRematUpdateThreshold) {
1599 // The source interval can become smaller because we removed a use.
1600 shrinkToUses(&SrcInt, &DeadDefs);
1601 if (!DeadDefs.empty())
1602 eliminateDeadDefs(&Edit);
1603 } else {
1604 ToBeUpdated.insert(SrcReg);
1605 }
1606 return true;
1607}
1608
/// Check whether \p CopyMI copies a value that is not actually live at the
/// copy (an <undef> copy left behind by ProcessImplicitDefs) and, if so,
/// eliminate it: either morph it into an IMPLICIT_DEF when the destination
/// value feeds a PHI, or remove the destination value number and mark the
/// dependent uses <undef>. Returns the (possibly morphed) instruction when
/// the copy was eliminated, or nullptr when it is not an undef copy.
MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
  // ProcessImplicitDefs may leave some copies of <undef> values, it only
  // removes local variables. When we have a copy like:
  //
  // %1 = COPY undef %2
  //
  // We delete the copy and remove the corresponding value number from %1.
  // Any uses of that value number are marked as <undef>.

  // Note that we do not query CoalescerPair here but redo isMoveInstr as the
  // CoalescerPair may have a new register class with adjusted subreg indices
  // at this point.
  Register SrcReg, DstReg;
  unsigned SrcSubIdx = 0, DstSubIdx = 0;
  if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
    return nullptr;

  SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
  const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
  // CopyMI is undef iff SrcReg is not live before the instruction.
  if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
    // Only the lanes actually read through SrcSubIdx matter: if any of them
    // is live at the copy, this is not an undef copy.
    LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
    for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
      if ((SR.LaneMask & SrcMask).none())
        continue;
      if (SR.liveAt(Idx))
        return nullptr;
    }
  } else if (SrcLI.liveAt(Idx))
    return nullptr;

  // If the undef copy defines a live-out value (i.e. an input to a PHI def),
  // then replace it with an IMPLICIT_DEF.
  LiveInterval &DstLI = LIS->getInterval(DstReg);
  SlotIndex RegIndex = Idx.getRegSlot();
  LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
  assert(Seg != nullptr && "No segment for defining instruction");
  VNInfo *V = DstLI.getVNInfoAt(Seg->end);

  // The source interval may also have been on an undef use, in which case the
  // copy introduced a live value.
  if (((V && V->isPHIDef()) || (!V && !DstLI.liveAt(Idx)))) {
    CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
    // IMPLICIT_DEF takes no use operands: drop them all, iterating backwards
    // so removal does not shift the indices still to be visited.
    for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
      MachineOperand &MO = CopyMI->getOperand(i-1);
      if (MO.isReg() && MO.isUse())
        CopyMI->removeOperand(i-1);
    }
    LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
               "implicit def\n");
    return CopyMI;
  }

  // Remove any DstReg segments starting at the instruction.
  LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");

  // Remove value or merge with previous one in case of a subregister def.
  if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
    // A subreg def: the value defined at the copy merges into the value that
    // was live into it; the main range keeps a single value number.
    VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
    DstLI.MergeValueNumberInto(VNI, PrevVNI);

    // The affected subregister segments can be removed.
    LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
    for (LiveInterval::SubRange &SR : DstLI.subranges()) {
      if ((SR.LaneMask & DstMask).none())
        continue;

      VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
      assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
      SR.removeValNo(SVNI);
    }
    DstLI.removeEmptySubRanges();
  } else
    LIS->removeVRegDefAt(DstLI, RegIndex);

  // Mark uses as undef.
  for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
    if (MO.isDef() /*|| MO.isUndef()*/)
      continue;
    const MachineInstr &MI = *MO.getParent();
    SlotIndex UseIdx = LIS->getInstructionIndex(MI);
    LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
    bool isLive;
    // With subranges, the use is live only if one of the lanes it reads is
    // still live; otherwise fall back to the main range.
    if (!UseMask.all() && DstLI.hasSubRanges()) {
      isLive = false;
      for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
        if ((SR.LaneMask & UseMask).none())
          continue;
        if (SR.liveAt(UseIdx)) {
          isLive = true;
          break;
        }
      }
    } else
      isLive = DstLI.liveAt(UseIdx);
    if (isLive)
      continue;
    MO.setIsUndef(true);
    LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
  }

  // A def of a subregister may be a use of the other subregisters, so
  // deleting a def of a subregister may also remove uses. Since CopyMI
  // is still part of the function (but about to be erased), mark all
  // defs of DstReg in it as <undef>, so that shrinkToUses would
  // ignore them.
  for (MachineOperand &MO : CopyMI->all_defs())
    if (MO.getReg() == DstReg)
      MO.setIsUndef(true);
  LIS->shrinkToUses(&DstLI);

  return CopyMI;
}
1722
1723void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
1724 MachineOperand &MO, unsigned SubRegIdx) {
1725 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1726 if (MO.isDef())
1727 Mask = ~Mask;
1728 bool IsUndef = true;
1729 for (const LiveInterval::SubRange &S : Int.subranges()) {
1730 if ((S.LaneMask & Mask).none())
1731 continue;
1732 if (S.liveAt(UseIdx)) {
1733 IsUndef = false;
1734 break;
1735 }
1736 }
1737 if (IsUndef) {
1738 MO.setIsUndef(true);
1739 // We found out some subregister use is actually reading an undefined
1740 // value. In some cases the whole vreg has become undefined at this
1741 // point so we have to potentially shrink the main range if the
1742 // use was ending a live segment there.
1743 LiveQueryResult Q = Int.Query(UseIdx);
1744 if (Q.valueOut() == nullptr)
1745 ShrinkMainRange = true;
1746 }
1747}
1748
1749void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
1750 unsigned SubIdx) {
1751 bool DstIsPhys = DstReg.isPhysical();
1752 LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
1753
1754 if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
1755 for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
1756 unsigned SubReg = MO.getSubReg();
1757 if (SubReg == 0 || MO.isUndef())
1758 continue;
1759 MachineInstr &MI = *MO.getParent();
1760 if (MI.isDebugInstr())
1761 continue;
1762 SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
1763 addUndefFlag(*DstInt, UseIdx, MO, SubReg);
1764 }
1765 }
1766
1769 I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
1770 I != E; ) {
1771 MachineInstr *UseMI = &*(I++);
1772
1773 // Each instruction can only be rewritten once because sub-register
1774 // composition is not always idempotent. When SrcReg != DstReg, rewriting
1775 // the UseMI operands removes them from the SrcReg use-def chain, but when
1776 // SrcReg is DstReg we could encounter UseMI twice if it has multiple
1777 // operands mentioning the virtual register.
1778 if (SrcReg == DstReg && !Visited.insert(UseMI).second)
1779 continue;
1780
1782 bool Reads, Writes;
1783 std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
1784
1785 // If SrcReg wasn't read, it may still be the case that DstReg is live-in
1786 // because SrcReg is a sub-register.
1787 if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
1788 Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
1789
1790 // Replace SrcReg with DstReg in all UseMI operands.
1791 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1792 MachineOperand &MO = UseMI->getOperand(Ops[i]);
1793
1794 // Adjust <undef> flags in case of sub-register joins. We don't want to
1795 // turn a full def into a read-modify-write sub-register def and vice
1796 // versa.
1797 if (SubIdx && MO.isDef())
1798 MO.setIsUndef(!Reads);
1799
1800 // A subreg use of a partially undef (super) register may be a complete
1801 // undef use now and then has to be marked that way.
1802 if (MO.isUse() && !DstIsPhys) {
1803 unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
1804 if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
1805 if (!DstInt->hasSubRanges()) {
1807 LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
1808 LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
1809 LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
1810 DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
1811 // The unused lanes are just empty live-ranges at this point.
1812 // It is the caller responsibility to set the proper
1813 // dead segments if there is an actual dead def of the
1814 // unused lanes. This may happen with rematerialization.
1815 DstInt->createSubRange(Allocator, UnusedLanes);
1816 }
1817 SlotIndex MIIdx = UseMI->isDebugInstr()
1819 : LIS->getInstructionIndex(*UseMI);
1820 SlotIndex UseIdx = MIIdx.getRegSlot(true);
1821 addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx);
1822 }
1823 }
1824
1825 if (DstIsPhys)
1826 MO.substPhysReg(DstReg, *TRI);
1827 else
1828 MO.substVirtReg(DstReg, SubIdx, *TRI);
1829 }
1830
1831 LLVM_DEBUG({
1832 dbgs() << "\t\tupdated: ";
1833 if (!UseMI->isDebugInstr())
1834 dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
1835 dbgs() << *UseMI;
1836 });
1837 }
1838}
1839
1840bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
1841 // Always join simple intervals that are defined by a single copy from a
1842 // reserved register. This doesn't increase register pressure, so it is
1843 // always beneficial.
1844 if (!MRI->isReserved(CP.getDstReg())) {
1845 LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
1846 return false;
1847 }
1848
1849 LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
1850 if (JoinVInt.containsOneValue())
1851 return true;
1852
1853 LLVM_DEBUG(
1854 dbgs() << "\tCannot join complex intervals into reserved register.\n");
1855 return false;
1856}
1857
1858bool RegisterCoalescer::copyValueUndefInPredecessors(
1860 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
1861 SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
1862 if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
1863 // If this is a self loop, we may be reading the same value.
1864 if (V->id != SLRQ.valueOutOrDead()->id)
1865 return false;
1866 }
1867 }
1868
1869 return true;
1870}
1871
1872void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
1873 Register Reg,
1874 LaneBitmask PrunedLanes) {
1875 // If we had other instructions in the segment reading the undef sublane
1876 // value, we need to mark them with undef.
1877 for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
1878 unsigned SubRegIdx = MO.getSubReg();
1879 if (SubRegIdx == 0 || MO.isUndef())
1880 continue;
1881
1882 LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1883 SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
1884 for (LiveInterval::SubRange &S : LI.subranges()) {
1885 if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
1886 MO.setIsUndef();
1887 break;
1888 }
1889 }
1890 }
1891
1893
1894 // A def of a subregister may be a use of other register lanes. Replacing
1895 // such a def with a def of a different register will eliminate the use,
1896 // and may cause the recorded live range to be larger than the actual
1897 // liveness in the program IR.
1898 LIS->shrinkToUses(&LI);
1899}
1900
1901bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
1902 Again = false;
1903 LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
1904
1906 if (!CP.setRegisters(CopyMI)) {
1907 LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
1908 return false;
1909 }
1910
1911 if (CP.getNewRC()) {
1912 auto SrcRC = MRI->getRegClass(CP.getSrcReg());
1913 auto DstRC = MRI->getRegClass(CP.getDstReg());
1914 unsigned SrcIdx = CP.getSrcIdx();
1915 unsigned DstIdx = CP.getDstIdx();
1916 if (CP.isFlipped()) {
1917 std::swap(SrcIdx, DstIdx);
1918 std::swap(SrcRC, DstRC);
1919 }
1920 if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
1921 CP.getNewRC(), *LIS)) {
1922 LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
1923 return false;
1924 }
1925 }
1926
1927 // Dead code elimination. This really should be handled by MachineDCE, but
1928 // sometimes dead copies slip through, and we can't generate invalid live
1929 // ranges.
1930 if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
1931 LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
1932 DeadDefs.push_back(CopyMI);
1933 eliminateDeadDefs();
1934 return true;
1935 }
1936
1937 // Eliminate undefs.
1938 if (!CP.isPhys()) {
1939 // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
1940 if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
1941 if (UndefMI->isImplicitDef())
1942 return false;
1943 deleteInstr(CopyMI);
1944 return false; // Not coalescable.
1945 }
1946 }
1947
1948 // Coalesced copies are normally removed immediately, but transformations
1949 // like removeCopyByCommutingDef() can inadvertently create identity copies.
1950 // When that happens, just join the values and remove the copy.
1951 if (CP.getSrcReg() == CP.getDstReg()) {
1952 LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
1953 LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
1954 const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1955 LiveQueryResult LRQ = LI.Query(CopyIdx);
1956 if (VNInfo *DefVNI = LRQ.valueDefined()) {
1957 VNInfo *ReadVNI = LRQ.valueIn();
1958 assert(ReadVNI && "No value before copy and no <undef> flag.");
1959 assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
1960
1961 // Track incoming undef lanes we need to eliminate from the subrange.
1962 LaneBitmask PrunedLanes;
1963 MachineBasicBlock *MBB = CopyMI->getParent();
1964
1965 // Process subregister liveranges.
1966 for (LiveInterval::SubRange &S : LI.subranges()) {
1967 LiveQueryResult SLRQ = S.Query(CopyIdx);
1968 if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
1969 if (VNInfo *SReadVNI = SLRQ.valueIn())
1970 SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);
1971
1972 // If this copy introduced an undef subrange from an incoming value,
1973 // we need to eliminate the undef live in values from the subrange.
1974 if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
1975 LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
1976 PrunedLanes |= S.LaneMask;
1977 S.removeValNo(SDefVNI);
1978 }
1979 }
1980 }
1981
1982 LI.MergeValueNumberInto(DefVNI, ReadVNI);
1983 if (PrunedLanes.any()) {
1984 LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
1985 << PrunedLanes << '\n');
1986 setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
1987 }
1988
1989 LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
1990 }
1991 deleteInstr(CopyMI);
1992 return true;
1993 }
1994
1995 // Enforce policies.
1996 if (CP.isPhys()) {
1997 LLVM_DEBUG(dbgs() << "\tConsidering merging "
1998 << printReg(CP.getSrcReg(), TRI) << " with "
1999 << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
2000 if (!canJoinPhys(CP)) {
2001 // Before giving up coalescing, if definition of source is defined by
2002 // trivial computation, try rematerializing it.
2003 bool IsDefCopy = false;
2004 if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
2005 return true;
2006 if (IsDefCopy)
2007 Again = true; // May be possible to coalesce later.
2008 return false;
2009 }
2010 } else {
2011 // When possible, let DstReg be the larger interval.
2012 if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
2013 LIS->getInterval(CP.getDstReg()).size())
2014 CP.flip();
2015
2016 LLVM_DEBUG({
2017 dbgs() << "\tConsidering merging to "
2018 << TRI->getRegClassName(CP.getNewRC()) << " with ";
2019 if (CP.getDstIdx() && CP.getSrcIdx())
2020 dbgs() << printReg(CP.getDstReg()) << " in "
2021 << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
2022 << printReg(CP.getSrcReg()) << " in "
2023 << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
2024 else
2025 dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
2026 << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
2027 });
2028 }
2029
2030 ShrinkMask = LaneBitmask::getNone();
2031 ShrinkMainRange = false;
2032
2033 // Okay, attempt to join these two intervals. On failure, this returns false.
2034 // Otherwise, if one of the intervals being joined is a physreg, this method
2035 // always canonicalizes DstInt to be it. The output "SrcInt" will not have
2036 // been modified, so we can use this information below to update aliases.
2037 if (!joinIntervals(CP)) {
2038 // Coalescing failed.
2039
2040 // If definition of source is defined by trivial computation, try
2041 // rematerializing it.
2042 bool IsDefCopy = false;
2043 if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
2044 return true;
2045
2046 // If we can eliminate the copy without merging the live segments, do so
2047 // now.
2048 if (!CP.isPartial() && !CP.isPhys()) {
2049 bool Changed = adjustCopiesBackFrom(CP, CopyMI);
2050 bool Shrink = false;
2051 if (!Changed)
2052 std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
2053 if (Changed) {
2054 deleteInstr(CopyMI);
2055 if (Shrink) {
2056 Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
2057 LiveInterval &DstLI = LIS->getInterval(DstReg);
2058 shrinkToUses(&DstLI);
2059 LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n');
2060 }
2061 LLVM_DEBUG(dbgs() << "\tTrivial!\n");
2062 return true;
2063 }
2064 }
2065
2066 // Try and see if we can partially eliminate the copy by moving the copy to
2067 // its predecessor.
2068 if (!CP.isPartial() && !CP.isPhys())
2069 if (removePartialRedundancy(CP, *CopyMI))
2070 return true;
2071
2072 // Otherwise, we are unable to join the intervals.
2073 LLVM_DEBUG(dbgs() << "\tInterference!\n");
2074 Again = true; // May be possible to coalesce later.
2075 return false;
2076 }
2077
2078 // Coalescing to a virtual register that is of a sub-register class of the
2079 // other. Make sure the resulting register is set to the right register class.
2080 if (CP.isCrossClass()) {
2081 ++numCrossRCs;
2082 MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
2083 }
2084
2085 // Removing sub-register copies can ease the register class constraints.
2086 // Make sure we attempt to inflate the register class of DstReg.
2087 if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
2088 InflateRegs.push_back(CP.getDstReg());
2089
2090 // CopyMI has been erased by joinIntervals at this point. Remove it from
2091 // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
2092 // to the work list. This keeps ErasedInstrs from growing needlessly.
2093 ErasedInstrs.erase(CopyMI);
2094
2095 // Rewrite all SrcReg operands to DstReg.
2096 // Also update DstReg operands to include DstIdx if it is set.
2097 if (CP.getDstIdx())
2098 updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
2099 updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
2100
2101 // Shrink subregister ranges if necessary.
2102 if (ShrinkMask.any()) {
2103 LiveInterval &LI = LIS->getInterval(CP.getDstReg());
2104 for (LiveInterval::SubRange &S : LI.subranges()) {
2105 if ((S.LaneMask & ShrinkMask).none())
2106 continue;
2107 LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
2108 << ")\n");
2109 LIS->shrinkToUses(S, LI.reg());
2110 ShrinkMainRange = true;
2111 }
2113 }
2114
2115 // CP.getSrcReg()'s live interval has been merged into CP.getDstReg's live
2116 // interval. Since CP.getSrcReg() is in ToBeUpdated set and its live interval
2117 // is not up-to-date, need to update the merged live interval here.
2118 if (ToBeUpdated.count(CP.getSrcReg()))
2119 ShrinkMainRange = true;
2120
2121 if (ShrinkMainRange) {
2122 LiveInterval &LI = LIS->getInterval(CP.getDstReg());
2123 shrinkToUses(&LI);
2124 }
2125
2126 // SrcReg is guaranteed to be the register whose live interval that is
2127 // being merged.
2128 LIS->removeInterval(CP.getSrcReg());
2129
2130 // Update regalloc hint.
2131 TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
2132
2133 LLVM_DEBUG({
2134 dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
2135 << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
2136 dbgs() << "\tResult = ";
2137 if (CP.isPhys())
2138 dbgs() << printReg(CP.getDstReg(), TRI);
2139 else
2140 dbgs() << LIS->getInterval(CP.getDstReg());
2141 dbgs() << '\n';
2142 });
2143
2144 ++numJoins;
2145 return true;
2146}
2147
2148bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
2149 Register DstReg = CP.getDstReg();
2150 Register SrcReg = CP.getSrcReg();
2151 assert(CP.isPhys() && "Must be a physreg copy");
2152 assert(MRI->isReserved(DstReg) && "Not a reserved register");
2153 LiveInterval &RHS = LIS->getInterval(SrcReg);
2154 LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
2155
2156 assert(RHS.containsOneValue() && "Invalid join with reserved register");
2157
2158 // Optimization for reserved registers like ESP. We can only merge with a
2159 // reserved physreg if RHS has a single value that is a copy of DstReg.
2160 // The live range of the reserved register will look like a set of dead defs
2161 // - we don't properly track the live range of reserved registers.
2162
2163 // Deny any overlapping intervals. This depends on all the reserved
2164 // register live ranges to look like dead defs.
2165 if (!MRI->isConstantPhysReg(DstReg)) {
2166 for (MCRegUnit Unit : TRI->regunits(DstReg)) {
2167 // Abort if not all the regunits are reserved.
2168 for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) {
2169 if (!MRI->isReserved(*RI))
2170 return false;
2171 }
2172 if (RHS.overlaps(LIS->getRegUnit(Unit))) {
2173 LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(Unit, TRI)
2174 << '\n');
2175 return false;
2176 }
2177 }
2178
2179 // We must also check for overlaps with regmask clobbers.
2180 BitVector RegMaskUsable;
2181 if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
2182 !RegMaskUsable.test(DstReg)) {
2183 LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
2184 return false;
2185 }
2186 }
2187
2188 // Skip any value computations, we are not adding new values to the
2189 // reserved register. Also skip merging the live ranges, the reserved
2190 // register live range doesn't need to be accurate as long as all the
2191 // defs are there.
2192
2193 // Delete the identity copy.
2194 MachineInstr *CopyMI;
2195 if (CP.isFlipped()) {
2196 // Physreg is copied into vreg
2197 // %y = COPY %physreg_x
2198 // ... //< no other def of %physreg_x here
2199 // use %y
2200 // =>
2201 // ...
2202 // use %physreg_x
2203 CopyMI = MRI->getVRegDef(SrcReg);
2204 deleteInstr(CopyMI);
2205 } else {
2206 // VReg is copied into physreg:
2207 // %y = def
2208 // ... //< no other def or use of %physreg_x here
2209 // %physreg_x = COPY %y
2210 // =>
2211 // %physreg_x = def
2212 // ...
2213 if (!MRI->hasOneNonDBGUse(SrcReg)) {
2214 LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
2215 return false;
2216 }
2217
2218 if (!LIS->intervalIsInOneMBB(RHS)) {
2219 LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
2220 return false;
2221 }
2222
2223 MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
2224 CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
2225 SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
2226 SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
2227
2228 if (!MRI->isConstantPhysReg(DstReg)) {
2229 // We checked above that there are no interfering defs of the physical
2230 // register. However, for this case, where we intend to move up the def of
2231 // the physical register, we also need to check for interfering uses.
2232 SlotIndexes *Indexes = LIS->getSlotIndexes();
2233 for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
2234 SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
2236 if (MI->readsRegister(DstReg, TRI)) {
2237 LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
2238 return false;
2239 }
2240 }
2241 }
2242
2243 // We're going to remove the copy which defines a physical reserved
2244 // register, so remove its valno, etc.
2245 LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
2246 << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
2247
2248 LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
2249 deleteInstr(CopyMI);
2250
2251 // Create a new dead def at the new def location.
2252 for (MCRegUnit Unit : TRI->regunits(DstReg)) {
2253 LiveRange &LR = LIS->getRegUnit(Unit);
2254 LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
2255 }
2256 }
2257
2258 // We don't track kills for reserved registers.
2259 MRI->clearKillFlags(CP.getSrcReg());
2260
2261 return true;
2262}
2263
2264//===----------------------------------------------------------------------===//
2265// Interference checking and interval joining
2266//===----------------------------------------------------------------------===//
2267//
2268// In the easiest case, the two live ranges being joined are disjoint, and
2269// there is no interference to consider. It is quite common, though, to have
2270// overlapping live ranges, and we need to check if the interference can be
2271// resolved.
2272//
2273// The live range of a single SSA value forms a sub-tree of the dominator tree.
2274// This means that two SSA values overlap if and only if the def of one value
2275// is contained in the live range of the other value. As a special case, the
2276// overlapping values can be defined at the same index.
2277//
2278// The interference from an overlapping def can be resolved in these cases:
2279//
2280// 1. Coalescable copies. The value is defined by a copy that would become an
2281// identity copy after joining SrcReg and DstReg. The copy instruction will
2282// be removed, and the value will be merged with the source value.
2283//
2284// There can be several copies back and forth, causing many values to be
2285// merged into one. We compute a list of ultimate values in the joined live
// range as well as a mapping from the old value numbers.
2287//
2288// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
2289// predecessors have a live out value. It doesn't cause real interference,
2290// and can be merged into the value it overlaps. Like a coalescable copy, it
2291// can be erased after joining.
2292//
2293// 3. Copy of external value. The overlapping def may be a copy of a value that
2294// is already in the other register. This is like a coalescable copy, but
2295// the live range of the source register must be trimmed after erasing the
2296// copy instruction:
2297//
2298// %src = COPY %ext
2299// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
2300//
2301// 4. Clobbering undefined lanes. Vector registers are sometimes built by
2302// defining one lane at a time:
2303//
2304// %dst:ssub0<def,read-undef> = FOO
2305// %src = BAR
2306// %dst:ssub1 = COPY %src
2307//
2308// The live range of %src overlaps the %dst value defined by FOO, but
2309// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
2310// which was undef anyway.
2311//
2312// The value mapping is more complicated in this case. The final live range
2313// will have different value numbers for both FOO and BAR, but there is no
2314// simple mapping from old to new values. It may even be necessary to add
2315// new PHI values.
2316//
2317// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
2318// is live, but never read. This can happen because we don't compute
2319// individual live ranges per lane.
2320//
2321// %dst = FOO
2322// %src = BAR
2323// %dst:ssub1 = COPY %src
2324//
2325// This kind of interference is only resolved locally. If the clobbered
2326// lane value escapes the block, the join is aborted.
2327
2328namespace {
2329
2330/// Track information about values in a single virtual register about to be
2331/// joined. Objects of this class are always created in pairs - one for each
2332/// side of the CoalescerPair (or one for each lane of a side of the coalescer
2333/// pair)
2334class JoinVals {
2335 /// Live range we work on.
2336 LiveRange &LR;
2337
2338 /// (Main) register we work on.
2339 const Register Reg;
2340
2341 /// Reg (and therefore the values in this liverange) will end up as
2342 /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
2343 /// CP.SrcIdx.
2344 const unsigned SubIdx;
2345
2346 /// The LaneMask that this liverange will occupy the coalesced register. May
2347 /// be smaller than the lanemask produced by SubIdx when merging subranges.
2348 const LaneBitmask LaneMask;
2349
2350 /// This is true when joining sub register ranges, false when joining main
2351 /// ranges.
2352 const bool SubRangeJoin;
2353
2354 /// Whether the current LiveInterval tracks subregister liveness.
2355 const bool TrackSubRegLiveness;
2356
2357 /// Values that will be present in the final live range.
2358 SmallVectorImpl<VNInfo*> &NewVNInfo;
2359
2360 const CoalescerPair &CP;
2361 LiveIntervals *LIS;
2362 SlotIndexes *Indexes;
2363 const TargetRegisterInfo *TRI;
2364
2365 /// Value number assignments. Maps value numbers in LI to entries in
2366 /// NewVNInfo. This is suitable for passing to LiveInterval::join().
2367 SmallVector<int, 8> Assignments;
2368
2369 public:
2370 /// Conflict resolution for overlapping values.
2371 enum ConflictResolution {
2372 /// No overlap, simply keep this value.
2373 CR_Keep,
2374
2375 /// Merge this value into OtherVNI and erase the defining instruction.
2376 /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
2377 /// values.
2378 CR_Erase,
2379
2380 /// Merge this value into OtherVNI but keep the defining instruction.
2381 /// This is for the special case where OtherVNI is defined by the same
2382 /// instruction.
2383 CR_Merge,
2384
2385 /// Keep this value, and have it replace OtherVNI where possible. This
2386 /// complicates value mapping since OtherVNI maps to two different values
2387 /// before and after this def.
2388 /// Used when clobbering undefined or dead lanes.
2389 CR_Replace,
2390
2391 /// Unresolved conflict. Visit later when all values have been mapped.
2392 CR_Unresolved,
2393
2394 /// Unresolvable conflict. Abort the join.
2395 CR_Impossible
2396 };
2397
2398 private:
2399 /// Per-value info for LI. The lane bit masks are all relative to the final
2400 /// joined register, so they can be compared directly between SrcReg and
2401 /// DstReg.
2402 struct Val {
2403 ConflictResolution Resolution = CR_Keep;
2404
2405 /// Lanes written by this def, 0 for unanalyzed values.
2406 LaneBitmask WriteLanes;
2407
2408 /// Lanes with defined values in this register. Other lanes are undef and
2409 /// safe to clobber.
2410 LaneBitmask ValidLanes;
2411
2412 /// Value in LI being redefined by this def.
2413 VNInfo *RedefVNI = nullptr;
2414
2415 /// Value in the other live range that overlaps this def, if any.
2416 VNInfo *OtherVNI = nullptr;
2417
2418 /// Is this value an IMPLICIT_DEF that can be erased?
2419 ///
2420 /// IMPLICIT_DEF values should only exist at the end of a basic block that
2421 /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
2422 /// safely erased if they are overlapping a live value in the other live
2423 /// interval.
2424 ///
2425 /// Weird control flow graphs and incomplete PHI handling in
2426 /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
2427 /// longer live ranges. Such IMPLICIT_DEF values should be treated like
2428 /// normal values.
2429 bool ErasableImplicitDef = false;
2430
2431 /// True when the live range of this value will be pruned because of an
2432 /// overlapping CR_Replace value in the other live range.
2433 bool Pruned = false;
2434
2435 /// True once Pruned above has been computed.
2436 bool PrunedComputed = false;
2437
2438 /// True if this value is determined to be identical to OtherVNI
2439 /// (in valuesIdentical). This is used with CR_Erase where the erased
2440 /// copy is redundant, i.e. the source value is already the same as
2441 /// the destination. In such cases the subranges need to be updated
2442 /// properly. See comment at pruneSubRegValues for more info.
2443 bool Identical = false;
2444
2445 Val() = default;
2446
2447 bool isAnalyzed() const { return WriteLanes.any(); }
2448
2449 /// Mark this value as an IMPLICIT_DEF which must be kept as if it were an
2450 /// ordinary value.
2451 void mustKeepImplicitDef(const TargetRegisterInfo &TRI,
2452 const MachineInstr &ImpDef) {
2453 assert(ImpDef.isImplicitDef());
2454 ErasableImplicitDef = false;
2455 ValidLanes = TRI.getSubRegIndexLaneMask(ImpDef.getOperand(0).getSubReg());
2456 }
2457 };
2458
2459 /// One entry per value number in LI.
2461
2462 /// Compute the bitmask of lanes actually written by DefMI.
2463 /// Set Redef if there are any partial register definitions that depend on the
2464 /// previous value of the register.
2465 LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
2466
2467 /// Find the ultimate value that VNI was copied from.
2468 std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const;
2469
2470 bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
2471
2472 /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
2473 /// Return a conflict resolution when possible, but leave the hard cases as
2474 /// CR_Unresolved.
2475 /// Recursively calls computeAssignment() on this and Other, guaranteeing that
2476 /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
2477 /// The recursion always goes upwards in the dominator tree, making loops
2478 /// impossible.
2479 ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
2480
2481 /// Compute the value assignment for ValNo in RI.
2482 /// This may be called recursively by analyzeValue(), but never for a ValNo on
2483 /// the stack.
2484 void computeAssignment(unsigned ValNo, JoinVals &Other);
2485
2486 /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
2487 /// the extent of the tainted lanes in the block.
2488 ///
2489 /// Multiple values in Other.LR can be affected since partial redefinitions
2490 /// can preserve previously tainted lanes.
2491 ///
2492 /// 1 %dst = VLOAD <-- Define all lanes in %dst
2493 /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
2494 /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
2495 /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
2496 ///
2497 /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
2498 /// entry to TaintedVals.
2499 ///
2500 /// Returns false if the tainted lanes extend beyond the basic block.
2501 bool
2502 taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2503 SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
2504
2505 /// Return true if MI uses any of the given Lanes from Reg.
2506 /// This does not include partial redefinitions of Reg.
2507 bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const;
2508
2509 /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
2510 /// be pruned:
2511 ///
2512 /// %dst = COPY %src
2513 /// %src = COPY %dst <-- This value to be pruned.
2514 /// %dst = COPY %src <-- This value is a copy of a pruned value.
2515 bool isPrunedValue(unsigned ValNo, JoinVals &Other);
2516
2517public:
2518 JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask,
2519 SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp,
2520 LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
2521 bool TrackSubRegLiveness)
2522 : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
2523 SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
2524 NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
2525 TRI(TRI), Assignments(LR.getNumValNums(), -1),
2526 Vals(LR.getNumValNums()) {}
2527
2528 /// Analyze defs in LR and compute a value mapping in NewVNInfo.
2529 /// Returns false if any conflicts were impossible to resolve.
2530 bool mapValues(JoinVals &Other);
2531
2532 /// Try to resolve conflicts that require all values to be mapped.
2533 /// Returns false if any conflicts were impossible to resolve.
2534 bool resolveConflicts(JoinVals &Other);
2535
2536 /// Prune the live range of values in Other.LR where they would conflict with
2537 /// CR_Replace values in LR. Collect end points for restoring the live range
2538 /// after joining.
2539 void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
2540 bool changeInstrs);
2541
2542 /// Removes subranges starting at copies that get removed. This sometimes
2543 /// happens when undefined subranges are copied around. These ranges contain
2544 /// no useful information and can be removed.
2545 void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
2546
2547 /// Pruning values in subranges can lead to removing segments in these
2548 /// subranges started by IMPLICIT_DEFs. The corresponding segments in
2549 /// the main range also need to be removed. This function will mark
2550 /// the corresponding values in the main range as pruned, so that
2551 /// eraseInstrs can do the final cleanup.
2552 /// The parameter @p LI must be the interval whose main range is the
2553 /// live range LR.
2554 void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
2555
2556 /// Erase any machine instructions that have been coalesced away.
2557 /// Add erased instructions to ErasedInstrs.
2558 /// Add foreign virtual registers to ShrinkRegs if their live range ended at
2559 /// the erased instrs.
2561 SmallVectorImpl<Register> &ShrinkRegs,
2562 LiveInterval *LI = nullptr);
2563
2564 /// Remove liverange defs at places where implicit defs will be removed.
2565 void removeImplicitDefs();
2566
2567 /// Get the value assignments suitable for passing to LiveInterval::join.
2568 const int *getAssignments() const { return Assignments.data(); }
2569
2570 /// Get the conflict resolution for a value number.
2571 ConflictResolution getResolution(unsigned Num) const {
2572 return Vals[Num].Resolution;
2573 }
2574};
2575
2576} // end anonymous namespace
2577
2578LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
2579 const {
2580 LaneBitmask L;
2581 for (const MachineOperand &MO : DefMI->all_defs()) {
2582 if (MO.getReg() != Reg)
2583 continue;
2584 L |= TRI->getSubRegIndexLaneMask(
2585 TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
2586 if (MO.readsReg())
2587 Redef = true;
2588 }
2589 return L;
2590}
2591
/// Walk backwards through the chain of full copies that define \p VNI,
/// staying within virtual registers. Returns the earliest value reached
/// together with the register it lives on. The returned VNInfo may be null
/// when the chain reaches a definition that is equivalent to undef (see the
/// example comment inside the loop).
std::pair<const VNInfo *, Register>
JoinVals::followCopyChain(const VNInfo *VNI) const {
  Register TrackReg = Reg;

  while (!VNI->isPHIDef()) {
    SlotIndex Def = VNI->def;
    MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
    assert(MI && "No defining instruction");
    // The chain can only be followed through full copies of virtual
    // registers; anything else terminates it at the current value.
    if (!MI->isFullCopy())
      return std::make_pair(VNI, TrackReg);
    Register SrcReg = MI->getOperand(1).getReg();
    if (!SrcReg.isVirtual())
      return std::make_pair(VNI, TrackReg);

    const LiveInterval &LI = LIS->getInterval(SrcReg);
    const VNInfo *ValueIn;
    // No subrange involved.
    if (!SubRangeJoin || !LI.hasSubRanges()) {
      LiveQueryResult LRQ = LI.Query(Def);
      ValueIn = LRQ.valueIn();
    } else {
      // Query subranges. Ensure that all matching ones take us to the same def
      // (allowing some of them to be undef).
      ValueIn = nullptr;
      for (const LiveInterval::SubRange &S : LI.subranges()) {
        // Transform lanemask to a mask in the joined live interval.
        LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
        if ((SMask & LaneMask).none())
          continue;
        LiveQueryResult LRQ = S.Query(Def);
        if (!ValueIn) {
          ValueIn = LRQ.valueIn();
          continue;
        }
        // Two relevant subranges disagree about the incoming value: stop
        // following the chain rather than guessing.
        if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
          return std::make_pair(VNI, TrackReg);
      }
    }
    if (ValueIn == nullptr) {
      // Reaching an undefined value is legitimate, for example:
      //
      // 1   undef %0.sub1 = ...  ;; %0.sub0 == undef
      // 2   %1 = COPY %0         ;; %1 is defined here.
      // 3   %0 = COPY %1         ;; Now %0.sub0 has a definition,
      //                          ;; but it's equivalent to "undef".
      return std::make_pair(nullptr, SrcReg);
    }
    VNI = ValueIn;
    TrackReg = SrcReg;
  }
  return std::make_pair(VNI, TrackReg);
}
2644
2645bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
2646 const JoinVals &Other) const {
2647 const VNInfo *Orig0;
2648 Register Reg0;
2649 std::tie(Orig0, Reg0) = followCopyChain(Value0);
2650 if (Orig0 == Value1 && Reg0 == Other.Reg)
2651 return true;
2652
2653 const VNInfo *Orig1;
2654 Register Reg1;
2655 std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
2656 // If both values are undefined, and the source registers are the same
2657 // register, the values are identical. Filter out cases where only one
2658 // value is defined.
2659 if (Orig0 == nullptr || Orig1 == nullptr)
2660 return Orig0 == Orig1 && Reg0 == Reg1;
2661
2662 // The values are equal if they are defined at the same place and use the
2663 // same register. Note that we cannot compare VNInfos directly as some of
2664 // them might be from a copy created in mergeSubRangeInto() while the other
2665 // is from the original LiveInterval.
2666 return Orig0->def == Orig1->def && Reg0 == Reg1;
2667}
2668
2669JoinVals::ConflictResolution
2670JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
2671 Val &V = Vals[ValNo];
2672 assert(!V.isAnalyzed() && "Value has already been analyzed!");
2673 VNInfo *VNI = LR.getValNumInfo(ValNo);
2674 if (VNI->isUnused()) {
2675 V.WriteLanes = LaneBitmask::getAll();
2676 return CR_Keep;
2677 }
2678
2679 // Get the instruction defining this value, compute the lanes written.
2680 const MachineInstr *DefMI = nullptr;
2681 if (VNI->isPHIDef()) {
2682 // Conservatively assume that all lanes in a PHI are valid.
2683 LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
2684 : TRI->getSubRegIndexLaneMask(SubIdx);
2685 V.ValidLanes = V.WriteLanes = Lanes;
2686 } else {
2687 DefMI = Indexes->getInstructionFromIndex(VNI->def);
2688 assert(DefMI != nullptr);
2689 if (SubRangeJoin) {
2690 // We don't care about the lanes when joining subregister ranges.
2691 V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
2692 if (DefMI->isImplicitDef()) {
2693 V.ValidLanes = LaneBitmask::getNone();
2694 V.ErasableImplicitDef = true;
2695 }
2696 } else {
2697 bool Redef = false;
2698 V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
2699
2700 // If this is a read-modify-write instruction, there may be more valid
2701 // lanes than the ones written by this instruction.
2702 // This only covers partial redef operands. DefMI may have normal use
2703 // operands reading the register. They don't contribute valid lanes.
2704 //
2705 // This adds ssub1 to the set of valid lanes in %src:
2706 //
2707 // %src:ssub1 = FOO
2708 //
2709 // This leaves only ssub1 valid, making any other lanes undef:
2710 //
2711 // %src:ssub1<def,read-undef> = FOO %src:ssub2
2712 //
2713 // The <read-undef> flag on the def operand means that old lane values are
2714 // not important.
2715 if (Redef) {
2716 V.RedefVNI = LR.Query(VNI->def).valueIn();
2717 assert((TrackSubRegLiveness || V.RedefVNI) &&
2718 "Instruction is reading nonexistent value");
2719 if (V.RedefVNI != nullptr) {
2720 computeAssignment(V.RedefVNI->id, Other);
2721 V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
2722 }
2723 }
2724
2725 // An IMPLICIT_DEF writes undef values.
2726 if (DefMI->isImplicitDef()) {
2727 // We normally expect IMPLICIT_DEF values to be live only until the end
2728 // of their block. If the value is really live longer and gets pruned in
2729 // another block, this flag is cleared again.
2730 //
2731 // Clearing the valid lanes is deferred until it is sure this can be
2732 // erased.
2733 V.ErasableImplicitDef = true;
2734 }
2735 }
2736 }
2737
2738 // Find the value in Other that overlaps VNI->def, if any.
2739 LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
2740
2741 // It is possible that both values are defined by the same instruction, or
2742 // the values are PHIs defined in the same block. When that happens, the two
2743 // values should be merged into one, but not into any preceding value.
2744 // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
2745 if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
2746 assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
2747
2748 // One value stays, the other is merged. Keep the earlier one, or the first
2749 // one we see.
2750 if (OtherVNI->def < VNI->def)
2751 Other.computeAssignment(OtherVNI->id, *this);
2752 else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
2753 // This is an early-clobber def overlapping a live-in value in the other
2754 // register. Not mergeable.
2755 V.OtherVNI = OtherLRQ.valueIn();
2756 return CR_Impossible;
2757 }
2758 V.OtherVNI = OtherVNI;
2759 Val &OtherV = Other.Vals[OtherVNI->id];
2760 // Keep this value, check for conflicts when analyzing OtherVNI. Avoid
2761 // revisiting OtherVNI->id in JoinVals::computeAssignment() below before it
2762 // is assigned.
2763 if (!OtherV.isAnalyzed() || Other.Assignments[OtherVNI->id] == -1)
2764 return CR_Keep;
2765 // Both sides have been analyzed now.
2766 // Allow overlapping PHI values. Any real interference would show up in a
2767 // predecessor, the PHI itself can't introduce any conflicts.
2768 if (VNI->isPHIDef())
2769 return CR_Merge;
2770 if ((V.ValidLanes & OtherV.ValidLanes).any())
2771 // Overlapping lanes can't be resolved.
2772 return CR_Impossible;
2773 else
2774 return CR_Merge;
2775 }
2776
2777 // No simultaneous def. Is Other live at the def?
2778 V.OtherVNI = OtherLRQ.valueIn();
2779 if (!V.OtherVNI)
2780 // No overlap, no conflict.
2781 return CR_Keep;
2782
2783 assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
2784
2785 // We have overlapping values, or possibly a kill of Other.
2786 // Recursively compute assignments up the dominator tree.
2787 Other.computeAssignment(V.OtherVNI->id, *this);
2788 Val &OtherV = Other.Vals[V.OtherVNI->id];
2789
2790 if (OtherV.ErasableImplicitDef) {
2791 // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
2792 // This shouldn't normally happen, but ProcessImplicitDefs can leave such
2793 // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
2794 // technically.
2795 //
2796 // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
2797 // to erase the IMPLICIT_DEF instruction.
2798 //
2799 // Additionally we must keep an IMPLICIT_DEF if we're redefining an incoming
2800 // value.
2801
2802 MachineInstr *OtherImpDef =
2803 Indexes->getInstructionFromIndex(V.OtherVNI->def);
2804 MachineBasicBlock *OtherMBB = OtherImpDef->getParent();
2805 if (DefMI &&
2806 (DefMI->getParent() != OtherMBB || LIS->isLiveInToMBB(LR, OtherMBB))) {
2807 LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2808 << " extends into "
2810 << ", keeping it.\n");
2811 OtherV.mustKeepImplicitDef(*TRI, *OtherImpDef);
2812 } else if (OtherMBB->hasEHPadSuccessor()) {
2813 // If OtherV is defined in a basic block that has EH pad successors then
2814 // we get the same problem not just if OtherV is live beyond its basic
2815 // block, but beyond the last call instruction in its basic block. Handle
2816 // this case conservatively.
2817 LLVM_DEBUG(
2818 dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2819 << " may be live into EH pad successors, keeping it.\n");
2820 OtherV.mustKeepImplicitDef(*TRI, *OtherImpDef);
2821 } else {
2822 // We deferred clearing these lanes in case we needed to save them
2823 OtherV.ValidLanes &= ~OtherV.WriteLanes;
2824 }
2825 }
2826
2827 // Allow overlapping PHI values. Any real interference would show up in a
2828 // predecessor, the PHI itself can't introduce any conflicts.
2829 if (VNI->isPHIDef())
2830 return CR_Replace;
2831
2832 // Check for simple erasable conflicts.
2833 if (DefMI->isImplicitDef())
2834 return CR_Erase;
2835
2836 // Include the non-conflict where DefMI is a coalescable copy that kills
2837 // OtherVNI. We still want the copy erased and value numbers merged.
2838 if (CP.isCoalescable(DefMI)) {
2839 // Some of the lanes copied from OtherVNI may be undef, making them undef
2840 // here too.
2841 V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
2842 return CR_Erase;
2843 }
2844
2845 // This may not be a real conflict if DefMI simply kills Other and defines
2846 // VNI.
2847 if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
2848 return CR_Keep;
2849
2850 // Handle the case where VNI and OtherVNI can be proven to be identical:
2851 //
2852 // %other = COPY %ext
2853 // %this = COPY %ext <-- Erase this copy
2854 //
2855 if (DefMI->isFullCopy() && !CP.isPartial() &&
2856 valuesIdentical(VNI, V.OtherVNI, Other)) {
2857 V.Identical = true;
2858 return CR_Erase;
2859 }
2860
2861 // The remaining checks apply to the lanes, which aren't tracked here. This
2862 // was already decided to be OK via the following CR_Replace condition.
2863 // CR_Replace.
2864 if (SubRangeJoin)
2865 return CR_Replace;
2866
2867 // If the lanes written by this instruction were all undef in OtherVNI, it is
2868 // still safe to join the live ranges. This can't be done with a simple value
2869 // mapping, though - OtherVNI will map to multiple values:
2870 //
2871 // 1 %dst:ssub0 = FOO <-- OtherVNI
2872 // 2 %src = BAR <-- VNI
2873 // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy.
2874 // 4 BAZ killed %dst
2875 // 5 QUUX killed %src
2876 //
2877 // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
2878 // handles this complex value mapping.
2879 if ((V.WriteLanes & OtherV.ValidLanes).none())
2880 return CR_Replace;
2881
2882 // If the other live range is killed by DefMI and the live ranges are still
2883 // overlapping, it must be because we're looking at an early clobber def:
2884 //
2885 // %dst<def,early-clobber> = ASM killed %src
2886 //
2887 // In this case, it is illegal to merge the two live ranges since the early
2888 // clobber def would clobber %src before it was read.
2889 if (OtherLRQ.isKill()) {
2890 // This case where the def doesn't overlap the kill is handled above.
2891 assert(VNI->def.isEarlyClobber() &&
2892 "Only early clobber defs can overlap a kill");
2893 return CR_Impossible;
2894 }
2895
2896 // VNI is clobbering live lanes in OtherVNI, but there is still the
2897 // possibility that no instructions actually read the clobbered lanes.
2898 // If we're clobbering all the lanes in OtherVNI, at least one must be read.
2899 // Otherwise Other.RI wouldn't be live here.
2900 if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
2901 return CR_Impossible;
2902
2903 if (TrackSubRegLiveness) {
2904 auto &OtherLI = LIS->getInterval(Other.Reg);
2905 // If OtherVNI does not have subranges, it means all the lanes of OtherVNI
2906 // share the same live range, so we just need to check whether they have
2907 // any conflict bit in their LaneMask.
2908 if (!OtherLI.hasSubRanges()) {
2909 LaneBitmask OtherMask = TRI->getSubRegIndexLaneMask(Other.SubIdx);
2910 return (OtherMask & V.WriteLanes).none() ? CR_Replace : CR_Impossible;
2911 }
2912
2913 // If we are clobbering some active lanes of OtherVNI at VNI->def, it is
2914 // impossible to resolve the conflict. Otherwise, we can just replace
2915 // OtherVNI because of no real conflict.
2916 for (LiveInterval::SubRange &OtherSR : OtherLI.subranges()) {
2917 LaneBitmask OtherMask =
2918 TRI->composeSubRegIndexLaneMask(Other.SubIdx, OtherSR.LaneMask);
2919 if ((OtherMask & V.WriteLanes).none())
2920 continue;
2921
2922 auto OtherSRQ = OtherSR.Query(VNI->def);
2923 if (OtherSRQ.valueIn() && OtherSRQ.endPoint() > VNI->def) {
2924 // VNI is clobbering some lanes of OtherVNI, they have real conflict.
2925 return CR_Impossible;
2926 }
2927 }
2928
2929 // VNI is NOT clobbering any lane of OtherVNI, just replace OtherVNI.
2930 return CR_Replace;
2931 }
2932
2933 // We need to verify that no instructions are reading the clobbered lanes.
2934 // To save compile time, we'll only check that locally. Don't allow the
2935 // tainted value to escape the basic block.
2936 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2937 if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
2938 return CR_Impossible;
2939
2940 // There are still some things that could go wrong besides clobbered lanes
2941 // being read, for example OtherVNI may be only partially redefined in MBB,
2942 // and some clobbered lanes could escape the block. Save this analysis for
2943 // resolveConflicts() when all values have been mapped. We need to know
2944 // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
2945 // that now - the recursive analyzeValue() calls must go upwards in the
2946 // dominator tree.
2947 return CR_Unresolved;
2948}
2949
/// Compute the conflict resolution for value number \p ValNo and record the
/// value-number assignment in Assignments[ValNo]. May recurse into other
/// value numbers (here or in \p Other); recursion always moves up the
/// dominator tree, which guarantees termination.
void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
  Val &V = Vals[ValNo];
  if (V.isAnalyzed()) {
    // Recursion should always move up the dominator tree, so ValNo is not
    // supposed to reappear before it has been assigned.
    assert(Assignments[ValNo] != -1 && "Bad recursion?");
    return;
  }
  switch ((V.Resolution = analyzeValue(ValNo, Other))) {
  case CR_Erase:
  case CR_Merge:
    // Merge this ValNo into OtherVNI: both values share the assignment that
    // was already computed for OtherVNI.
    assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
    assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
    Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
    LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
                      << LR.getValNumInfo(ValNo)->def << " into "
                      << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
                      << V.OtherVNI->def << " --> @"
                      << NewVNInfo[Assignments[ValNo]]->def << '\n');
    break;
  case CR_Replace:
  case CR_Unresolved: {
    // The other value is going to be pruned if this join is successful.
    assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
    Val &OtherV = Other.Vals[V.OtherVNI->id];
    OtherV.Pruned = true;
    // Fall through: this value still gets its own slot in NewVNInfo.
    [[fallthrough]];
  }
  default:
    // This value number needs to go in the final joined live range.
    Assignments[ValNo] = NewVNInfo.size();
    NewVNInfo.push_back(LR.getValNumInfo(ValNo));
    break;
  }
}
2986
2987bool JoinVals::mapValues(JoinVals &Other) {
2988 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2989 computeAssignment(i, Other);
2990 if (Vals[i].Resolution == CR_Impossible) {
2991 LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
2992 << '@' << LR.getValNumInfo(i)->def << '\n');
2993 return false;
2994 }
2995 }
2996 return true;
2997}
2998
/// Determine how far the lanes in \p TaintedLanes stay tainted in Other.LR,
/// scanning forward from this value's def but never past the end of its
/// basic block. Returns false if the tainted lanes escape the block (the
/// join must then be aborted). On success, \p TaintExtent receives
/// (end, lanes) pairs describing each tainted span.
bool JoinVals::
taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
            SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
  VNInfo *VNI = LR.getValNumInfo(ValNo);
  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
  SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);

  // Scan Other.LR from VNI.def to MBBEnd.
  LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
  assert(OtherI != Other.LR.end() && "No conflict?");
  do {
    // OtherI is pointing to a tainted value. Abort the join if the tainted
    // lanes escape the block.
    SlotIndex End = OtherI->end;
    if (End >= MBBEnd) {
      LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
                        << OtherI->valno->id << '@' << OtherI->start << '\n');
      return false;
    }
    LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
                      << OtherI->valno->id << '@' << OtherI->start << " to "
                      << End << '\n');
    // A dead def is not a problem.
    if (End.isDead())
      break;
    TaintExtent.push_back(std::make_pair(End, TaintedLanes));

    // Check for another def in the MBB.
    if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
      break;

    // Lanes written by the new def are no longer tainted.
    const Val &OV = Other.Vals[OtherI->valno->id];
    TaintedLanes &= ~OV.WriteLanes;
    // Without a RedefVNI the new def does not carry the old lanes forward,
    // so the taint cannot extend any further.
    if (!OV.RedefVNI)
      break;
  } while (TaintedLanes.any());
  return true;
}
3038
3039bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
3040 LaneBitmask Lanes) const {
3041 if (MI.isDebugOrPseudoInstr())
3042 return false;
3043 for (const MachineOperand &MO : MI.all_uses()) {
3044 if (MO.getReg() != Reg)
3045 continue;
3046 if (!MO.readsReg())
3047 continue;
3048 unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
3049 if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
3050 return true;
3051 }
3052 return false;
3053}
3054
3055bool JoinVals::resolveConflicts(JoinVals &Other) {
3056 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3057 Val &V = Vals[i];
3058 assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
3059 if (V.Resolution != CR_Unresolved)
3060 continue;
3061 LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
3062 << LR.getValNumInfo(i)->def
3063 << ' ' << PrintLaneMask(LaneMask) << '\n');
3064 if (SubRangeJoin)
3065 return false;
3066
3067 ++NumLaneConflicts;
3068 assert(V.OtherVNI && "Inconsistent conflict resolution.");
3069 VNInfo *VNI = LR.getValNumInfo(i);
3070 const Val &OtherV = Other.Vals[V.OtherVNI->id];
3071
3072 // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
3073 // join, those lanes will be tainted with a wrong value. Get the extent of
3074 // the tainted lanes.
3075 LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
3077 if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
3078 // Tainted lanes would extend beyond the basic block.
3079 return false;
3080
3081 assert(!TaintExtent.empty() && "There should be at least one conflict.");
3082
3083 // Now look at the instructions from VNI->def to TaintExtent (inclusive).
3084 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
3086 if (!VNI->isPHIDef()) {
3087 MI = Indexes->getInstructionFromIndex(VNI->def);
3088 if (!VNI->def.isEarlyClobber()) {
3089 // No need to check the instruction defining VNI for reads.
3090 ++MI;
3091 }
3092 }
3093 assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
3094 "Interference ends on VNI->def. Should have been handled earlier");
3095 MachineInstr *LastMI =
3096 Indexes->getInstructionFromIndex(TaintExtent.front().first);
3097 assert(LastMI && "Range must end at a proper instruction");
3098 unsigned TaintNum = 0;
3099 while (true) {
3100 assert(MI != MBB->end() && "Bad LastMI");
3101 if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
3102 LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
3103 return false;
3104 }
3105 // LastMI is the last instruction to use the current value.
3106 if (&*MI == LastMI) {
3107 if (++TaintNum == TaintExtent.size())
3108 break;
3109 LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
3110 assert(LastMI && "Range must end at a proper instruction");
3111 TaintedLanes = TaintExtent[TaintNum].second;
3112 }
3113 ++MI;
3114 }
3115
3116 // The tainted lanes are unused.
3117 V.Resolution = CR_Replace;
3118 ++NumLaneResolves;
3119 }
3120 return true;
3121}
3122
3123bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
3124 Val &V = Vals[ValNo];
3125 if (V.Pruned || V.PrunedComputed)
3126 return V.Pruned;
3127
3128 if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
3129 return V.Pruned;
3130
3131 // Follow copies up the dominator tree and check if any intermediate value
3132 // has been pruned.
3133 V.PrunedComputed = true;
3134 V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
3135 return V.Pruned;
3136}
3137
/// Prune the live range of values in Other.LR where they would conflict with
/// CR_Replace values in LR, and prune LR values that are copies of pruned
/// values. End points needed to restore liveness after the join are collected
/// in \p EndPoints. When \p changeInstrs is set, def operand flags on
/// replaced definitions are updated as well.
void JoinVals::pruneValues(JoinVals &Other,
                           SmallVectorImpl<SlotIndex> &EndPoints,
                           bool changeInstrs) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    SlotIndex Def = LR.getValNumInfo(i)->def;
    switch (Vals[i].Resolution) {
    case CR_Keep:
      break;
    case CR_Replace: {
      // This value takes precedence over the value in Other.LR.
      LIS->pruneValue(Other.LR, Def, &EndPoints);
      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
      // instructions are only inserted to provide a live-out value for PHI
      // predecessors, so the instruction should simply go away once its value
      // has been replaced.
      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
      bool EraseImpDef = OtherV.ErasableImplicitDef &&
                         OtherV.Resolution == CR_Keep;
      // Block-boundary (PHI) defs have no instruction to fix up.
      if (!Def.isBlock()) {
        if (changeInstrs) {
          // Remove <def,read-undef> flags. This def is now a partial redef.
          // Also remove dead flags since the joined live range will
          // continue past this instruction.
          for (MachineOperand &MO :
               Indexes->getInstructionFromIndex(Def)->operands()) {
            if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
              if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
                MO.setIsUndef(false);
              MO.setIsDead(false);
            }
          }
        }
        // This value will reach instructions below, but we need to make sure
        // the live range also reaches the instruction at Def.
        if (!EraseImpDef)
          EndPoints.push_back(Def);
      }
      LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
                        << ": " << Other.LR << '\n');
      break;
    }
    case CR_Erase:
    case CR_Merge:
      if (isPrunedValue(i, Other)) {
        // This value is ultimately a copy of a pruned value in LR or Other.LR.
        // We can no longer trust the value mapping computed by
        // computeAssignment(), the value that was originally copied could have
        // been replaced.
        LIS->pruneValue(LR, Def, &EndPoints);
        LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
                          << Def << ": " << LR << '\n');
      }
      break;
    case CR_Unresolved:
    case CR_Impossible:
      // All conflicts must have been resolved before pruning is attempted.
      llvm_unreachable("Unresolved conflicts");
    }
  }
}
3197
3198// Check if the segment consists of a copied live-through value (i.e. the copy
3199// in the block only extended the liveness, of an undef value which we may need
3200// to handle).
3201static bool isLiveThrough(const LiveQueryResult Q) {
3202 return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut();
3203}
3204
3205/// Consider the following situation when coalescing the copy between
3206/// %31 and %45 at 800. (The vertical lines represent live range segments.)
3207///
3208/// Main range Subrange 0004 (sub2)
3209/// %31 %45 %31 %45
3210/// 544 %45 = COPY %28 + +
3211/// | v1 | v1
3212/// 560B bb.1: + +
3213/// 624 = %45.sub2 | v2 | v2
3214/// 800 %31 = COPY %45 + + + +
3215/// | v0 | v0
3216/// 816 %31.sub1 = ... + |
3217/// 880 %30 = COPY %31 | v1 +
3218/// 928 %45 = COPY %30 | + +
3219/// | | v0 | v0 <--+
3220/// 992B ; backedge -> bb.1 | + + |
3221/// 1040 = %31.sub0 + |
3222/// This value must remain
3223/// live-out!
3224///
3225/// Assuming that %31 is coalesced into %45, the copy at 928 becomes
3226/// redundant, since it copies the value from %45 back into it. The
3227/// conflict resolution for the main range determines that %45.v0 is
3228/// to be erased, which is ok since %31.v1 is identical to it.
3229/// The problem happens with the subrange for sub2: it has to be live
3230/// on exit from the block, but since 928 was actually a point of
3231/// definition of %45.sub2, %45.sub2 was not live immediately prior
3232/// to that definition. As a result, when 928 was erased, the value v0
3233/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
3234/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
3235/// providing an incorrect value to the use at 624.
3236///
3237/// Since the main-range values %31.v1 and %45.v0 were proved to be
3238/// identical, the corresponding values in subranges must also be the
3239/// same. A redundant copy is removed because it's not needed, and not
3240/// because it copied an undefined value, so any liveness that originated
3241/// from that copy cannot disappear. When pruning a value that started
3242/// at the removed copy, the corresponding identical value must be
3243/// extended to replace it.
3244void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
3245 // Look for values being erased.
3246 bool DidPrune = false;
3247 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3248 Val &V = Vals[i];
3249 // We should trigger in all cases in which eraseInstrs() does something.
3250 // match what eraseInstrs() is doing, print a message so
3251 if (V.Resolution != CR_Erase &&
3252 (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
3253 continue;
3254
3255 // Check subranges at the point where the copy will be removed.
3256 SlotIndex Def = LR.getValNumInfo(i)->def;
3257 SlotIndex OtherDef;
3258 if (V.Identical)
3259 OtherDef = V.OtherVNI->def;
3260
3261 // Print message so mismatches with eraseInstrs() can be diagnosed.
3262 LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
3263 << '\n');
3264 for (LiveInterval::SubRange &S : LI.subranges()) {
3265 LiveQueryResult Q = S.Query(Def);
3266
3267 // If a subrange starts at the copy then an undefined value has been
3268 // copied and we must remove that subrange value as well.
3269 VNInfo *ValueOut = Q.valueOutOrDead();
3270 if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
3271 (V.Identical && V.Resolution == CR_Erase &&
3272 ValueOut->def == Def))) {
3273 LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
3274 << " at " << Def << "\n");
3275 SmallVector<SlotIndex,8> EndPoints;
3276 LIS->pruneValue(S, Def, &EndPoints);
3277 DidPrune = true;
3278 // Mark value number as unused.
3279 ValueOut->markUnused();
3280
3281 if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
3282 // If V is identical to V.OtherVNI (and S was live at OtherDef),
3283 // then we can't simply prune V from S. V needs to be replaced
3284 // with V.OtherVNI.
3285 LIS->extendToIndices(S, EndPoints);
3286 }
3287
3288 // We may need to eliminate the subrange if the copy introduced a live
3289 // out undef value.
3290 if (ValueOut->isPHIDef())
3291 ShrinkMask |= S.LaneMask;
3292 continue;
3293 }
3294
3295 // If a subrange ends at the copy, then a value was copied but only
3296 // partially used later. Shrink the subregister range appropriately.
3297 //
3298 // Ultimately this calls shrinkToUses, so assuming ShrinkMask is
3299 // conservatively correct.
3300 if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) ||
3301 (V.Resolution == CR_Erase && isLiveThrough(Q))) {
3302 LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
3303 << PrintLaneMask(S.LaneMask) << " at " << Def
3304 << "\n");
3305 ShrinkMask |= S.LaneMask;
3306 }
3307 }
3308 }
3309 if (DidPrune)
3311}
3312
3313/// Check if any of the subranges of @p LI contain a definition at @p Def.
3315 for (LiveInterval::SubRange &SR : LI.subranges()) {
3316 if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
3317 if (VNI->def == Def)
3318 return true;
3319 }
3320 return false;
3321}
3322
3323void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
3324 assert(&static_cast<LiveRange&>(LI) == &LR);
3325
3326 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3327 if (Vals[i].Resolution != CR_Keep)
3328 continue;
3329 VNInfo *VNI = LR.getValNumInfo(i);
3330 if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
3331 continue;
3332 Vals[i].Pruned = true;
3333 ShrinkMainRange = true;
3334 }
3335}
3336
3337void JoinVals::removeImplicitDefs() {
3338 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3339 Val &V = Vals[i];
3340 if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
3341 continue;
3342
3343 VNInfo *VNI = LR.getValNumInfo(i);
3344 VNI->markUnused();
3345 LR.removeValNo(VNI);
3346 }
3347}
3348
3349void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
3350 SmallVectorImpl<Register> &ShrinkRegs,
3351 LiveInterval *LI) {
3352 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3353 // Get the def location before markUnused() below invalidates it.
3354 VNInfo *VNI = LR.getValNumInfo(i);
3355 SlotIndex Def = VNI->def;
3356 switch (Vals[i].Resolution) {
3357 case CR_Keep: {
3358 // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
3359 // longer. The IMPLICIT_DEF instructions are only inserted by
3360 // PHIElimination to guarantee that all PHI predecessors have a value.
3361 if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
3362 break;
3363 // Remove value number i from LR.
3364 // For intervals with subranges, removing a segment from the main range
3365 // may require extending the previous segment: for each definition of
3366 // a subregister, there will be a corresponding def in the main range.
3367 // That def may fall in the middle of a segment from another subrange.
3368 // In such cases, removing this def from the main range must be
3369 // complemented by extending the main range to account for the liveness
3370 // of the other subrange.
3371 // The new end point of the main range segment to be extended.
3372 SlotIndex NewEnd;
3373 if (LI != nullptr) {
3375 assert(I != LR.end());
3376 // Do not extend beyond the end of the segment being removed.
3377 // The segment may have been pruned in preparation for joining
3378 // live ranges.
3379 NewEnd = I->end;
3380 }
3381
3382 LR.removeValNo(VNI);
3383 // Note that this VNInfo is reused and still referenced in NewVNInfo,
3384 // make it appear like an unused value number.
3385 VNI->markUnused();
3386
3387 if (LI != nullptr && LI->hasSubRanges()) {
3388 assert(static_cast<LiveRange*>(LI) == &LR);
3389 // Determine the end point based on the subrange information:
3390 // minimum of (earliest def of next segment,
3391 // latest end point of containing segment)
3392 SlotIndex ED, LE;
3393 for (LiveInterval::SubRange &SR : LI->subranges()) {
3394 LiveRange::iterator I = SR.find(Def);
3395 if (I == SR.end())
3396 continue;
3397 if (I->start > Def)
3398 ED = ED.isValid() ? std::min(ED, I->start) : I->start;
3399 else
3400 LE = LE.isValid() ? std::max(LE, I->end) : I->end;
3401 }
3402 if (LE.isValid())
3403 NewEnd = std::min(NewEnd, LE);
3404 if (ED.isValid())
3405 NewEnd = std::min(NewEnd, ED);
3406
3407 // We only want to do the extension if there was a subrange that
3408 // was live across Def.
3409 if (LE.isValid()) {
3410 LiveRange::iterator S = LR.find(Def);
3411 if (S != LR.begin())
3412 std::prev(S)->end = NewEnd;
3413 }
3414 }
3415 LLVM_DEBUG({
3416 dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
3417 if (LI != nullptr)
3418 dbgs() << "\t\t LHS = " << *LI << '\n';
3419 });
3420 [[fallthrough]];
3421 }
3422
3423 case CR_Erase: {
3424 MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
3425 assert(MI && "No instruction to erase");
3426 if (MI->isCopy()) {
3427 Register Reg = MI->getOperand(1).getReg();
3428 if (Reg.isVirtual() && Reg != CP.getSrcReg() && Reg != CP.getDstReg())
3429 ShrinkRegs.push_back(Reg);
3430 }
3431 ErasedInstrs.insert(MI);
3432 LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
3434 MI->eraseFromParent();
3435 break;
3436 }
3437 default:
3438 break;
3439 }
3440 }
3441}
3442
3443void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
3444 LaneBitmask LaneMask,
3445 const CoalescerPair &CP) {
3446 SmallVector<VNInfo*, 16> NewVNInfo;
3447 JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
3448 NewVNInfo, CP, LIS, TRI, true, true);
3449 JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
3450 NewVNInfo, CP, LIS, TRI, true, true);
3451
3452 // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
3453 // We should be able to resolve all conflicts here as we could successfully do
3454 // it on the mainrange already. There is however a problem when multiple
3455 // ranges get mapped to the "overflow" lane mask bit which creates unexpected
3456 // interferences.
3457 if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
3458 // We already determined that it is legal to merge the intervals, so this
3459 // should never fail.
3460 llvm_unreachable("*** Couldn't join subrange!\n");
3461 }
3462 if (!LHSVals.resolveConflicts(RHSVals) ||
3463 !RHSVals.resolveConflicts(LHSVals)) {
3464 // We already determined that it is legal to merge the intervals, so this
3465 // should never fail.
3466 llvm_unreachable("*** Couldn't join subrange!\n");
3467 }
3468
3469 // The merging algorithm in LiveInterval::join() can't handle conflicting
3470 // value mappings, so we need to remove any live ranges that overlap a
3471 // CR_Replace resolution. Collect a set of end points that can be used to
3472 // restore the live range after joining.
3473 SmallVector<SlotIndex, 8> EndPoints;
3474 LHSVals.pruneValues(RHSVals, EndPoints, false);
3475 RHSVals.pruneValues(LHSVals, EndPoints, false);
3476
3477 LHSVals.removeImplicitDefs();
3478 RHSVals.removeImplicitDefs();
3479
3480 LRange.verify();
3481 RRange.verify();
3482
3483 // Join RRange into LHS.
3484 LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
3485 NewVNInfo);
3486
3487 LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
3488 << ' ' << LRange << "\n");
3489 if (EndPoints.empty())
3490 return;
3491
3492 // Recompute the parts of the live range we had to remove because of
3493 // CR_Replace conflicts.
3494 LLVM_DEBUG({
3495 dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3496 for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3497 dbgs() << EndPoints[i];
3498 if (i != n-1)
3499 dbgs() << ',';
3500 }
3501 dbgs() << ": " << LRange << '\n';
3502 });
3503 LIS->extendToIndices(LRange, EndPoints);
3504}
3505
3506void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
3507 const LiveRange &ToMerge,
3508 LaneBitmask LaneMask,
3509 CoalescerPair &CP,
3510 unsigned ComposeSubRegIdx) {
3512 LI.refineSubRanges(
3513 Allocator, LaneMask,
3514 [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
3515 if (SR.empty()) {
3516 SR.assign(ToMerge, Allocator);
3517 } else {
3518 // joinSubRegRange() destroys the merged range, so we need a copy.
3519 LiveRange RangeCopy(ToMerge, Allocator);
3520 joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
3521 }
3522 },
3523 *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx);
3524}
3525
3526bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
3528 return false;
3529 auto &Counter = LargeLIVisitCounter[LI.reg()];
3530 if (Counter < LargeIntervalFreqThreshold) {
3531 Counter++;
3532 return false;
3533 }
3534 return true;
3535}
3536
3537bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
3538 SmallVector<VNInfo*, 16> NewVNInfo;
3539 LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
3540 LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
3541 bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
3542 JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
3543 NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3544 JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
3545 NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3546
3547 LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
3548
3549 if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
3550 return false;
3551
3552 // First compute NewVNInfo and the simple value mappings.
3553 // Detect impossible conflicts early.
3554 if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
3555 return false;
3556
3557 // Some conflicts can only be resolved after all values have been mapped.
3558 if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
3559 return false;
3560
3561 // All clear, the live ranges can be merged.
3562 if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
3564
3565 // Transform lanemasks from the LHS to masks in the coalesced register and
3566 // create initial subranges if necessary.
3567 unsigned DstIdx = CP.getDstIdx();
3568 if (!LHS.hasSubRanges()) {
3569 LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
3570 : TRI->getSubRegIndexLaneMask(DstIdx);
3571 // LHS must support subregs or we wouldn't be in this codepath.
3572 assert(Mask.any());
3573 LHS.createSubRangeFrom(Allocator, Mask, LHS);
3574 } else if (DstIdx != 0) {
3575 // Transform LHS lanemasks to new register class if necessary.
3576 for (LiveInterval::SubRange &R : LHS.subranges()) {
3577 LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
3578 R.LaneMask = Mask;
3579 }
3580 }
3581 LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
3582 << '\n');
3583
3584 // Determine lanemasks of RHS in the coalesced register and merge subranges.
3585 unsigned SrcIdx = CP.getSrcIdx();
3586 if (!RHS.hasSubRanges()) {
3587 LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
3588 : TRI->getSubRegIndexLaneMask(SrcIdx);
3589 mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx);
3590 } else {
3591 // Pair up subranges and merge.
3592 for (LiveInterval::SubRange &R : RHS.subranges()) {
3593 LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
3594 mergeSubRangeInto(LHS, R, Mask, CP, DstIdx);
3595 }
3596 }
3597 LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
3598
3599 // Pruning implicit defs from subranges may result in the main range
3600 // having stale segments.
3601 LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
3602
3603 LHSVals.pruneSubRegValues(LHS, ShrinkMask);
3604 RHSVals.pruneSubRegValues(LHS, ShrinkMask);
3605 }
3606
3607 // The merging algorithm in LiveInterval::join() can't handle conflicting
3608 // value mappings, so we need to remove any live ranges that overlap a
3609 // CR_Replace resolution. Collect a set of end points that can be used to
3610 // restore the live range after joining.
3611 SmallVector<SlotIndex, 8> EndPoints;
3612 LHSVals.pruneValues(RHSVals, EndPoints, true);
3613 RHSVals.pruneValues(LHSVals, EndPoints, true);
3614
3615 // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
3616 // registers to require trimming.
3617 SmallVector<Register, 8> ShrinkRegs;
3618 LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
3619 RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
3620 while (!ShrinkRegs.empty())
3621 shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
3622
3623 // Scan and mark undef any DBG_VALUEs that would refer to a different value.
3624 checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);
3625
3626 // If the RHS covers any PHI locations that were tracked for debug-info, we
3627 // must update tracking information to reflect the join.
3628 auto RegIt = RegToPHIIdx.find(CP.getSrcReg());
3629 if (RegIt != RegToPHIIdx.end()) {
3630 // Iterate over all the debug instruction numbers assigned this register.
3631 for (unsigned InstID : RegIt->second) {
3632 auto PHIIt = PHIValToPos.find(InstID);
3633 assert(PHIIt != PHIValToPos.end());
3634 const SlotIndex &SI = PHIIt->second.SI;
3635
3636 // Does the RHS cover the position of this PHI?
3637 auto LII = RHS.find(SI);
3638 if (LII == RHS.end() || LII->start > SI)
3639 continue;
3640
3641 // Accept two kinds of subregister movement:
3642 // * When we merge from one register class into a larger register:
3643 // %1:gr16 = some-inst
3644 // ->
3645 // %2:gr32.sub_16bit = some-inst
3646 // * When the PHI is already in a subregister, and the larger class
3647 // is coalesced:
3648 // %2:gr32.sub_16bit = some-inst
3649 // %3:gr32 = COPY %2
3650 // ->
3651 // %3:gr32.sub_16bit = some-inst
3652 // Test for subregister move:
3653 if (CP.getSrcIdx() != 0 || CP.getDstIdx() != 0)
3654 // If we're moving between different subregisters, ignore this join.
3655 // The PHI will not get a location, dropping variable locations.
3656 if (PHIIt->second.SubReg && PHIIt->second.SubReg != CP.getSrcIdx())
3657 continue;
3658
3659 // Update our tracking of where the PHI is.
3660 PHIIt->second.Reg = CP.getDstReg();
3661
3662 // If we merge into a sub-register of a larger class (test above),
3663 // update SubReg.
3664 if (CP.getSrcIdx() != 0)
3665 PHIIt->second.SubReg = CP.getSrcIdx();
3666 }
3667
3668 // Rebuild the register index in RegToPHIIdx to account for PHIs tracking
3669 // different VRegs now. Copy old collection of debug instruction numbers and
3670 // erase the old one:
3671 auto InstrNums = RegIt->second;
3672 RegToPHIIdx.erase(RegIt);
3673
3674 // There might already be PHIs being tracked in the destination VReg. Insert
3675 // into an existing tracking collection, or insert a new one.
3676 RegIt = RegToPHIIdx.find(CP.getDstReg());
3677 if (RegIt != RegToPHIIdx.end())
3678 RegIt->second.insert(RegIt->second.end(), InstrNums.begin(),
3679 InstrNums.end());
3680 else
3681 RegToPHIIdx.insert({CP.getDstReg(), InstrNums});
3682 }
3683
3684 // Join RHS into LHS.
3685 LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
3686
3687 // Kill flags are going to be wrong if the live ranges were overlapping.
3688 // Eventually, we should simply clear all kill flags when computing live
3689 // ranges. They are reinserted after register allocation.
3690 MRI->clearKillFlags(LHS.reg());
3691 MRI->clearKillFlags(RHS.reg());
3692
3693 if (!EndPoints.empty()) {
3694 // Recompute the parts of the live range we had to remove because of
3695 // CR_Replace conflicts.
3696 LLVM_DEBUG({
3697 dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3698 for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3699 dbgs() << EndPoints[i];
3700 if (i != n-1)
3701 dbgs() << ',';
3702 }
3703 dbgs() << ": " << LHS << '\n';
3704 });
3705 LIS->extendToIndices((LiveRange&)LHS, EndPoints);
3706 }
3707
3708 return true;
3709}
3710
3711bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
3712 return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
3713}
3714
3715void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
3716{
3717 const SlotIndexes &Slots = *LIS->getSlotIndexes();
3719
3720 // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
3721 // vreg => DbgValueLoc map.
3722 auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
3723 for (auto *X : ToInsert) {
3724 for (const auto &Op : X->debug_operands()) {
3725 if (Op.isReg() && Op.getReg().isVirtual())
3726 DbgVRegToValues[Op.getReg()].push_back({Slot, X});
3727 }
3728 }
3729
3730 ToInsert.clear();
3731 };
3732
3733 // Iterate over all instructions, collecting them into the ToInsert vector.
3734 // Once a non-debug instruction is found, record the slot index of the
3735 // collected DBG_VALUEs.
3736 for (auto &MBB : MF) {
3737 SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
3738
3739 for (auto &MI : MBB) {
3740 if (MI.isDebugValue()) {
3741 if (any_of(MI.debug_operands(), [](const MachineOperand &MO) {
3742 return MO.isReg() && MO.getReg().isVirtual();
3743 }))
3744 ToInsert.push_back(&MI);
3745 } else if (!MI.isDebugOrPseudoInstr()) {
3746 CurrentSlot = Slots.getInstructionIndex(MI);
3747 CloseNewDVRange(CurrentSlot);
3748 }
3749 }
3750
3751 // Close range of DBG_VALUEs at the end of blocks.
3752 CloseNewDVRange(Slots.getMBBEndIdx(&MBB));
3753 }
3754
3755 // Sort all DBG_VALUEs we've seen by slot number.
3756 for (auto &Pair : DbgVRegToValues)
3757 llvm::sort(Pair.second);
3758}
3759
3760void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
3761 LiveRange &LHS,
3762 JoinVals &LHSVals,
3763 LiveRange &RHS,
3764 JoinVals &RHSVals) {
3765 auto ScanForDstReg = [&](Register Reg) {
3766 checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
3767 };
3768
3769 auto ScanForSrcReg = [&](Register Reg) {
3770 checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
3771 };
3772
3773 // Scan for unsound updates of both the source and destination register.
3774 ScanForSrcReg(CP.getSrcReg());
3775 ScanForDstReg(CP.getDstReg());
3776}
3777
/// Walk all DBG_VALUEs of \p Reg (sorted by slot, see
/// buildVRegToDbgValueMap) against the live segments of \p OtherLR. Any
/// DBG_VALUE that lands inside a segment of OtherLR where the conflict
/// resolution for Reg's value is neither CR_Keep nor CR_Erase is set undef,
/// because the merged register may hold a different value there.
void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
                                                         LiveRange &OtherLR,
                                                         LiveRange &RegLR,
                                                         JoinVals &RegVals) {
  // Are there any DBG_VALUEs to examine?
  auto VRegMapIt = DbgVRegToValues.find(Reg);
  if (VRegMapIt == DbgVRegToValues.end())
    return;

  auto &DbgValueSet = VRegMapIt->second;
  auto DbgValueSetIt = DbgValueSet.begin();
  auto SegmentIt = OtherLR.begin();

  // Cache of the last ShouldUndef() query. LastUndefIdx is
  // default-constructed (invalid), so the first comparison below cannot
  // spuriously hit the cache.
  bool LastUndefResult = false;
  SlotIndex LastUndefIdx;

  // If the "Other" register is live at a slot Idx, test whether Reg can
  // safely be merged with it, or should be marked undef.
  auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult,
                      &LastUndefIdx](SlotIndex Idx) -> bool {
    // Our worst-case performance typically happens with asan, causing very
    // many DBG_VALUEs of the same location. Cache a copy of the most recent
    // result for this edge-case.
    if (LastUndefIdx == Idx)
      return LastUndefResult;

    // If the other range was live, and Reg's was not, the register coalescer
    // will not have tried to resolve any conflicts. We don't know whether
    // the DBG_VALUE will refer to the same value number, so it must be made
    // undef.
    auto OtherIt = RegLR.find(Idx);
    if (OtherIt == RegLR.end())
      return true;

    // Both the registers were live: examine the conflict resolution record for
    // the value number Reg refers to. CR_Keep meant that this value number
    // "won" and the merged register definitely refers to that value. CR_Erase
    // means the value number was a redundant copy of the other value, which
    // was coalesced and Reg deleted. It's safe to refer to the other register
    // (which will be the source of the copy).
    auto Resolution = RegVals.getResolution(OtherIt->valno->id);
    LastUndefResult = Resolution != JoinVals::CR_Keep &&
                      Resolution != JoinVals::CR_Erase;
    LastUndefIdx = Idx;
    return LastUndefResult;
  };

  // Iterate over both the live-range of the "Other" register, and the set of
  // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest
  // slot index. This relies on the DbgValueSet being ordered.
  while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) {
    if (DbgValueSetIt->first < SegmentIt->end) {
      // "Other" is live and there is a DBG_VALUE of Reg: test if we should
      // set it undef.
      if (DbgValueSetIt->first >= SegmentIt->start) {
        bool HasReg = DbgValueSetIt->second->hasDebugOperandForReg(Reg);
        bool ShouldUndefReg = ShouldUndef(DbgValueSetIt->first);
        if (HasReg && ShouldUndefReg) {
          // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
          // Note: we deliberately do not advance DbgValueSetIt here; after
          // setDebugValueUndef() the operand no longer names Reg, so the
          // re-test of the same entry falls through and advances normally.
          DbgValueSetIt->second->setDebugValueUndef();
          continue;
        }
      }
      ++DbgValueSetIt;
    } else {
      ++SegmentIt;
    }
  }
}
3847
3848namespace {
3849
3850/// Information concerning MBB coalescing priority.
3851struct MBBPriorityInfo {
3853 unsigned Depth;
3854 bool IsSplit;
3855
3856 MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
3857 : MBB(mbb), Depth(depth), IsSplit(issplit) {}
3858};
3859
3860} // end anonymous namespace
3861
3862/// C-style comparator that sorts first based on the loop depth of the basic
3863/// block (the unsigned), and then on the MBB number.
3864///
3865/// EnableGlobalCopies assumes that the primary sort key is loop depth.
3866static int compareMBBPriority(const MBBPriorityInfo *LHS,
3867 const MBBPriorityInfo *RHS) {
3868 // Deeper loops first
3869 if (LHS->Depth != RHS->Depth)
3870 return LHS->Depth > RHS->Depth ? -1 : 1;
3871
3872 // Try to unsplit critical edges next.
3873 if (LHS->IsSplit != RHS->IsSplit)
3874 return LHS->IsSplit ? -1 : 1;
3875
3876 // Prefer blocks that are more connected in the CFG. This takes care of
3877 // the most difficult copies first while intervals are short.
3878 unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
3879 unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
3880 if (cl != cr)
3881 return cl > cr ? -1 : 1;
3882
3883 // As a last resort, sort by block number.
3884 return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
3885}
3886
3887/// \returns true if the given copy uses or defines a local live range.
3888static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
3889 if (!Copy->isCopy())
3890 return false;
3891
3892 if (Copy->getOperand(1).isUndef())
3893 return false;
3894
3895 Register SrcReg = Copy->getOperand(1).getReg();
3896 Register DstReg = Copy->getOperand(0).getReg();
3897 if (SrcReg.isPhysical() || DstReg.isPhysical())
3898 return false;
3899
3900 return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
3901 || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
3902}
3903
3904void RegisterCoalescer::lateLiveIntervalUpdate() {
3905 for (Register reg : ToBeUpdated) {
3906 if (!LIS->hasInterval(reg))
3907 continue;
3908 LiveInterval &LI = LIS->getInterval(reg);
3909 shrinkToUses(&LI, &DeadDefs);
3910 if (!DeadDefs.empty())
3911 eliminateDeadDefs();
3912 }
3913 ToBeUpdated.clear();
3914}
3915
3916bool RegisterCoalescer::
3917copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
3918 bool Progress = false;
3919 for (MachineInstr *&MI : CurrList) {
3920 if (!MI)
3921 continue;
3922 // Skip instruction pointers that have already been erased, for example by
3923 // dead code elimination.
3924 if (ErasedInstrs.count(MI)) {
3925 MI = nullptr;
3926 continue;
3927 }
3928 bool Again = false;
3929 bool Success = joinCopy(MI, Again);
3930 Progress |= Success;
3931 if (Success || !Again)
3932 MI = nullptr;
3933 }
3934 return Progress;
3935}
3936
3937/// Check if DstReg is a terminal node.
3938/// I.e., it does not have any affinity other than \p Copy.
3939static bool isTerminalReg(Register DstReg, const MachineInstr &Copy,
3940 const MachineRegisterInfo *MRI) {
3941 assert(Copy.isCopyLike());
3942 // Check if the destination of this copy as any other affinity.
3943 for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
3944 if (&MI != &Copy && MI.isCopyLike())
3945 return false;
3946 return true;
3947}
3948
3949bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
3950 assert(Copy.isCopyLike());
3951 if (!UseTerminalRule)
3952 return false;
3953 Register SrcReg, DstReg;
3954 unsigned SrcSubReg = 0, DstSubReg = 0;
3955 if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
3956 return false;
3957 // Check if the destination of this copy has any other affinity.
3958 if (DstReg.isPhysical() ||
3959 // If SrcReg is a physical register, the copy won't be coalesced.
3960 // Ignoring it may have other side effect (like missing
3961 // rematerialization). So keep it.
3962 SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI))
3963 return false;
3964
3965 // DstReg is a terminal node. Check if it interferes with any other
3966 // copy involving SrcReg.
3967 const MachineBasicBlock *OrigBB = Copy.getParent();
3968 const LiveInterval &DstLI = LIS->getInterval(DstReg);
3969 for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
3970 // Technically we should check if the weight of the new copy is
3971 // interesting compared to the other one and update the weight
3972 // of the copies accordingly. However, this would only work if
3973 // we would gather all the copies first then coalesce, whereas
3974 // right now we interleave both actions.
3975 // For now, just consider the copies that are in the same block.
3976 if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
3977 continue;
3978 Register OtherSrcReg, OtherReg;
3979 unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
3980 if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
3981 OtherSubReg))
3982 return false;
3983 if (OtherReg == SrcReg)
3984 OtherReg = OtherSrcReg;
3985 // Check if OtherReg is a non-terminal.
3986 if (OtherReg.isPhysical() || isTerminalReg(OtherReg, MI, MRI))
3987 continue;
3988 // Check that OtherReg interfere with DstReg.
3989 if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
3990 LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
3991 << '\n');
3992 return true;
3993 }
3994 }
3995 return false;
3996}
3997
3998void
3999RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
4000 LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");
4001
4002 // Collect all copy-like instructions in MBB. Don't start coalescing anything
4003 // yet, it might invalidate the iterator.
4004 const unsigned PrevSize = WorkList.size();
4005 if (JoinGlobalCopies) {
4006 SmallVector<MachineInstr*, 2> LocalTerminals;
4007 SmallVector<MachineInstr*, 2> GlobalTerminals;
4008 // Coalesce copies bottom-up to coalesce local defs before local uses. They
4009 // are not inherently easier to resolve, but slightly preferable until we
4010 // have local live range splitting. In particular this is required by
4011 // cmp+jmp macro fusion.
4012 for (MachineInstr &MI : *MBB) {
4013 if (!MI.isCopyLike())
4014 continue;
4015 bool ApplyTerminalRule = applyTerminalRule(MI);
4016 if (isLocalCopy(&MI, LIS)) {
4017 if (ApplyTerminalRule)
4018 LocalTerminals.push_back(&MI);
4019 else
4020 LocalWorkList.push_back(&MI);
4021 } else {
4022 if (ApplyTerminalRule)
4023 GlobalTerminals.push_back(&MI);
4024 else
4025 WorkList.push_back(&MI);
4026 }
4027 }
4028 // Append the copies evicted by the terminal rule at the end of the list.
4029 LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
4030 WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
4031 }
4032 else {
4034 for (MachineInstr &MII : *MBB)
4035 if (MII.isCopyLike()) {
4036 if (applyTerminalRule(MII))
4037 Terminals.push_back(&MII);
4038 else
4039 WorkList.push_back(&MII);
4040 }
4041 // Append the copies evicted by the terminal rule at the end of the list.
4042 WorkList.append(Terminals.begin(), Terminals.end());
4043 }
4044 // Try coalescing the collected copies immediately, and remove the nulls.
4045 // This prevents the WorkList from getting too large since most copies are
4046 // joinable on the first attempt.
4048 CurrList(WorkList.begin() + PrevSize, WorkList.end());
4049 if (copyCoalesceWorkList(CurrList))
4050 WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
4051 nullptr), WorkList.end());
4052}
4053
4054void RegisterCoalescer::coalesceLocals() {
4055 copyCoalesceWorkList(LocalWorkList);
4056 for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
4057 if (LocalWorkList[j])
4058 WorkList.push_back(LocalWorkList[j]);
4059 }
4060 LocalWorkList.clear();
4061}
4062
4063void RegisterCoalescer::joinAllIntervals() {
4064 LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
4065 assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
4066
4067 std::vector<MBBPriorityInfo> MBBs;
4068 MBBs.reserve(MF->size());
4069 for (MachineBasicBlock &MBB : *MF) {
4070 MBBs.push_back(MBBPriorityInfo(&MBB, Loops->getLoopDepth(&MBB),
4071 JoinSplitEdges && isSplitEdge(&MBB)));
4072 }
4073 array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
4074
4075 // Coalesce intervals in MBB priority order.
4076 unsigned CurrDepth = std::numeric_limits<unsigned>::max();
4077 for (MBBPriorityInfo &MBB : MBBs) {
4078 // Try coalescing the collected local copies for deeper loops.
4079 if (JoinGlobalCopies && MBB.Depth < CurrDepth) {
4080 coalesceLocals();
4081 CurrDepth = MBB.Depth;
4082 }
4083 copyCoalesceInMBB(MBB.MBB);
4084 }
4085 lateLiveIntervalUpdate();
4086 coalesceLocals();
4087
4088 // Joining intervals can allow other intervals to be joined. Iteratively join
4089 // until we make no progress.
4090 while (copyCoalesceWorkList(WorkList))
4091 /* empty */ ;
4092 lateLiveIntervalUpdate();
4093}
4094
4095void RegisterCoalescer::releaseMemory() {
4096 ErasedInstrs.clear();
4097 WorkList.clear();
4098 DeadDefs.clear();
4099 InflateRegs.clear();
4100 LargeLIVisitCounter.clear();
4101}
4102
4103bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
4104 LLVM_DEBUG(dbgs() << "********** REGISTER COALESCER **********\n"
4105 << "********** Function: " << fn.getName() << '\n');
4106
4107 // Variables changed between a setjmp and a longjump can have undefined value
4108 // after the longjmp. This behaviour can be observed if such a variable is
4109 // spilled, so longjmp won't restore the value in the spill slot.
4110 // RegisterCoalescer should not run in functions with a setjmp to avoid
4111 // merging such undefined variables with predictable ones.
4112 //
4113 // TODO: Could specifically disable coalescing registers live across setjmp
4114 // calls
4115 if (fn.exposesReturnsTwice()) {
4116 LLVM_DEBUG(
4117 dbgs() << "* Skipped as it exposes functions that returns twice.\n");
4118 return false;
4119 }
4120
4121 MF = &fn;
4122 MRI = &fn.getRegInfo();
4123 const TargetSubtargetInfo &STI = fn.getSubtarget();
4124 TRI = STI.getRegisterInfo();
4125 TII = STI.getInstrInfo();
4126 LIS = &getAnalysis<LiveIntervals>();
4127 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4128 Loops = &getAnalysis<MachineLoopInfo>();
4130 JoinGlobalCopies = STI.enableJoinGlobalCopies();
4131 else
4132 JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
4133
4134 // If there are PHIs tracked by debug-info, they will need updating during
4135 // coalescing. Build an index of those PHIs to ease updating.
4136 SlotIndexes *Slots = LIS->getSlotIndexes();
4137 for (const auto &DebugPHI : MF->DebugPHIPositions) {
4138 MachineBasicBlock *MBB = DebugPHI.second.MBB;
4139 Register Reg = DebugPHI.second.Reg;
4140 unsigned SubReg = DebugPHI.second.SubReg;
4141 SlotIndex SI = Slots->getMBBStartIdx(MBB);
4142 PHIValPos P = {SI, Reg, SubReg};
4143 PHIValToPos.insert(std::make_pair(DebugPHI.first, P));
4144 RegToPHIIdx[Reg].push_back(DebugPHI.first);
4145 }
4146
4147 // The MachineScheduler does not currently require JoinSplitEdges. This will
4148 // either be enabled unconditionally or replaced by a more general live range
4149 // splitting optimization.
4150 JoinSplitEdges = EnableJoinSplits;
4151
4152 if (VerifyCoalescing)
4153 MF->verify(this, "Before register coalescing");
4154
4155 DbgVRegToValues.clear();
4157
4158 RegClassInfo.runOnMachineFunction(fn);
4159
4160 // Join (coalesce) intervals if requested.
4161 if (EnableJoining)
4162 joinAllIntervals();
4163
4164 // After deleting a lot of copies, register classes may be less constrained.
4165 // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
4166 // DPR inflation.
4167 array_pod_sort(InflateRegs.begin(), InflateRegs.end());
4168 InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
4169 InflateRegs.end());
4170 LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
4171 << " regs.\n");
4172 for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
4173 Register Reg = InflateRegs[i];
4174 if (MRI->reg_nodbg_empty(Reg))
4175 continue;
4176 if (MRI->recomputeRegClass(Reg)) {
4177 LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
4178 << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
4179 ++NumInflated;
4180
4181 LiveInterval &LI = LIS->getInterval(Reg);
4182 if (LI.hasSubRanges()) {
4183 // If the inflated register class does not support subregisters anymore
4184 // remove the subranges.
4185 if (!MRI->shouldTrackSubRegLiveness(Reg)) {
4186 LI.clearSubRanges();
4187 } else {
4188#ifndef NDEBUG
4189 LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
4190 // If subranges are still supported, then the same subregs
4191 // should still be supported.
4192 for (LiveInterval::SubRange &S : LI.subranges()) {
4193 assert((S.LaneMask & ~MaxMask).none());
4194 }
4195#endif
4196 }
4197 }
4198 }
4199 }
4200
4201 // After coalescing, update any PHIs that are being tracked by debug-info
4202 // with their new VReg locations.
4203 for (auto &p : MF->DebugPHIPositions) {
4204 auto it = PHIValToPos.find(p.first);
4205 assert(it != PHIValToPos.end());
4206 p.second.Reg = it->second.Reg;
4207 p.second.SubReg = it->second.SubReg;
4208 }
4209
4210 PHIValToPos.clear();
4211 RegToPHIIdx.clear();
4212
4213 LLVM_DEBUG(dump());
4214 if (VerifyCoalescing)
4215 MF->verify(this, "After register coalescing");
4216 return true;
4217}
4218
/// Print pass state by delegating to LiveIntervals::print.
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
  LIS->print(O, m);
}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
aarch64 promote const
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1272
bool End
Definition: ELF_riscv.cpp:469
SmallVector< uint32_t, 0 > Writes
Definition: ELF_riscv.cpp:486
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
unsigned Reg
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
Basic Register Allocator
static cl::opt< cl::boolOrDefault > EnableGlobalCopies("join-globalcopies", cl::desc("Coalesce copies that span blocks (default=subtarget)"), cl::init(cl::BOU_UNSET), cl::Hidden)
Temporary flag to test global copy optimization.
static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS)
static bool isSplitEdge(const MachineBasicBlock *MBB)
Return true if this block should be vacated by the coalescer to eliminate branches.
static int compareMBBPriority(const MBBPriorityInfo *LHS, const MBBPriorityInfo *RHS)
C-style comparator that sorts first based on the loop depth of the basic block (the unsigned),...
register Register Coalescer
register coalescer
static cl::opt< unsigned > LargeIntervalSizeThreshold("large-interval-size-threshold", cl::Hidden, cl::desc("If the valnos size of an interval is larger than the threshold, " "it is regarded as a large interval. "), cl::init(100))
static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def)
Check if any of the subranges of LI contain a definition at Def.
static cl::opt< unsigned > LargeIntervalFreqThreshold("large-interval-freq-threshold", cl::Hidden, cl::desc("For a large interval, if it is coalesed with other live " "intervals many times more than the threshold, stop its " "coalescing to control the compile time. "), cl::init(256))
static std::pair< bool, bool > addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src, const VNInfo *SrcValNo)
Copy segments with value number SrcValNo from liverange Src to live range @Dst and use value number D...
static bool isLiveThrough(const LiveQueryResult Q)
static bool isTerminalReg(Register DstReg, const MachineInstr &Copy, const MachineRegisterInfo *MRI)
Check if DstReg is a terminal node.
static cl::opt< bool > VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), cl::Hidden)
register Register static false bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, Register &Src, Register &Dst, unsigned &SrcSub, unsigned &DstSub)
static cl::opt< bool > EnableJoinSplits("join-splitedges", cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden)
Temporary flag to test critical edge unsplitting.
static cl::opt< bool > EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true), cl::Hidden)
static bool definesFullReg(const MachineInstr &MI, Register Reg)
Returns true if MI defines the full vreg Reg, as opposed to just defining a subregister.
static cl::opt< unsigned > LateRematUpdateThreshold("late-remat-update-threshold", cl::Hidden, cl::desc("During rematerialization for a copy, if the def instruction has " "many other copy uses to be rematerialized, delay the multiple " "separate live interval update work and do them all at once after " "all those rematerialization are done. It will save a lot of " "repeated work. "), cl::init(100))
static cl::opt< bool > UseTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"), cl::init(false), cl::Hidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static DenseMap< Register, std::vector< std::pair< SlotIndex, MachineInstr * > > > buildVRegToDbgValueMap(MachineFunction &MF, const LiveIntervals *Liveness)
static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS)
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
bool test(unsigned Idx) const
Definition: BitVector.h:461
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:66
A helper class for register coalescers.
bool flip()
Swap SrcReg and DstReg.
bool isCoalescable(const MachineInstr *) const
Return true if MI is a copy instruction that will become an identity copy after coalescing.
bool setRegisters(const MachineInstr *)
Set registers to match the copy instruction MI.
This class represents an Operation in the Expression.
The location of a single variable, composed of an expression and 0 or more DbgValueLocEntries.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
bool isAsCheapAsAMove(const MachineInstr &MI) const override
A live range for subregisters.
Definition: LiveInterval.h:693
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
void removeEmptySubRanges()
Removes all subranges without any segments (subranges without segments are not considered valid and s...
Register reg() const
Definition: LiveInterval.h:717
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:803
SubRange * createSubRangeFrom(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, const LiveRange &CopyFrom)
Like createSubRange() but the new range is filled with a copy of the liveness information in CopyFrom...
Definition: LiveInterval.h:794
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:775
void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, std::function< void(LiveInterval::SubRange &)> Apply, const SlotIndexes &Indexes, const TargetRegisterInfo &TRI, unsigned ComposeSubRegIdx=0)
Refines the subranges to support LaneMask.
void computeSubRangeUndefs(SmallVectorImpl< SlotIndex > &Undefs, LaneBitmask LaneMask, const MachineRegisterInfo &MRI, const SlotIndexes &Indexes) const
For a given lane mask LaneMask, compute indexes at which the lane is marked undefined by subregister ...
SubRange * createSubRange(BumpPtrAllocator &Allocator, LaneBitmask LaneMask)
Creates a new empty subregister live range.
Definition: LiveInterval.h:785
void clearSubRanges()
Removes all subregister liveness information.
bool hasInterval(Register Reg) const
SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const
Return the first index in the given basic block.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const
Returns true if VNI is killed by any PHI-def values in LI.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
bool checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs)
Test if LI is live across any register mask instructions, and compute a bit mask of physical register...
SlotIndexes * getSlotIndexes() const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
VNInfo::Allocator & getVNInfoAllocator()
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveRange * getCachedRegUnit(unsigned Unit)
Return the live range for register unit Unit if it has already been computed, or nullptr if it hasn't...
LiveInterval & getInterval(Register Reg)
void pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl< SlotIndex > *EndPoints)
If LR has a live value at Kill, prune its live range by removing any liveness reachable from Kill.
void removeInterval(Register Reg)
Interval removal.
MachineBasicBlock * intervalIsInOneMBB(const LiveInterval &LI) const
If LI is confined to a single basic block, return a pointer to that block.
void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos)
Remove value number and related live segments of LI and its subranges that start at position Pos.
bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
void extendToIndices(LiveRange &LR, ArrayRef< SlotIndex > Indices, ArrayRef< SlotIndex > Undefs)
Extend the live range LR to reach all points in Indices.
void removePhysRegDefAt(MCRegister Reg, SlotIndex Pos)
Remove value numbers and related live segments starting at position Pos that are part of any liverang...
void splitSeparateComponents(LiveInterval &LI, SmallVectorImpl< LiveInterval * > &SplitLIs)
Split separate components in LiveInterval LI into separate intervals.
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
bool isLiveInToMBB(const LiveRange &LR, const MachineBasicBlock *mbb) const
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void print(raw_ostream &O, const Module *=nullptr) const override
Implement the dump method.
Result of a LiveRange query.
Definition: LiveInterval.h:90
VNInfo * valueOutOrDead() const
Returns the value alive at the end of the instruction, if any.
Definition: LiveInterval.h:129
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
VNInfo * valueOut() const
Return the value leaving the instruction, if any.
Definition: LiveInterval.h:123
VNInfo * valueDefined() const
Return the value defined by this instruction, if any.
Definition: LiveInterval.h:135
SlotIndex endPoint() const
Return the end point of the last live range segment to interact with the instruction,...
Definition: LiveInterval.h:147
bool isKill() const
Return true if the live-in value is killed by this instruction.
Definition: LiveInterval.h:112
Callback methods for LiveRangeEdit owners.
Definition: LiveRangeEdit.h:45
virtual void LRE_WillEraseInstruction(MachineInstr *MI)
Called immediately before erasing a dead machine instruction.
Definition: LiveRangeEdit.h:52
void eliminateDeadDefs(SmallVectorImpl< MachineInstr * > &Dead, ArrayRef< Register > RegsBeingSpilled=std::nullopt)
eliminateDeadDefs - Try to delete machine instructions that are now dead (allDefsAreDead returns true...
SlotIndex rematerializeAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, const Remat &RM, const TargetRegisterInfo &, bool Late=false, unsigned SubIdx=0, MachineInstr *ReplaceIndexMI=nullptr)
rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an instruction into MBB before...
bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI)
checkRematerializable - Manually add VNI to the list of rematerializable values if DefMI may be remat...
bool canRematerializeAt(Remat &RM, VNInfo *OrigVNI, SlotIndex UseIdx, bool cheapAsAMove)
canRematerializeAt - Determine if ParentVNI can be rematerialized at UseIdx.
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
VNInfo * getValNumInfo(unsigned ValNo)
getValNumInfo - Returns pointer to the specified val#.
Definition: LiveInterval.h:317
iterator addSegment(Segment S)
Add the specified Segment to this range, merging segments as appropriate.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
Definition: LiveInterval.h:408
void join(LiveRange &Other, const int *ValNoAssignments, const int *RHSValNoAssignments, SmallVectorImpl< VNInfo * > &NewVNInfo)
join - Join two live ranges (this, and other) together.
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
VNInfo * createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc)
createDeadDef - Make sure the range has a value defined at Def.
void removeValNo(VNInfo *ValNo)
removeValNo - Remove all the segments defined by the specified value#.
void verify() const
Walk the range and assert if any invariants fail to hold.
bool empty() const
Definition: LiveInterval.h:382
bool overlaps(const LiveRange &other) const
overlaps - Return true if the intersection of the two live ranges is not empty.
Definition: LiveInterval.h:448
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:541
iterator end()
Definition: LiveInterval.h:216
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx,...
Definition: LiveInterval.h:429
VNInfo * MergeValueNumberInto(VNInfo *V1, VNInfo *V2)
MergeValueNumberInto - This method is called when two value numbers are found to be equivalent.
unsigned getNumValNums() const
Definition: LiveInterval.h:313
iterator begin()
Definition: LiveInterval.h:215
Segments segments
Definition: LiveInterval.h:203
VNInfoList valnos
Definition: LiveInterval.h:204
bool containsOneValue() const
Definition: LiveInterval.h:311
size_t size() const
Definition: LiveInterval.h:305
iterator FindSegmentContaining(SlotIndex Idx)
Return an iterator to the segment that contains the specified index, or end() if there is none.
Definition: LiveInterval.h:436
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
MCRegUnitRootIterator enumerates the root registers of a register unit.
bool isValid() const
Check if the iterator is at the end of the list.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
int findRegisterUseOperandIdx(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
bool isSafeToMove(AAResults *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
bool isImplicitDef() const
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:326
bool isCopyLike() const
Return true if the instruction behaves like a copy.
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
std::pair< bool, bool > readsWritesVirtualRegister(Register Reg, SmallVectorImpl< unsigned > *Ops=nullptr) const
Return a pair of bools (reads, writes) indicating if this instruction reads or writes Reg.
bool isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx=nullptr) const
Return true if the use operand of the specified index is tied to a def operand.
bool isDebugInstr() const
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:546
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand,...
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:540
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:659
void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:472
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:553
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
Definition: MachineInstr.h:730
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
bool allDefsAreDead() const
Return true if all the defs of this instruction are dead.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void substPhysReg(MCRegister Reg, const TargetRegisterInfo &)
substPhysReg - Substitute the current register with the physical register Reg, taking any existing Su...
void setIsUndef(bool Val=true)
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Definition: Pass.cpp:102
bool isProperSubClass(const TargetRegisterClass *RC) const
isProperSubClass - Returns true if RC has a legal super-class with more allocatable registers.
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:180
bool isEarlyClobber() const
isEarlyClobber - Returns true if this is an early-clobber slot.
Definition: SlotIndexes.h:216
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:134
SlotIndex getBaseIndex() const
Returns the base index for associated with this index.
Definition: SlotIndexes.h:228
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
Definition: SlotIndexes.h:276
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:241
bool isDead() const
isDead - Returns true if this is a dead def kill slot.
Definition: SlotIndexes.h:223
SlotIndexes pass.
Definition: SlotIndexes.h:301
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
Returns the basic block which the given index falls in.
Definition: SlotIndexes.h:501
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the last index in the given basic block number.
Definition: SlotIndexes.h:463
SlotIndex getNextNonNullIndex(SlotIndex Index)
Returns the next non-null index, if one exists.
Definition: SlotIndexes.h:396
SlotIndex getInstructionIndex(const MachineInstr &MI, bool IgnoreBundle=false) const
Returns the base index for the given instruction.
Definition: SlotIndexes.h:372
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:453
SlotIndex getIndexBefore(const MachineInstr &MI) const
getIndexBefore - Returns the index of the last indexed instruction before MI, or the start index of i...
Definition: SlotIndexes.h:409
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction for the given index, or null if the given index has no instruction associated...
Definition: SlotIndexes.h:390
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:380
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:384
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void reserve(size_type N)
Definition: SmallVector.h:667
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void push_back(const T &Elt)
Definition: SmallVector.h:416
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:289
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
static const unsigned CommuteAnyOperandIndex