LLVM  10.0.0svn
MachineCSE.cpp
Go to the documentation of this file.
1 //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs global common subexpression elimination on machine
10 // instructions using a scoped hash table based value numbering scheme. It
11 // must be run while the machine function is still in SSA form.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/CFG.h"
31 #include "llvm/CodeGen/Passes.h"
36 #include "llvm/MC/MCInstrDesc.h"
37 #include "llvm/MC/MCRegisterInfo.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/Allocator.h"
40 #include "llvm/Support/Debug.h"
43 #include <cassert>
44 #include <iterator>
45 #include <utility>
46 #include <vector>
47 
48 using namespace llvm;
49 
50 #define DEBUG_TYPE "machine-cse"
51 
52 STATISTIC(NumCoalesces, "Number of copies coalesced");
53 STATISTIC(NumCSEs, "Number of common subexpression eliminated");
54 STATISTIC(NumPREs, "Number of partial redundant expression"
55  " transformed to fully redundant");
56 STATISTIC(NumPhysCSEs,
57  "Number of physreg referencing common subexpr eliminated");
58 STATISTIC(NumCrossBBCSEs,
59  "Number of cross-MBB physreg referencing CS eliminated");
60 STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
61 
62 namespace {
63 
64  class MachineCSE : public MachineFunctionPass {
65  const TargetInstrInfo *TII;
66  const TargetRegisterInfo *TRI;
67  AliasAnalysis *AA;
71 
72  public:
73  static char ID; // Pass identification
74 
75  MachineCSE() : MachineFunctionPass(ID) {
77  }
78 
79  bool runOnMachineFunction(MachineFunction &MF) override;
80 
81  void getAnalysisUsage(AnalysisUsage &AU) const override {
82  AU.setPreservesCFG();
90  }
91 
92  void releaseMemory() override {
93  ScopeMap.clear();
94  PREMap.clear();
95  Exps.clear();
96  }
97 
98  private:
99  using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
101  using ScopedHTType =
103  AllocatorTy>;
104  using ScopeType = ScopedHTType::ScopeTy;
105  using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
106 
107  unsigned LookAheadLimit = 0;
110  PREMap;
111  ScopedHTType VNT;
113  unsigned CurrVN = 0;
114 
115  bool PerformTrivialCopyPropagation(MachineInstr *MI,
116  MachineBasicBlock *MBB);
117  bool isPhysDefTriviallyDead(unsigned Reg,
120  bool hasLivePhysRegDefUses(const MachineInstr *MI,
121  const MachineBasicBlock *MBB,
122  SmallSet<unsigned, 8> &PhysRefs,
123  PhysDefVector &PhysDefs, bool &PhysUseDef) const;
124  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
125  SmallSet<unsigned, 8> &PhysRefs,
126  PhysDefVector &PhysDefs, bool &NonLocal) const;
127  bool isCSECandidate(MachineInstr *MI);
128  bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
129  MachineBasicBlock *CSBB, MachineInstr *MI);
130  void EnterScope(MachineBasicBlock *MBB);
131  void ExitScope(MachineBasicBlock *MBB);
132  bool ProcessBlockCSE(MachineBasicBlock *MBB);
133  void ExitScopeIfDone(MachineDomTreeNode *Node,
135  bool PerformCSE(MachineDomTreeNode *Node);
136 
137  bool isPRECandidate(MachineInstr *MI);
138  bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
139  bool PerformSimplePRE(MachineDominatorTree *DT);
140  /// Heuristics to see if it's profitable to move common computations of MBB
141  /// and MBB1 to CandidateBB.
142  bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
143  MachineBasicBlock *MBB,
144  MachineBasicBlock *MBB1);
145  };
146 
147 } // end anonymous namespace
148 
149 char MachineCSE::ID = 0;
150 
152 
154  "Machine Common Subexpression Elimination", false, false)
158  "Machine Common Subexpression Elimination", false, false)
159 
160 /// The source register of a COPY machine instruction can be propagated to all
161 /// its users, and this propagation could increase the probability of finding
162 /// common subexpressions. If the COPY has only one user, the COPY itself can
163 /// be removed.
164 bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
165  MachineBasicBlock *MBB) {
166  bool Changed = false;
167  for (MachineOperand &MO : MI->operands()) {
168  if (!MO.isReg() || !MO.isUse())
169  continue;
170  Register Reg = MO.getReg();
171  if (!Register::isVirtualRegister(Reg))
172  continue;
173  bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
174  MachineInstr *DefMI = MRI->getVRegDef(Reg);
175  if (!DefMI->isCopy())
176  continue;
177  Register SrcReg = DefMI->getOperand(1).getReg();
178  if (!Register::isVirtualRegister(SrcReg))
179  continue;
180  if (DefMI->getOperand(0).getSubReg())
181  continue;
182  // FIXME: We should trivially coalesce subregister copies to expose CSE
183  // opportunities on instructions with truncated operands (see
184  // cse-add-with-overflow.ll). This can be done here as follows:
185  // if (SrcSubReg)
186  // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
187  // SrcSubReg);
188  // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
189  //
190  // The 2-addr pass has been updated to handle coalesced subregs. However,
191  // some machine-specific code still can't handle it.
192  // To handle it properly we also need a way find a constrained subregister
193  // class given a super-reg class and subreg index.
194  if (DefMI->getOperand(1).getSubReg())
195  continue;
196  if (!MRI->constrainRegAttrs(SrcReg, Reg))
197  continue;
198  LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
199  LLVM_DEBUG(dbgs() << "*** to: " << *MI);
200 
201  // Propagate SrcReg of copies to MI.
202  MO.setReg(SrcReg);
203  MRI->clearKillFlags(SrcReg);
204  // Coalesce single use copies.
205  if (OnlyOneUse) {
206  // If (and only if) we've eliminated all uses of the copy, also
207  // copy-propagate to any debug-users of MI, or they'll be left using
208  // an undefined value.
209  DefMI->changeDebugValuesDefReg(SrcReg);
210 
211  DefMI->eraseFromParent();
212  ++NumCoalesces;
213  }
214  Changed = true;
215  }
216 
217  return Changed;
218 }
219 
220 bool
221 MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
224  unsigned LookAheadLeft = LookAheadLimit;
225  while (LookAheadLeft) {
226  // Skip over dbg_value's.
228 
229  if (I == E)
230  // Reached end of block, we don't know if register is dead or not.
231  return false;
232 
233  bool SeenDef = false;
234  for (const MachineOperand &MO : I->operands()) {
235  if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
236  SeenDef = true;
237  if (!MO.isReg() || !MO.getReg())
238  continue;
239  if (!TRI->regsOverlap(MO.getReg(), Reg))
240  continue;
241  if (MO.isUse())
242  // Found a use!
243  return false;
244  SeenDef = true;
245  }
246  if (SeenDef)
247  // See a def of Reg (or an alias) before encountering any use, it's
248  // trivially dead.
249  return true;
250 
251  --LookAheadLeft;
252  ++I;
253  }
254  return false;
255 }
256 
257 static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
258  const MachineFunction &MF,
259  const TargetRegisterInfo &TRI) {
260  // MachineRegisterInfo::isConstantPhysReg directly called by
261  // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
262  // reserved registers to be frozen. That doesn't cause a problem post-ISel as
263  // most (if not all) targets freeze reserved registers right after ISel.
264  //
265  // It does cause issues mid-GlobalISel, however, hence the additional
266  // reservedRegsFrozen check.
267  const MachineRegisterInfo &MRI = MF.getRegInfo();
268  return TRI.isCallerPreservedPhysReg(Reg, MF) ||
269  (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
270 }
271 
272 /// hasLivePhysRegDefUses - Return true if the specified instruction read/write
273 /// physical registers (except for dead defs of physical registers). It also
274 /// returns the physical register def by reference if it's the only one and the
275 /// instruction does not uses a physical register.
276 bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
277  const MachineBasicBlock *MBB,
278  SmallSet<unsigned, 8> &PhysRefs,
279  PhysDefVector &PhysDefs,
280  bool &PhysUseDef) const {
281  // First, add all uses to PhysRefs.
282  for (const MachineOperand &MO : MI->operands()) {
283  if (!MO.isReg() || MO.isDef())
284  continue;
285  Register Reg = MO.getReg();
286  if (!Reg)
287  continue;
289  continue;
290  // Reading either caller preserved or constant physregs is ok.
291  if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
292  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
293  PhysRefs.insert(*AI);
294  }
295 
296  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
297  // (which currently contains only uses), set the PhysUseDef flag.
298  PhysUseDef = false;
299  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
300  for (const auto &MOP : llvm::enumerate(MI->operands())) {
301  const MachineOperand &MO = MOP.value();
302  if (!MO.isReg() || !MO.isDef())
303  continue;
304  Register Reg = MO.getReg();
305  if (!Reg)
306  continue;
308  continue;
309  // Check against PhysRefs even if the def is "dead".
310  if (PhysRefs.count(Reg))
311  PhysUseDef = true;
312  // If the def is dead, it's ok. But the def may not marked "dead". That's
313  // common since this pass is run before livevariables. We can scan
314  // forward a few instructions and check if it is obviously dead.
315  if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
316  PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
317  }
318 
319  // Finally, add all defs to PhysRefs as well.
320  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
321  for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
322  ++AI)
323  PhysRefs.insert(*AI);
324 
325  return !PhysRefs.empty();
326 }
327 
328 bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
329  SmallSet<unsigned, 8> &PhysRefs,
330  PhysDefVector &PhysDefs,
331  bool &NonLocal) const {
332  // For now conservatively returns false if the common subexpression is
333  // not in the same basic block as the given instruction. The only exception
334  // is if the common subexpression is in the sole predecessor block.
335  const MachineBasicBlock *MBB = MI->getParent();
336  const MachineBasicBlock *CSMBB = CSMI->getParent();
337 
338  bool CrossMBB = false;
339  if (CSMBB != MBB) {
340  if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
341  return false;
342 
343  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
344  if (MRI->isAllocatable(PhysDefs[i].second) ||
345  MRI->isReserved(PhysDefs[i].second))
346  // Avoid extending live range of physical registers if they are
347  //allocatable or reserved.
348  return false;
349  }
350  CrossMBB = true;
351  }
352  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
355  unsigned LookAheadLeft = LookAheadLimit;
356  while (LookAheadLeft) {
357  // Skip over dbg_value's.
358  while (I != E && I != EE && I->isDebugInstr())
359  ++I;
360 
361  if (I == EE) {
362  assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
363  (void)CrossMBB;
364  CrossMBB = false;
365  NonLocal = true;
366  I = MBB->begin();
367  EE = MBB->end();
368  continue;
369  }
370 
371  if (I == E)
372  return true;
373 
374  for (const MachineOperand &MO : I->operands()) {
375  // RegMasks go on instructions like calls that clobber lots of physregs.
376  // Don't attempt to CSE across such an instruction.
377  if (MO.isRegMask())
378  return false;
379  if (!MO.isReg() || !MO.isDef())
380  continue;
381  Register MOReg = MO.getReg();
382  if (Register::isVirtualRegister(MOReg))
383  continue;
384  if (PhysRefs.count(MOReg))
385  return false;
386  }
387 
388  --LookAheadLeft;
389  ++I;
390  }
391 
392  return false;
393 }
394 
395 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
396  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
397  MI->isInlineAsm() || MI->isDebugInstr())
398  return false;
399 
400  // Ignore copies.
401  if (MI->isCopyLike())
402  return false;
403 
404  // Ignore stuff that we obviously can't move.
405  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
407  return false;
408 
409  if (MI->mayLoad()) {
410  // Okay, this instruction does a load. As a refinement, we allow the target
411  // to decide whether the loaded value is actually a constant. If so, we can
412  // actually use it as a load.
413  if (!MI->isDereferenceableInvariantLoad(AA))
414  // FIXME: we should be able to hoist loads with no other side effects if
415  // there are no other instructions which can change memory in this loop.
416  // This is a trivial form of alias analysis.
417  return false;
418  }
419 
420  // Ignore stack guard loads, otherwise the register that holds CSEed value may
421  // be spilled and get loaded back with corrupted data.
422  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
423  return false;
424 
425  return true;
426 }
427 
428 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
429 /// common expression that defines Reg. CSBB is basic block where CSReg is
430 /// defined.
431 bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
432  MachineBasicBlock *CSBB, MachineInstr *MI) {
433  // FIXME: Heuristics that works around the lack the live range splitting.
434 
435  // If CSReg is used at all uses of Reg, CSE should not increase register
436  // pressure of CSReg.
437  bool MayIncreasePressure = true;
439  MayIncreasePressure = false;
441  for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
442  CSUses.insert(&MI);
443  }
444  for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
445  if (!CSUses.count(&MI)) {
446  MayIncreasePressure = true;
447  break;
448  }
449  }
450  }
451  if (!MayIncreasePressure) return true;
452 
453  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
454  // an immediate predecessor. We don't want to increase register pressure and
455  // end up causing other computation to be spilled.
456  if (TII->isAsCheapAsAMove(*MI)) {
457  MachineBasicBlock *BB = MI->getParent();
458  if (CSBB != BB && !CSBB->isSuccessor(BB))
459  return false;
460  }
461 
462  // Heuristics #2: If the expression doesn't not use a vr and the only use
463  // of the redundant computation are copies, do not cse.
464  bool HasVRegUse = false;
465  for (const MachineOperand &MO : MI->operands()) {
466  if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) {
467  HasVRegUse = true;
468  break;
469  }
470  }
471  if (!HasVRegUse) {
472  bool HasNonCopyUse = false;
473  for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
474  // Ignore copies.
475  if (!MI.isCopyLike()) {
476  HasNonCopyUse = true;
477  break;
478  }
479  }
480  if (!HasNonCopyUse)
481  return false;
482  }
483 
484  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
485  // it unless the defined value is already used in the BB of the new use.
486  bool HasPHI = false;
487  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
488  HasPHI |= UseMI.isPHI();
489  if (UseMI.getParent() == MI->getParent())
490  return true;
491  }
492 
493  return !HasPHI;
494 }
495 
496 void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
497  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
498  ScopeType *Scope = new ScopeType(VNT);
499  ScopeMap[MBB] = Scope;
500 }
501 
502 void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
503  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
505  assert(SI != ScopeMap.end());
506  delete SI->second;
507  ScopeMap.erase(SI);
508 }
509 
510 bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
511  bool Changed = false;
512 
514  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
515  SmallVector<unsigned, 2> ImplicitDefs;
516  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
517  MachineInstr *MI = &*I;
518  ++I;
519 
520  if (!isCSECandidate(MI))
521  continue;
522 
523  bool FoundCSE = VNT.count(MI);
524  if (!FoundCSE) {
525  // Using trivial copy propagation to find more CSE opportunities.
526  if (PerformTrivialCopyPropagation(MI, MBB)) {
527  Changed = true;
528 
529  // After coalescing MI itself may become a copy.
530  if (MI->isCopyLike())
531  continue;
532 
533  // Try again to see if CSE is possible.
534  FoundCSE = VNT.count(MI);
535  }
536  }
537 
538  // Commute commutable instructions.
539  bool Commuted = false;
540  if (!FoundCSE && MI->isCommutable()) {
541  if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
542  Commuted = true;
543  FoundCSE = VNT.count(NewMI);
544  if (NewMI != MI) {
545  // New instruction. It doesn't need to be kept.
546  NewMI->eraseFromParent();
547  Changed = true;
548  } else if (!FoundCSE)
549  // MI was changed but it didn't help, commute it back!
550  (void)TII->commuteInstruction(*MI);
551  }
552  }
553 
554  // If the instruction defines physical registers and the values *may* be
555  // used, then it's not safe to replace it with a common subexpression.
556  // It's also not safe if the instruction uses physical registers.
557  bool CrossMBBPhysDef = false;
558  SmallSet<unsigned, 8> PhysRefs;
559  PhysDefVector PhysDefs;
560  bool PhysUseDef = false;
561  if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
562  PhysDefs, PhysUseDef)) {
563  FoundCSE = false;
564 
565  // ... Unless the CS is local or is in the sole predecessor block
566  // and it also defines the physical register which is not clobbered
567  // in between and the physical register uses were not clobbered.
568  // This can never be the case if the instruction both uses and
569  // defines the same physical register, which was detected above.
570  if (!PhysUseDef) {
571  unsigned CSVN = VNT.lookup(MI);
572  MachineInstr *CSMI = Exps[CSVN];
573  if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
574  FoundCSE = true;
575  }
576  }
577 
578  if (!FoundCSE) {
579  VNT.insert(MI, CurrVN++);
580  Exps.push_back(MI);
581  continue;
582  }
583 
584  // Found a common subexpression, eliminate it.
585  unsigned CSVN = VNT.lookup(MI);
586  MachineInstr *CSMI = Exps[CSVN];
587  LLVM_DEBUG(dbgs() << "Examining: " << *MI);
588  LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
589 
590  // Check if it's profitable to perform this CSE.
591  bool DoCSE = true;
592  unsigned NumDefs = MI->getNumDefs();
593 
594  for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
595  MachineOperand &MO = MI->getOperand(i);
596  if (!MO.isReg() || !MO.isDef())
597  continue;
598  Register OldReg = MO.getReg();
599  Register NewReg = CSMI->getOperand(i).getReg();
600 
601  // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
602  // we should make sure it is not dead at CSMI.
603  if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
604  ImplicitDefsToUpdate.push_back(i);
605 
606  // Keep track of implicit defs of CSMI and MI, to clear possibly
607  // made-redundant kill flags.
608  if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
609  ImplicitDefs.push_back(OldReg);
610 
611  if (OldReg == NewReg) {
612  --NumDefs;
613  continue;
614  }
615 
617  Register::isVirtualRegister(NewReg) &&
618  "Do not CSE physical register defs!");
619 
620  if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
621  LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
622  DoCSE = false;
623  break;
624  }
625 
626  // Don't perform CSE if the result of the new instruction cannot exist
627  // within the constraints (register class, bank, or low-level type) of
628  // the old instruction.
629  if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
630  LLVM_DEBUG(
631  dbgs() << "*** Not the same register constraints, avoid CSE!\n");
632  DoCSE = false;
633  break;
634  }
635 
636  CSEPairs.push_back(std::make_pair(OldReg, NewReg));
637  --NumDefs;
638  }
639 
640  // Actually perform the elimination.
641  if (DoCSE) {
642  for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
643  unsigned OldReg = CSEPair.first;
644  unsigned NewReg = CSEPair.second;
645  // OldReg may have been unused but is used now, clear the Dead flag
646  MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
647  assert(Def != nullptr && "CSEd register has no unique definition?");
648  Def->clearRegisterDeads(NewReg);
649  // Replace with NewReg and clear kill flags which may be wrong now.
650  MRI->replaceRegWith(OldReg, NewReg);
651  MRI->clearKillFlags(NewReg);
652  }
653 
654  // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
655  // we should make sure it is not dead at CSMI.
656  for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
657  CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
658  for (auto PhysDef : PhysDefs)
659  if (!MI->getOperand(PhysDef.first).isDead())
660  CSMI->getOperand(PhysDef.first).setIsDead(false);
661 
662  // Go through implicit defs of CSMI and MI, and clear the kill flags on
663  // their uses in all the instructions between CSMI and MI.
664  // We might have made some of the kill flags redundant, consider:
665  // subs ... implicit-def %nzcv <- CSMI
666  // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
667  // subs ... implicit-def %nzcv <- MI, to be eliminated
668  // csinc ... implicit killed %nzcv
669  // Since we eliminated MI, and reused a register imp-def'd by CSMI
670  // (here %nzcv), that register, if it was killed before MI, should have
671  // that kill flag removed, because it's lifetime was extended.
672  if (CSMI->getParent() == MI->getParent()) {
673  for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
674  for (auto ImplicitDef : ImplicitDefs)
675  if (MachineOperand *MO = II->findRegisterUseOperand(
676  ImplicitDef, /*isKill=*/true, TRI))
677  MO->setIsKill(false);
678  } else {
679  // If the instructions aren't in the same BB, bail out and clear the
680  // kill flag on all uses of the imp-def'd register.
681  for (auto ImplicitDef : ImplicitDefs)
682  MRI->clearKillFlags(ImplicitDef);
683  }
684 
685  if (CrossMBBPhysDef) {
686  // Add physical register defs now coming in from a predecessor to MBB
687  // livein list.
688  while (!PhysDefs.empty()) {
689  auto LiveIn = PhysDefs.pop_back_val();
690  if (!MBB->isLiveIn(LiveIn.second))
691  MBB->addLiveIn(LiveIn.second);
692  }
693  ++NumCrossBBCSEs;
694  }
695 
696  MI->eraseFromParent();
697  ++NumCSEs;
698  if (!PhysRefs.empty())
699  ++NumPhysCSEs;
700  if (Commuted)
701  ++NumCommutes;
702  Changed = true;
703  } else {
704  VNT.insert(MI, CurrVN++);
705  Exps.push_back(MI);
706  }
707  CSEPairs.clear();
708  ImplicitDefsToUpdate.clear();
709  ImplicitDefs.clear();
710  }
711 
712  return Changed;
713 }
714 
715 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
716 /// dominator tree node if its a leaf or all of its children are done. Walk
717 /// up the dominator tree to destroy ancestors which are now done.
718 void
719 MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
721  if (OpenChildren[Node])
722  return;
723 
724  // Pop scope.
725  ExitScope(Node->getBlock());
726 
727  // Now traverse upwards to pop ancestors whose offsprings are all done.
728  while (MachineDomTreeNode *Parent = Node->getIDom()) {
729  unsigned Left = --OpenChildren[Parent];
730  if (Left != 0)
731  break;
732  ExitScope(Parent->getBlock());
733  Node = Parent;
734  }
735 }
736 
737 bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
741 
742  CurrVN = 0;
743 
744  // Perform a DFS walk to determine the order of visit.
745  WorkList.push_back(Node);
746  do {
747  Node = WorkList.pop_back_val();
748  Scopes.push_back(Node);
749  const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
750  OpenChildren[Node] = Children.size();
751  for (MachineDomTreeNode *Child : Children)
752  WorkList.push_back(Child);
753  } while (!WorkList.empty());
754 
755  // Now perform CSE.
756  bool Changed = false;
757  for (MachineDomTreeNode *Node : Scopes) {
758  MachineBasicBlock *MBB = Node->getBlock();
759  EnterScope(MBB);
760  Changed |= ProcessBlockCSE(MBB);
761  // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
762  ExitScopeIfDone(Node, OpenChildren);
763  }
764 
765  return Changed;
766 }
767 
768 // We use stronger checks for PRE candidate rather than for CSE ones to embrace
769 // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
770 // to exclude instrs created by PRE that won't be CSEed later.
771 bool MachineCSE::isPRECandidate(MachineInstr *MI) {
772  if (!isCSECandidate(MI) ||
773  MI->isNotDuplicable() ||
774  MI->mayLoad() ||
775  MI->isAsCheapAsAMove() ||
776  MI->getNumDefs() != 1 ||
777  MI->getNumExplicitDefs() != 1)
778  return false;
779 
780  for (auto def : MI->defs())
781  if (!Register::isVirtualRegister(def.getReg()))
782  return false;
783 
784  for (auto use : MI->uses())
785  if (use.isReg() && !Register::isVirtualRegister(use.getReg()))
786  return false;
787 
788  return true;
789 }
790 
791 bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
792  MachineBasicBlock *MBB) {
793  bool Changed = false;
794  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
795  MachineInstr *MI = &*I;
796  ++I;
797 
798  if (!isPRECandidate(MI))
799  continue;
800 
801  if (!PREMap.count(MI)) {
802  PREMap[MI] = MBB;
803  continue;
804  }
805 
806  auto MBB1 = PREMap[MI];
807  assert(
808  !DT->properlyDominates(MBB, MBB1) &&
809  "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
810  auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
811  if (!CMBB->isLegalToHoistInto())
812  continue;
813 
814  if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
815  continue;
816 
817  // Two instrs are partial redundant if their basic blocks are reachable
818  // from one to another but one doesn't dominate another.
819  if (CMBB != MBB1) {
820  auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
821  if (BB != nullptr && BB1 != nullptr &&
822  (isPotentiallyReachable(BB1, BB) ||
823  isPotentiallyReachable(BB, BB1))) {
824 
825  assert(MI->getOperand(0).isDef() &&
826  "First operand of instr with one explicit def must be this def");
827  Register VReg = MI->getOperand(0).getReg();
828  Register NewReg = MRI->cloneVirtualRegister(VReg);
829  if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
830  continue;
831  MachineInstr &NewMI =
832  TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
833  NewMI.getOperand(0).setReg(NewReg);
834 
835  PREMap[MI] = CMBB;
836  ++NumPREs;
837  Changed = true;
838  }
839  }
840  }
841  return Changed;
842 }
843 
844 // This simple PRE (partial redundancy elimination) pass doesn't actually
845 // eliminate partial redundancy but transforms it to full redundancy,
846 // anticipating that the next CSE step will eliminate this created redundancy.
847 // If CSE doesn't eliminate this, than created instruction will remain dead
848 // and eliminated later by Remove Dead Machine Instructions pass.
849 bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
851 
852  PREMap.clear();
853  bool Changed = false;
854  BBs.push_back(DT->getRootNode());
855  do {
856  auto Node = BBs.pop_back_val();
857  const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
858  for (MachineDomTreeNode *Child : Children)
859  BBs.push_back(Child);
860 
861  MachineBasicBlock *MBB = Node->getBlock();
862  Changed |= ProcessBlockPRE(DT, MBB);
863 
864  } while (!BBs.empty());
865 
866  return Changed;
867 }
868 
869 bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
870  MachineBasicBlock *MBB,
871  MachineBasicBlock *MBB1) {
872  if (CandidateBB->getParent()->getFunction().hasMinSize())
873  return true;
874  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
875  assert(DT->dominates(CandidateBB, MBB1) &&
876  "CandidateBB should dominate MBB1");
877  return MBFI->getBlockFreq(CandidateBB) <=
878  MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
879 }
880 
881 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
882  if (skipFunction(MF.getFunction()))
883  return false;
884 
885  TII = MF.getSubtarget().getInstrInfo();
886  TRI = MF.getSubtarget().getRegisterInfo();
887  MRI = &MF.getRegInfo();
888  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
889  DT = &getAnalysis<MachineDominatorTree>();
890  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
891  LookAheadLimit = TII->getMachineCSELookAheadLimit();
892  bool ChangedPRE, ChangedCSE;
893  ChangedPRE = PerformSimplePRE(DT);
894  ChangedCSE = PerformCSE(DT->getRootNode());
895  return ChangedPRE || ChangedCSE;
896 }
Machine Common Subexpression Elimination
Definition: MachineCSE.cpp:157
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs that point to the register def in this instruction and point them to Reg instead...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool isReserved(Register PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:657
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:510
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock *> *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction &#39;To&#39; is reachable from &#39;From&#39;, without passing through any blocks in Ex...
Definition: CFG.cpp:218
INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE, "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_END(MachineCSE
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
void push_back(const T &Elt)
Definition: SmallVector.h:211
void initializeMachineCSEPass(PassRegistry &)
#define DEBUG_TYPE
Definition: MachineCSE.cpp:50
unsigned Reg
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore...
MachineInstr * commuteInstruction(MachineInstr &MI, bool NewMI=false, unsigned OpIdx1=CommuteAnyOperandIndex, unsigned OpIdx2=CommuteAnyOperandIndex) const
This method commutes the operands of the given machine instruction MI.
unsigned getSubReg() const
bool isInlineAsm() const
bool constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
unsigned second
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
void setIsDead(bool Val=true)
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:477
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isPHI() const
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
virtual unsigned getMachineCSELookAheadLimit() const
Return the value to use for the MachineCSE&#39;s LookAheadLimit, which is a heuristic used for CSE&#39;ing ph...
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:414
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:673
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool regsOverlap(Register regA, Register regB) const
Returns true if the two registers are equal or alias each other.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:151
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:864
void clearKillFlags(unsigned Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
Base class for the actual dominator tree node.
Definition: LiveRangeCalc.h:37
const std::vector< DomTreeNodeBase * > & getChildren() const
AnalysisUsage & addPreservedID(const void *ID)
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:365
void setReg(Register Reg)
Change the register this operand corresponds to.
virtual const TargetInstrInfo * getInstrInfo() const
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
TargetInstrInfo - Interface to description of machine instruction set.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition: Allocator.h:439
NodeT * getBlock() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:844
unsigned const MachineRegisterInfo * MRI
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Move duplicate certain instructions close to their use
Definition: Localizer.cpp:27
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineInstrBuilder & UseMI
DomTreeNodeBase * getIDom() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
MCRegAliasIterator enumerates all registers aliasing Reg.
Represent the analysis usage information of a pass.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:499
constexpr double e
Definition: MathExtras.h:57
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
bool isCopy() const
bool isImplicitDef() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool isDebugInstr() const
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
bool isConstantPhysReg(unsigned PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
virtual bool isAsCheapAsAMove(const MachineInstr &MI) const
Return true if the instruction is as cheap as a move instruction.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
MachineDomTreeNode * getRootNode() const
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineInstrBuilder MachineInstrBuilder & DefMI
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
unsigned pred_size() const
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
void replaceRegWith(unsigned FromReg, unsigned ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
IterT skipDebugInstructionsForward(IterT It, IterT End)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator...
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:619
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
unsigned getNumDefs() const
Returns the total number of definitions.
Definition: MachineInstr.h:426
virtual bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const
Physical registers that may be modified within a function but are guaranteed to be restored before an...
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
bool hasOneNonDBGUse(unsigned RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register...
bool isKill() const
unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
static bool isCallerPreservedOrConstPhysReg(unsigned Reg, const MachineFunction &MF, const TargetRegisterInfo &TRI)
Definition: MachineCSE.cpp:257
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:831
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
bool isPosition() const
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
IRTranslator LLVM IR MI
Register getReg() const
getReg - Returns the register number.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(unsigned Reg) const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool isAsCheapAsAMove(QueryType Type=AllInBundle) const
Returns true if this instruction has the same cost (or less) than a move instruction.
Definition: MachineInstr.h:941
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
Definition: MachineInstr.h:883
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1500
bool isNotDuplicable(QueryType Type=AnyInBundle) const
Return true if this instruction cannot be safely duplicated.
Definition: MachineInstr.h:747
bool isImplicit() const
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164