LLVM 18.0.0git
MachineCSE.cpp
Go to the documentation of this file.
1//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global common subexpression elimination on machine
10// instructions using a scoped hash table based value numbering scheme. It
11// must be run while the machine function is still in SSA form.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/SmallSet.h"
20#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/CFG.h"
31#include "llvm/CodeGen/Passes.h"
37#include "llvm/MC/MCRegister.h"
39#include "llvm/Pass.h"
41#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <iterator>
46#include <utility>
47#include <vector>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "machine-cse"
52
53STATISTIC(NumCoalesces, "Number of copies coalesced");
54STATISTIC(NumCSEs, "Number of common subexpression eliminated");
55STATISTIC(NumPREs, "Number of partial redundant expression"
56 " transformed to fully redundant");
57STATISTIC(NumPhysCSEs,
58 "Number of physreg referencing common subexpr eliminated");
59STATISTIC(NumCrossBBCSEs,
60 "Number of cross-MBB physreg referencing CS eliminated");
61STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
62
63// Threshold to avoid excessive cost to compute isProfitableToCSE.
64static cl::opt<int>
65 CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
66 cl::desc("Threshold for the size of CSUses"));
67
69 "aggressive-machine-cse", cl::Hidden, cl::init(false),
70 cl::desc("Override the profitability heuristics for Machine CSE"));
71
72namespace {
73
74 class MachineCSE : public MachineFunctionPass {
75 const TargetInstrInfo *TII = nullptr;
76 const TargetRegisterInfo *TRI = nullptr;
77 AliasAnalysis *AA = nullptr;
78 MachineDominatorTree *DT = nullptr;
79 MachineRegisterInfo *MRI = nullptr;
80 MachineBlockFrequencyInfo *MBFI = nullptr;
81
82 public:
83 static char ID; // Pass identification
84
85 MachineCSE() : MachineFunctionPass(ID) {
87 }
88
89 bool runOnMachineFunction(MachineFunction &MF) override;
90
91 void getAnalysisUsage(AnalysisUsage &AU) const override {
92 AU.setPreservesCFG();
100 }
101
104 .set(MachineFunctionProperties::Property::IsSSA);
105 }
106
107 void releaseMemory() override {
108 ScopeMap.clear();
109 PREMap.clear();
110 Exps.clear();
111 }
112
113 private:
114 using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
116 using ScopedHTType =
118 AllocatorTy>;
119 using ScopeType = ScopedHTType::ScopeTy;
120 using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
121
122 unsigned LookAheadLimit = 0;
125 PREMap;
126 ScopedHTType VNT;
128 unsigned CurrVN = 0;
129
130 bool PerformTrivialCopyPropagation(MachineInstr *MI,
132 bool isPhysDefTriviallyDead(MCRegister Reg,
135 bool hasLivePhysRegDefUses(const MachineInstr *MI,
136 const MachineBasicBlock *MBB,
137 SmallSet<MCRegister, 8> &PhysRefs,
138 PhysDefVector &PhysDefs, bool &PhysUseDef) const;
139 bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
140 SmallSet<MCRegister, 8> &PhysRefs,
141 PhysDefVector &PhysDefs, bool &NonLocal) const;
142 bool isCSECandidate(MachineInstr *MI);
143 bool isProfitableToCSE(Register CSReg, Register Reg,
145 void EnterScope(MachineBasicBlock *MBB);
146 void ExitScope(MachineBasicBlock *MBB);
147 bool ProcessBlockCSE(MachineBasicBlock *MBB);
148 void ExitScopeIfDone(MachineDomTreeNode *Node,
150 bool PerformCSE(MachineDomTreeNode *Node);
151
152 bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
153 bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
154 bool PerformSimplePRE(MachineDominatorTree *DT);
155 /// Heuristics to see if it's profitable to move common computations of MBB
156 /// and MBB1 to CandidateBB.
157 bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
159 MachineBasicBlock *MBB1);
160 };
161
162} // end anonymous namespace
163
164char MachineCSE::ID = 0;
165
166char &llvm::MachineCSEID = MachineCSE::ID;
167
169 "Machine Common Subexpression Elimination", false, false)
173 "Machine Common Subexpression Elimination", false, false)
174
175/// The source register of a COPY machine instruction can be propagated to all
176/// its users, and this propagation could increase the probability of finding
177/// common subexpressions. If the COPY has only one user, the COPY itself can
178/// be removed.
179bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
181 bool Changed = false;
182 for (MachineOperand &MO : MI->all_uses()) {
183 Register Reg = MO.getReg();
184 if (!Reg.isVirtual())
185 continue;
186 bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
187 MachineInstr *DefMI = MRI->getVRegDef(Reg);
188 if (!DefMI->isCopy())
189 continue;
190 Register SrcReg = DefMI->getOperand(1).getReg();
191 if (!SrcReg.isVirtual())
192 continue;
193 if (DefMI->getOperand(0).getSubReg())
194 continue;
195 // FIXME: We should trivially coalesce subregister copies to expose CSE
196 // opportunities on instructions with truncated operands (see
197 // cse-add-with-overflow.ll). This can be done here as follows:
198 // if (SrcSubReg)
199 // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
200 // SrcSubReg);
201 // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
202 //
203 // The 2-addr pass has been updated to handle coalesced subregs. However,
204 // some machine-specific code still can't handle it.
205 // To handle it properly we also need a way find a constrained subregister
206 // class given a super-reg class and subreg index.
207 if (DefMI->getOperand(1).getSubReg())
208 continue;
209 if (!MRI->constrainRegAttrs(SrcReg, Reg))
210 continue;
211 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
212 LLVM_DEBUG(dbgs() << "*** to: " << *MI);
213
214 // Propagate SrcReg of copies to MI.
215 MO.setReg(SrcReg);
216 MRI->clearKillFlags(SrcReg);
217 // Coalesce single use copies.
218 if (OnlyOneUse) {
219 // If (and only if) we've eliminated all uses of the copy, also
220 // copy-propagate to any debug-users of MI, or they'll be left using
221 // an undefined value.
223
225 ++NumCoalesces;
226 }
227 Changed = true;
228 }
229
230 return Changed;
231}
232
233bool MachineCSE::isPhysDefTriviallyDead(
236 unsigned LookAheadLeft = LookAheadLimit;
237 while (LookAheadLeft) {
238 // Skip over dbg_value's.
240
241 if (I == E)
242 // Reached end of block, we don't know if register is dead or not.
243 return false;
244
245 bool SeenDef = false;
246 for (const MachineOperand &MO : I->operands()) {
247 if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
248 SeenDef = true;
249 if (!MO.isReg() || !MO.getReg())
250 continue;
251 if (!TRI->regsOverlap(MO.getReg(), Reg))
252 continue;
253 if (MO.isUse())
254 // Found a use!
255 return false;
256 SeenDef = true;
257 }
258 if (SeenDef)
259 // See a def of Reg (or an alias) before encountering any use, it's
260 // trivially dead.
261 return true;
262
263 --LookAheadLeft;
264 ++I;
265 }
266 return false;
267}
268
270 const MachineOperand &MO,
271 const MachineFunction &MF,
272 const TargetRegisterInfo &TRI,
273 const TargetInstrInfo &TII) {
274 // MachineRegisterInfo::isConstantPhysReg directly called by
275 // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
276 // reserved registers to be frozen. That doesn't cause a problem post-ISel as
277 // most (if not all) targets freeze reserved registers right after ISel.
278 //
279 // It does cause issues mid-GlobalISel, however, hence the additional
280 // reservedRegsFrozen check.
281 const MachineRegisterInfo &MRI = MF.getRegInfo();
282 return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
283 (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
284}
285
286/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
287/// physical registers (except for dead defs of physical registers). It also
288/// returns the physical register def by reference if it's the only one and the
289/// instruction does not uses a physical register.
290bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
291 const MachineBasicBlock *MBB,
292 SmallSet<MCRegister, 8> &PhysRefs,
293 PhysDefVector &PhysDefs,
294 bool &PhysUseDef) const {
295 // First, add all uses to PhysRefs.
296 for (const MachineOperand &MO : MI->all_uses()) {
297 Register Reg = MO.getReg();
298 if (!Reg)
299 continue;
300 if (Reg.isVirtual())
301 continue;
302 // Reading either caller preserved or constant physregs is ok.
303 if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
304 *TII))
305 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
306 PhysRefs.insert(*AI);
307 }
308
309 // Next, collect all defs into PhysDefs. If any is already in PhysRefs
310 // (which currently contains only uses), set the PhysUseDef flag.
311 PhysUseDef = false;
312 MachineBasicBlock::const_iterator I = MI; I = std::next(I);
313 for (const auto &MOP : llvm::enumerate(MI->operands())) {
314 const MachineOperand &MO = MOP.value();
315 if (!MO.isReg() || !MO.isDef())
316 continue;
317 Register Reg = MO.getReg();
318 if (!Reg)
319 continue;
320 if (Reg.isVirtual())
321 continue;
322 // Check against PhysRefs even if the def is "dead".
323 if (PhysRefs.count(Reg.asMCReg()))
324 PhysUseDef = true;
325 // If the def is dead, it's ok. But the def may not marked "dead". That's
326 // common since this pass is run before livevariables. We can scan
327 // forward a few instructions and check if it is obviously dead.
328 if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
329 PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
330 }
331
332 // Finally, add all defs to PhysRefs as well.
333 for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
334 for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
335 ++AI)
336 PhysRefs.insert(*AI);
337
338 return !PhysRefs.empty();
339}
340
341bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
342 SmallSet<MCRegister, 8> &PhysRefs,
343 PhysDefVector &PhysDefs,
344 bool &NonLocal) const {
345 // For now conservatively returns false if the common subexpression is
346 // not in the same basic block as the given instruction. The only exception
347 // is if the common subexpression is in the sole predecessor block.
348 const MachineBasicBlock *MBB = MI->getParent();
349 const MachineBasicBlock *CSMBB = CSMI->getParent();
350
351 bool CrossMBB = false;
352 if (CSMBB != MBB) {
353 if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
354 return false;
355
356 for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
357 if (MRI->isAllocatable(PhysDefs[i].second) ||
358 MRI->isReserved(PhysDefs[i].second))
359 // Avoid extending live range of physical registers if they are
360 //allocatable or reserved.
361 return false;
362 }
363 CrossMBB = true;
364 }
365 MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
368 unsigned LookAheadLeft = LookAheadLimit;
369 while (LookAheadLeft) {
370 // Skip over dbg_value's.
371 while (I != E && I != EE && I->isDebugInstr())
372 ++I;
373
374 if (I == EE) {
375 assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
376 (void)CrossMBB;
377 CrossMBB = false;
378 NonLocal = true;
379 I = MBB->begin();
380 EE = MBB->end();
381 continue;
382 }
383
384 if (I == E)
385 return true;
386
387 for (const MachineOperand &MO : I->operands()) {
388 // RegMasks go on instructions like calls that clobber lots of physregs.
389 // Don't attempt to CSE across such an instruction.
390 if (MO.isRegMask())
391 return false;
392 if (!MO.isReg() || !MO.isDef())
393 continue;
394 Register MOReg = MO.getReg();
395 if (MOReg.isVirtual())
396 continue;
397 if (PhysRefs.count(MOReg.asMCReg()))
398 return false;
399 }
400
401 --LookAheadLeft;
402 ++I;
403 }
404
405 return false;
406}
407
408bool MachineCSE::isCSECandidate(MachineInstr *MI) {
409 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
410 MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo())
411 return false;
412
413 // Ignore copies.
414 if (MI->isCopyLike())
415 return false;
416
417 // Ignore stuff that we obviously can't move.
418 if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
419 MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
420 return false;
421
422 if (MI->mayLoad()) {
423 // Okay, this instruction does a load. As a refinement, we allow the target
424 // to decide whether the loaded value is actually a constant. If so, we can
425 // actually use it as a load.
426 if (!MI->isDereferenceableInvariantLoad())
427 // FIXME: we should be able to hoist loads with no other side effects if
428 // there are no other instructions which can change memory in this loop.
429 // This is a trivial form of alias analysis.
430 return false;
431 }
432
433 // Ignore stack guard loads, otherwise the register that holds CSEed value may
434 // be spilled and get loaded back with corrupted data.
435 if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
436 return false;
437
438 return true;
439}
440
441/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
442/// common expression that defines Reg. CSBB is basic block where CSReg is
443/// defined.
444bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
447 return true;
448
449 // FIXME: Heuristics that works around the lack the live range splitting.
450
451 // If CSReg is used at all uses of Reg, CSE should not increase register
452 // pressure of CSReg.
453 bool MayIncreasePressure = true;
454 if (CSReg.isVirtual() && Reg.isVirtual()) {
455 MayIncreasePressure = false;
457 int NumOfUses = 0;
458 for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
459 CSUses.insert(&MI);
460 // Too costly to compute if NumOfUses is very large. Conservatively assume
461 // MayIncreasePressure to avoid spending too much time here.
462 if (++NumOfUses > CSUsesThreshold) {
463 MayIncreasePressure = true;
464 break;
465 }
466 }
467 if (!MayIncreasePressure)
468 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
469 if (!CSUses.count(&MI)) {
470 MayIncreasePressure = true;
471 break;
472 }
473 }
474 }
475 if (!MayIncreasePressure) return true;
476
477 // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
478 // an immediate predecessor. We don't want to increase register pressure and
479 // end up causing other computation to be spilled.
480 if (TII->isAsCheapAsAMove(*MI)) {
481 MachineBasicBlock *BB = MI->getParent();
482 if (CSBB != BB && !CSBB->isSuccessor(BB))
483 return false;
484 }
485
486 // Heuristics #2: If the expression doesn't not use a vr and the only use
487 // of the redundant computation are copies, do not cse.
488 bool HasVRegUse = false;
489 for (const MachineOperand &MO : MI->all_uses()) {
490 if (MO.getReg().isVirtual()) {
491 HasVRegUse = true;
492 break;
493 }
494 }
495 if (!HasVRegUse) {
496 bool HasNonCopyUse = false;
497 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
498 // Ignore copies.
499 if (!MI.isCopyLike()) {
500 HasNonCopyUse = true;
501 break;
502 }
503 }
504 if (!HasNonCopyUse)
505 return false;
506 }
507
508 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
509 // it unless the defined value is already used in the BB of the new use.
510 bool HasPHI = false;
511 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
512 HasPHI |= UseMI.isPHI();
513 if (UseMI.getParent() == MI->getParent())
514 return true;
515 }
516
517 return !HasPHI;
518}
519
520void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
521 LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
522 ScopeType *Scope = new ScopeType(VNT);
523 ScopeMap[MBB] = Scope;
524}
525
526void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
527 LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
529 assert(SI != ScopeMap.end());
530 delete SI->second;
531 ScopeMap.erase(SI);
532}
533
534bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
535 bool Changed = false;
536
538 SmallVector<unsigned, 2> ImplicitDefsToUpdate;
539 SmallVector<unsigned, 2> ImplicitDefs;
541 if (!isCSECandidate(&MI))
542 continue;
543
544 bool FoundCSE = VNT.count(&MI);
545 if (!FoundCSE) {
546 // Using trivial copy propagation to find more CSE opportunities.
547 if (PerformTrivialCopyPropagation(&MI, MBB)) {
548 Changed = true;
549
550 // After coalescing MI itself may become a copy.
551 if (MI.isCopyLike())
552 continue;
553
554 // Try again to see if CSE is possible.
555 FoundCSE = VNT.count(&MI);
556 }
557 }
558
559 // Commute commutable instructions.
560 bool Commuted = false;
561 if (!FoundCSE && MI.isCommutable()) {
562 if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
563 Commuted = true;
564 FoundCSE = VNT.count(NewMI);
565 if (NewMI != &MI) {
566 // New instruction. It doesn't need to be kept.
567 NewMI->eraseFromParent();
568 Changed = true;
569 } else if (!FoundCSE)
570 // MI was changed but it didn't help, commute it back!
571 (void)TII->commuteInstruction(MI);
572 }
573 }
574
575 // If the instruction defines physical registers and the values *may* be
576 // used, then it's not safe to replace it with a common subexpression.
577 // It's also not safe if the instruction uses physical registers.
578 bool CrossMBBPhysDef = false;
580 PhysDefVector PhysDefs;
581 bool PhysUseDef = false;
582 if (FoundCSE &&
583 hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
584 FoundCSE = false;
585
586 // ... Unless the CS is local or is in the sole predecessor block
587 // and it also defines the physical register which is not clobbered
588 // in between and the physical register uses were not clobbered.
589 // This can never be the case if the instruction both uses and
590 // defines the same physical register, which was detected above.
591 if (!PhysUseDef) {
592 unsigned CSVN = VNT.lookup(&MI);
593 MachineInstr *CSMI = Exps[CSVN];
594 if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
595 FoundCSE = true;
596 }
597 }
598
599 if (!FoundCSE) {
600 VNT.insert(&MI, CurrVN++);
601 Exps.push_back(&MI);
602 continue;
603 }
604
605 // Found a common subexpression, eliminate it.
606 unsigned CSVN = VNT.lookup(&MI);
607 MachineInstr *CSMI = Exps[CSVN];
608 LLVM_DEBUG(dbgs() << "Examining: " << MI);
609 LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
610
611 // Prevent CSE-ing non-local convergent instructions.
612 // LLVM's current definition of `isConvergent` does not necessarily prove
613 // that non-local CSE is illegal. The following check extends the definition
614 // of `isConvergent` to assume a convergent instruction is dependent not
615 // only on additional conditions, but also on fewer conditions. LLVM does
616 // not have a MachineInstr attribute which expresses this extended
617 // definition, so it's necessary to use `isConvergent` to prevent illegally
618 // CSE-ing the subset of `isConvergent` instructions which do fall into this
619 // extended definition.
620 if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
621 LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
622 "different BBs, avoid CSE!\n");
623 VNT.insert(&MI, CurrVN++);
624 Exps.push_back(&MI);
625 continue;
626 }
627
628 // Check if it's profitable to perform this CSE.
629 bool DoCSE = true;
630 unsigned NumDefs = MI.getNumDefs();
631
632 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
633 MachineOperand &MO = MI.getOperand(i);
634 if (!MO.isReg() || !MO.isDef())
635 continue;
636 Register OldReg = MO.getReg();
637 Register NewReg = CSMI->getOperand(i).getReg();
638
639 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
640 // we should make sure it is not dead at CSMI.
641 if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
642 ImplicitDefsToUpdate.push_back(i);
643
644 // Keep track of implicit defs of CSMI and MI, to clear possibly
645 // made-redundant kill flags.
646 if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
647 ImplicitDefs.push_back(OldReg);
648
649 if (OldReg == NewReg) {
650 --NumDefs;
651 continue;
652 }
653
654 assert(OldReg.isVirtual() && NewReg.isVirtual() &&
655 "Do not CSE physical register defs!");
656
657 if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
658 LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
659 DoCSE = false;
660 break;
661 }
662
663 // Don't perform CSE if the result of the new instruction cannot exist
664 // within the constraints (register class, bank, or low-level type) of
665 // the old instruction.
666 if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
668 dbgs() << "*** Not the same register constraints, avoid CSE!\n");
669 DoCSE = false;
670 break;
671 }
672
673 CSEPairs.push_back(std::make_pair(OldReg, NewReg));
674 --NumDefs;
675 }
676
677 // Actually perform the elimination.
678 if (DoCSE) {
679 for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
680 unsigned OldReg = CSEPair.first;
681 unsigned NewReg = CSEPair.second;
682 // OldReg may have been unused but is used now, clear the Dead flag
683 MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
684 assert(Def != nullptr && "CSEd register has no unique definition?");
685 Def->clearRegisterDeads(NewReg);
686 // Replace with NewReg and clear kill flags which may be wrong now.
687 MRI->replaceRegWith(OldReg, NewReg);
688 MRI->clearKillFlags(NewReg);
689 }
690
691 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
692 // we should make sure it is not dead at CSMI.
693 for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
694 CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
695 for (const auto &PhysDef : PhysDefs)
696 if (!MI.getOperand(PhysDef.first).isDead())
697 CSMI->getOperand(PhysDef.first).setIsDead(false);
698
699 // Go through implicit defs of CSMI and MI, and clear the kill flags on
700 // their uses in all the instructions between CSMI and MI.
701 // We might have made some of the kill flags redundant, consider:
702 // subs ... implicit-def %nzcv <- CSMI
703 // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
704 // subs ... implicit-def %nzcv <- MI, to be eliminated
705 // csinc ... implicit killed %nzcv
706 // Since we eliminated MI, and reused a register imp-def'd by CSMI
707 // (here %nzcv), that register, if it was killed before MI, should have
708 // that kill flag removed, because it's lifetime was extended.
709 if (CSMI->getParent() == MI.getParent()) {
710 for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
711 for (auto ImplicitDef : ImplicitDefs)
712 if (MachineOperand *MO = II->findRegisterUseOperand(
713 ImplicitDef, /*isKill=*/true, TRI))
714 MO->setIsKill(false);
715 } else {
716 // If the instructions aren't in the same BB, bail out and clear the
717 // kill flag on all uses of the imp-def'd register.
718 for (auto ImplicitDef : ImplicitDefs)
719 MRI->clearKillFlags(ImplicitDef);
720 }
721
722 if (CrossMBBPhysDef) {
723 // Add physical register defs now coming in from a predecessor to MBB
724 // livein list.
725 while (!PhysDefs.empty()) {
726 auto LiveIn = PhysDefs.pop_back_val();
727 if (!MBB->isLiveIn(LiveIn.second))
728 MBB->addLiveIn(LiveIn.second);
729 }
730 ++NumCrossBBCSEs;
731 }
732
733 MI.eraseFromParent();
734 ++NumCSEs;
735 if (!PhysRefs.empty())
736 ++NumPhysCSEs;
737 if (Commuted)
738 ++NumCommutes;
739 Changed = true;
740 } else {
741 VNT.insert(&MI, CurrVN++);
742 Exps.push_back(&MI);
743 }
744 CSEPairs.clear();
745 ImplicitDefsToUpdate.clear();
746 ImplicitDefs.clear();
747 }
748
749 return Changed;
750}
751
752/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
753/// dominator tree node if its a leaf or all of its children are done. Walk
754/// up the dominator tree to destroy ancestors which are now done.
755void
756MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
758 if (OpenChildren[Node])
759 return;
760
761 // Pop scope.
762 ExitScope(Node->getBlock());
763
764 // Now traverse upwards to pop ancestors whose offsprings are all done.
765 while (MachineDomTreeNode *Parent = Node->getIDom()) {
766 unsigned Left = --OpenChildren[Parent];
767 if (Left != 0)
768 break;
769 ExitScope(Parent->getBlock());
770 Node = Parent;
771 }
772}
773
774bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
778
779 CurrVN = 0;
780
781 // Perform a DFS walk to determine the order of visit.
782 WorkList.push_back(Node);
783 do {
784 Node = WorkList.pop_back_val();
785 Scopes.push_back(Node);
786 OpenChildren[Node] = Node->getNumChildren();
787 append_range(WorkList, Node->children());
788 } while (!WorkList.empty());
789
790 // Now perform CSE.
791 bool Changed = false;
792 for (MachineDomTreeNode *Node : Scopes) {
793 MachineBasicBlock *MBB = Node->getBlock();
794 EnterScope(MBB);
795 Changed |= ProcessBlockCSE(MBB);
796 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
797 ExitScopeIfDone(Node, OpenChildren);
798 }
799
800 return Changed;
801}
802
803// We use stronger checks for PRE candidate rather than for CSE ones to embrace
804// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
805// to exclude instrs created by PRE that won't be CSEed later.
806bool MachineCSE::isPRECandidate(MachineInstr *MI,
807 SmallSet<MCRegister, 8> &PhysRefs) {
808 if (!isCSECandidate(MI) ||
809 MI->isNotDuplicable() ||
810 MI->mayLoad() ||
812 MI->getNumDefs() != 1 ||
813 MI->getNumExplicitDefs() != 1)
814 return false;
815
816 for (const MachineOperand &MO : MI->operands()) {
817 if (MO.isReg() && !MO.getReg().isVirtual()) {
818 if (MO.isDef())
819 return false;
820 else
821 PhysRefs.insert(MO.getReg());
822 }
823 }
824
825 return true;
826}
827
828bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
830 bool Changed = false;
833 if (!isPRECandidate(&MI, PhysRefs))
834 continue;
835
836 if (!PREMap.count(&MI)) {
837 PREMap[&MI] = MBB;
838 continue;
839 }
840
841 auto MBB1 = PREMap[&MI];
842 assert(
843 !DT->properlyDominates(MBB, MBB1) &&
844 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
845 auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
846 if (!CMBB->isLegalToHoistInto())
847 continue;
848
849 if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
850 continue;
851
852 // Two instrs are partial redundant if their basic blocks are reachable
853 // from one to another but one doesn't dominate another.
854 if (CMBB != MBB1) {
855 auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
856 if (BB != nullptr && BB1 != nullptr &&
857 (isPotentiallyReachable(BB1, BB) ||
858 isPotentiallyReachable(BB, BB1))) {
859 // The following check extends the definition of `isConvergent` to
860 // assume a convergent instruction is dependent not only on additional
861 // conditions, but also on fewer conditions. LLVM does not have a
862 // MachineInstr attribute which expresses this extended definition, so
863 // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
864 // subset of `isConvergent` instructions which do fall into this
865 // extended definition.
866 if (MI.isConvergent() && CMBB != MBB)
867 continue;
868
869 // If this instruction uses physical registers then we can only do PRE
870 // if it's using the value that is live at the place we're hoisting to.
871 bool NonLocal;
872 PhysDefVector PhysDefs;
873 if (!PhysRefs.empty() &&
874 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
875 PhysDefs, NonLocal))
876 continue;
877
878 assert(MI.getOperand(0).isDef() &&
879 "First operand of instr with one explicit def must be this def");
880 Register VReg = MI.getOperand(0).getReg();
881 Register NewReg = MRI->cloneVirtualRegister(VReg);
882 if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
883 continue;
884 MachineInstr &NewMI =
885 TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
886
887 // When hoisting, make sure we don't carry the debug location of
888 // the original instruction, as that's not correct and can cause
889 // unexpected jumps when debugging optimized code.
890 auto EmptyDL = DebugLoc();
891 NewMI.setDebugLoc(EmptyDL);
892
893 NewMI.getOperand(0).setReg(NewReg);
894
895 PREMap[&MI] = CMBB;
896 ++NumPREs;
897 Changed = true;
898 }
899 }
900 }
901 return Changed;
902}
903
904// This simple PRE (partial redundancy elimination) pass doesn't actually
905// eliminate partial redundancy but transforms it to full redundancy,
906// anticipating that the next CSE step will eliminate this created redundancy.
907// If CSE doesn't eliminate this, than created instruction will remain dead
908// and eliminated later by Remove Dead Machine Instructions pass.
909bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
911
912 PREMap.clear();
913 bool Changed = false;
914 BBs.push_back(DT->getRootNode());
915 do {
916 auto Node = BBs.pop_back_val();
917 append_range(BBs, Node->children());
918
919 MachineBasicBlock *MBB = Node->getBlock();
920 Changed |= ProcessBlockPRE(DT, MBB);
921
922 } while (!BBs.empty());
923
924 return Changed;
925}
926
927bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
929 MachineBasicBlock *MBB1) {
930 if (CandidateBB->getParent()->getFunction().hasMinSize())
931 return true;
932 assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
933 assert(DT->dominates(CandidateBB, MBB1) &&
934 "CandidateBB should dominate MBB1");
935 return MBFI->getBlockFreq(CandidateBB) <=
936 MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
937}
938
939bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
940 if (skipFunction(MF.getFunction()))
941 return false;
942
945 MRI = &MF.getRegInfo();
946 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
947 DT = &getAnalysis<MachineDominatorTree>();
948 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
949 LookAheadLimit = TII->getMachineCSELookAheadLimit();
950 bool ChangedPRE, ChangedCSE;
951 ChangedPRE = PerformSimplePRE(DT);
952 ChangedCSE = PerformCSE(DT->getRootNode());
953 return ChangedPRE || ChangedCSE;
954}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineOperand &MO, const MachineFunction &MF, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII)
Definition: MachineCSE.cpp:269
Machine Common Subexpression Elimination
Definition: MachineCSE.cpp:173
static cl::opt< int > CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), cl::desc("Threshold for the size of CSUses"))
static cl::opt< bool > AggressiveMachineCSE("aggressive-machine-cse", cl::Hidden, cl::init(false), cl::desc("Override the profitability heuristics for Machine CSE"))
#define DEBUG_TYPE
Definition: MachineCSE.cpp:51
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
A debug info location.
Definition: DebugLoc.h:33
Base class for the actual dominator tree node.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:674
bool isAsCheapAsAMove(const MachineInstr &MI) const override
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
unsigned pred_size() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
MachineDomTreeNode * getRootNode() const
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:326
void insert(mop_iterator InsertBefore, ArrayRef< MachineOperand > Ops)
Inserts Ops BEFORE It. Can untie/retie tied operands.
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs that point to the register def in this instruction and point them to Reg instead.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:553
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Definition: Pass.cpp:102
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
ScopedHashTableScope< K, V, KInfo, AllocatorTy > ScopeTy
ScopeTy - This is a helpful typedef that allows clients to get easy access to the name of the scope f...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:384
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool empty() const
Definition: SmallSet.h:159
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
Definition: STLExtras.h:2375
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2042
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition: Allocator.h:375
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:166
void initializeMachineCSEPass(PassRegistry &)
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:231
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...