LLVM 20.0.0git
MachineCSE.cpp
Go to the documentation of this file.
1//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global common subexpression elimination on machine
10// instructions using a scoped hash table based value numbering scheme. It
11// must be run while the machine function is still in SSA form.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/SmallSet.h"
20#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/CFG.h"
31#include "llvm/CodeGen/Passes.h"
37#include "llvm/MC/MCRegister.h"
39#include "llvm/Pass.h"
41#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <iterator>
46#include <utility>
47
48using namespace llvm;
49
50#define DEBUG_TYPE "machine-cse"
51
52STATISTIC(NumCoalesces, "Number of copies coalesced");
53STATISTIC(NumCSEs, "Number of common subexpression eliminated");
54STATISTIC(NumPREs, "Number of partial redundant expression"
55 " transformed to fully redundant");
56STATISTIC(NumPhysCSEs,
57 "Number of physreg referencing common subexpr eliminated");
58STATISTIC(NumCrossBBCSEs,
59 "Number of cross-MBB physreg referencing CS eliminated");
60STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
61
62// Threshold to avoid excessive cost to compute isProfitableToCSE.
63static cl::opt<int>
64 CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
65 cl::desc("Threshold for the size of CSUses"));
66
68 "aggressive-machine-cse", cl::Hidden, cl::init(false),
69 cl::desc("Override the profitability heuristics for Machine CSE"));
70
71namespace {
72
73 class MachineCSE : public MachineFunctionPass {
74 const TargetInstrInfo *TII = nullptr;
75 const TargetRegisterInfo *TRI = nullptr;
76 AliasAnalysis *AA = nullptr;
77 MachineDominatorTree *DT = nullptr;
78 MachineRegisterInfo *MRI = nullptr;
79 MachineBlockFrequencyInfo *MBFI = nullptr;
80
81 public:
82 static char ID; // Pass identification
83
84 MachineCSE() : MachineFunctionPass(ID) {
86 }
87
88 bool runOnMachineFunction(MachineFunction &MF) override;
89
90 void getAnalysisUsage(AnalysisUsage &AU) const override {
91 AU.setPreservesCFG();
99 }
100
103 .set(MachineFunctionProperties::Property::IsSSA);
104 }
105
106 void releaseMemory() override {
107 ScopeMap.clear();
108 PREMap.clear();
109 Exps.clear();
110 }
111
112 private:
113 using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
115 using ScopedHTType =
117 AllocatorTy>;
118 using ScopeType = ScopedHTType::ScopeTy;
119 using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
120
121 unsigned LookAheadLimit = 0;
124 PREMap;
125 ScopedHTType VNT;
127 unsigned CurrVN = 0;
128
129 bool PerformTrivialCopyPropagation(MachineInstr *MI,
131 bool isPhysDefTriviallyDead(MCRegister Reg,
134 bool hasLivePhysRegDefUses(const MachineInstr *MI,
135 const MachineBasicBlock *MBB,
136 SmallSet<MCRegister, 8> &PhysRefs,
137 PhysDefVector &PhysDefs, bool &PhysUseDef) const;
138 bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
139 SmallSet<MCRegister, 8> &PhysRefs,
140 PhysDefVector &PhysDefs, bool &NonLocal) const;
141 bool isCSECandidate(MachineInstr *MI);
142 bool isProfitableToCSE(Register CSReg, Register Reg,
144 void EnterScope(MachineBasicBlock *MBB);
145 void ExitScope(MachineBasicBlock *MBB);
146 bool ProcessBlockCSE(MachineBasicBlock *MBB);
147 void ExitScopeIfDone(MachineDomTreeNode *Node,
149 bool PerformCSE(MachineDomTreeNode *Node);
150
151 bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
152 bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
153 bool PerformSimplePRE(MachineDominatorTree *DT);
154 /// Heuristics to see if it's profitable to move common computations of MBB
155 /// and MBB1 to CandidateBB.
156 bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
158 MachineBasicBlock *MBB1);
159 };
160
161} // end anonymous namespace
162
163char MachineCSE::ID = 0;
164
165char &llvm::MachineCSEID = MachineCSE::ID;
166
168 "Machine Common Subexpression Elimination", false, false)
172 "Machine Common Subexpression Elimination", false, false)
173
174/// The source register of a COPY machine instruction can be propagated to all
175/// its users, and this propagation could increase the probability of finding
176/// common subexpressions. If the COPY has only one user, the COPY itself can
177/// be removed.
178bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
180 bool Changed = false;
181 for (MachineOperand &MO : MI->all_uses()) {
182 Register Reg = MO.getReg();
183 if (!Reg.isVirtual())
184 continue;
185 bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
186 MachineInstr *DefMI = MRI->getVRegDef(Reg);
187 if (!DefMI || !DefMI->isCopy())
188 continue;
189 Register SrcReg = DefMI->getOperand(1).getReg();
190 if (!SrcReg.isVirtual())
191 continue;
192 if (DefMI->getOperand(0).getSubReg())
193 continue;
194 // FIXME: We should trivially coalesce subregister copies to expose CSE
195 // opportunities on instructions with truncated operands (see
196 // cse-add-with-overflow.ll). This can be done here as follows:
197 // if (SrcSubReg)
198 // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
199 // SrcSubReg);
200 // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
201 //
202 // The 2-addr pass has been updated to handle coalesced subregs. However,
203 // some machine-specific code still can't handle it.
204 // To handle it properly we also need a way find a constrained subregister
205 // class given a super-reg class and subreg index.
206 if (DefMI->getOperand(1).getSubReg())
207 continue;
208 if (!MRI->constrainRegAttrs(SrcReg, Reg))
209 continue;
210 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
211 LLVM_DEBUG(dbgs() << "*** to: " << *MI);
212
213 // Propagate SrcReg of copies to MI.
214 MO.setReg(SrcReg);
215 MRI->clearKillFlags(SrcReg);
216 // Coalesce single use copies.
217 if (OnlyOneUse) {
218 // If (and only if) we've eliminated all uses of the copy, also
219 // copy-propagate to any debug-users of MI, or they'll be left using
220 // an undefined value.
222
224 ++NumCoalesces;
225 }
226 Changed = true;
227 }
228
229 return Changed;
230}
231
232bool MachineCSE::isPhysDefTriviallyDead(
235 unsigned LookAheadLeft = LookAheadLimit;
236 while (LookAheadLeft) {
237 // Skip over dbg_value's.
239
240 if (I == E)
241 // Reached end of block, we don't know if register is dead or not.
242 return false;
243
244 bool SeenDef = false;
245 for (const MachineOperand &MO : I->operands()) {
246 if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
247 SeenDef = true;
248 if (!MO.isReg() || !MO.getReg())
249 continue;
250 if (!TRI->regsOverlap(MO.getReg(), Reg))
251 continue;
252 if (MO.isUse())
253 // Found a use!
254 return false;
255 SeenDef = true;
256 }
257 if (SeenDef)
258 // See a def of Reg (or an alias) before encountering any use, it's
259 // trivially dead.
260 return true;
261
262 --LookAheadLeft;
263 ++I;
264 }
265 return false;
266}
267
269 const MachineOperand &MO,
270 const MachineFunction &MF,
271 const TargetRegisterInfo &TRI,
272 const TargetInstrInfo &TII) {
273 // MachineRegisterInfo::isConstantPhysReg directly called by
274 // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
275 // reserved registers to be frozen. That doesn't cause a problem post-ISel as
276 // most (if not all) targets freeze reserved registers right after ISel.
277 //
278 // It does cause issues mid-GlobalISel, however, hence the additional
279 // reservedRegsFrozen check.
280 const MachineRegisterInfo &MRI = MF.getRegInfo();
281 return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
282 (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
283}
284
285/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
286/// physical registers (except for dead defs of physical registers). It also
287/// returns the physical register def by reference if it's the only one and the
288/// instruction does not uses a physical register.
289bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
290 const MachineBasicBlock *MBB,
291 SmallSet<MCRegister, 8> &PhysRefs,
292 PhysDefVector &PhysDefs,
293 bool &PhysUseDef) const {
294 // First, add all uses to PhysRefs.
295 for (const MachineOperand &MO : MI->all_uses()) {
296 Register Reg = MO.getReg();
297 if (!Reg)
298 continue;
299 if (Reg.isVirtual())
300 continue;
301 // Reading either caller preserved or constant physregs is ok.
302 if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
303 *TII))
304 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
305 PhysRefs.insert(*AI);
306 }
307
308 // Next, collect all defs into PhysDefs. If any is already in PhysRefs
309 // (which currently contains only uses), set the PhysUseDef flag.
310 PhysUseDef = false;
311 MachineBasicBlock::const_iterator I = MI; I = std::next(I);
312 for (const auto &MOP : llvm::enumerate(MI->operands())) {
313 const MachineOperand &MO = MOP.value();
314 if (!MO.isReg() || !MO.isDef())
315 continue;
316 Register Reg = MO.getReg();
317 if (!Reg)
318 continue;
319 if (Reg.isVirtual())
320 continue;
321 // Check against PhysRefs even if the def is "dead".
322 if (PhysRefs.count(Reg.asMCReg()))
323 PhysUseDef = true;
324 // If the def is dead, it's ok. But the def may not marked "dead". That's
325 // common since this pass is run before livevariables. We can scan
326 // forward a few instructions and check if it is obviously dead.
327 if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
328 PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
329 }
330
331 // Finally, add all defs to PhysRefs as well.
332 for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
333 for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
334 ++AI)
335 PhysRefs.insert(*AI);
336
337 return !PhysRefs.empty();
338}
339
340bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
341 SmallSet<MCRegister, 8> &PhysRefs,
342 PhysDefVector &PhysDefs,
343 bool &NonLocal) const {
344 // For now conservatively returns false if the common subexpression is
345 // not in the same basic block as the given instruction. The only exception
346 // is if the common subexpression is in the sole predecessor block.
347 const MachineBasicBlock *MBB = MI->getParent();
348 const MachineBasicBlock *CSMBB = CSMI->getParent();
349
350 bool CrossMBB = false;
351 if (CSMBB != MBB) {
352 if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
353 return false;
354
355 for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
356 if (MRI->isAllocatable(PhysDefs[i].second) ||
357 MRI->isReserved(PhysDefs[i].second))
358 // Avoid extending live range of physical registers if they are
359 //allocatable or reserved.
360 return false;
361 }
362 CrossMBB = true;
363 }
364 MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
367 unsigned LookAheadLeft = LookAheadLimit;
368 while (LookAheadLeft) {
369 // Skip over dbg_value's.
370 while (I != E && I != EE && I->isDebugInstr())
371 ++I;
372
373 if (I == EE) {
374 assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
375 (void)CrossMBB;
376 CrossMBB = false;
377 NonLocal = true;
378 I = MBB->begin();
379 EE = MBB->end();
380 continue;
381 }
382
383 if (I == E)
384 return true;
385
386 for (const MachineOperand &MO : I->operands()) {
387 // RegMasks go on instructions like calls that clobber lots of physregs.
388 // Don't attempt to CSE across such an instruction.
389 if (MO.isRegMask())
390 return false;
391 if (!MO.isReg() || !MO.isDef())
392 continue;
393 Register MOReg = MO.getReg();
394 if (MOReg.isVirtual())
395 continue;
396 if (PhysRefs.count(MOReg.asMCReg()))
397 return false;
398 }
399
400 --LookAheadLeft;
401 ++I;
402 }
403
404 return false;
405}
406
407bool MachineCSE::isCSECandidate(MachineInstr *MI) {
408 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
409 MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo())
410 return false;
411
412 // Ignore copies.
413 if (MI->isCopyLike())
414 return false;
415
416 // Ignore stuff that we obviously can't move.
417 if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
418 MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
419 return false;
420
421 if (MI->mayLoad()) {
422 // Okay, this instruction does a load. As a refinement, we allow the target
423 // to decide whether the loaded value is actually a constant. If so, we can
424 // actually use it as a load.
425 if (!MI->isDereferenceableInvariantLoad())
426 // FIXME: we should be able to hoist loads with no other side effects if
427 // there are no other instructions which can change memory in this loop.
428 // This is a trivial form of alias analysis.
429 return false;
430 }
431
432 // Ignore stack guard loads, otherwise the register that holds CSEed value may
433 // be spilled and get loaded back with corrupted data.
434 if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
435 return false;
436
437 return true;
438}
439
440/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
441/// common expression that defines Reg. CSBB is basic block where CSReg is
442/// defined.
443bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
446 return true;
447
448 // FIXME: Heuristics that works around the lack the live range splitting.
449
450 // If CSReg is used at all uses of Reg, CSE should not increase register
451 // pressure of CSReg.
452 bool MayIncreasePressure = true;
453 if (CSReg.isVirtual() && Reg.isVirtual()) {
454 MayIncreasePressure = false;
456 int NumOfUses = 0;
457 for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
458 CSUses.insert(&MI);
459 // Too costly to compute if NumOfUses is very large. Conservatively assume
460 // MayIncreasePressure to avoid spending too much time here.
461 if (++NumOfUses > CSUsesThreshold) {
462 MayIncreasePressure = true;
463 break;
464 }
465 }
466 if (!MayIncreasePressure)
467 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
468 if (!CSUses.count(&MI)) {
469 MayIncreasePressure = true;
470 break;
471 }
472 }
473 }
474 if (!MayIncreasePressure) return true;
475
476 // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
477 // an immediate predecessor. We don't want to increase register pressure and
478 // end up causing other computation to be spilled.
479 if (TII->isAsCheapAsAMove(*MI)) {
480 MachineBasicBlock *BB = MI->getParent();
481 if (CSBB != BB && !CSBB->isSuccessor(BB))
482 return false;
483 }
484
485 // Heuristics #2: If the expression doesn't not use a vr and the only use
486 // of the redundant computation are copies, do not cse.
487 bool HasVRegUse = false;
488 for (const MachineOperand &MO : MI->all_uses()) {
489 if (MO.getReg().isVirtual()) {
490 HasVRegUse = true;
491 break;
492 }
493 }
494 if (!HasVRegUse) {
495 bool HasNonCopyUse = false;
496 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
497 // Ignore copies.
498 if (!MI.isCopyLike()) {
499 HasNonCopyUse = true;
500 break;
501 }
502 }
503 if (!HasNonCopyUse)
504 return false;
505 }
506
507 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
508 // it unless the defined value is already used in the BB of the new use.
509 bool HasPHI = false;
510 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
511 HasPHI |= UseMI.isPHI();
512 if (UseMI.getParent() == MI->getParent())
513 return true;
514 }
515
516 return !HasPHI;
517}
518
519void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
520 LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
521 ScopeType *Scope = new ScopeType(VNT);
522 ScopeMap[MBB] = Scope;
523}
524
525void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
526 LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
528 assert(SI != ScopeMap.end());
529 delete SI->second;
530 ScopeMap.erase(SI);
531}
532
533bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
534 bool Changed = false;
535
537 SmallVector<unsigned, 2> ImplicitDefsToUpdate;
538 SmallVector<unsigned, 2> ImplicitDefs;
540 if (!isCSECandidate(&MI))
541 continue;
542
543 bool FoundCSE = VNT.count(&MI);
544 if (!FoundCSE) {
545 // Using trivial copy propagation to find more CSE opportunities.
546 if (PerformTrivialCopyPropagation(&MI, MBB)) {
547 Changed = true;
548
549 // After coalescing MI itself may become a copy.
550 if (MI.isCopyLike())
551 continue;
552
553 // Try again to see if CSE is possible.
554 FoundCSE = VNT.count(&MI);
555 }
556 }
557
558 // Commute commutable instructions.
559 bool Commuted = false;
560 if (!FoundCSE && MI.isCommutable()) {
561 if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
562 Commuted = true;
563 FoundCSE = VNT.count(NewMI);
564 if (NewMI != &MI) {
565 // New instruction. It doesn't need to be kept.
566 NewMI->eraseFromParent();
567 Changed = true;
568 } else if (!FoundCSE)
569 // MI was changed but it didn't help, commute it back!
570 (void)TII->commuteInstruction(MI);
571 }
572 }
573
574 // If the instruction defines physical registers and the values *may* be
575 // used, then it's not safe to replace it with a common subexpression.
576 // It's also not safe if the instruction uses physical registers.
577 bool CrossMBBPhysDef = false;
579 PhysDefVector PhysDefs;
580 bool PhysUseDef = false;
581 if (FoundCSE &&
582 hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
583 FoundCSE = false;
584
585 // ... Unless the CS is local or is in the sole predecessor block
586 // and it also defines the physical register which is not clobbered
587 // in between and the physical register uses were not clobbered.
588 // This can never be the case if the instruction both uses and
589 // defines the same physical register, which was detected above.
590 if (!PhysUseDef) {
591 unsigned CSVN = VNT.lookup(&MI);
592 MachineInstr *CSMI = Exps[CSVN];
593 if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
594 FoundCSE = true;
595 }
596 }
597
598 if (!FoundCSE) {
599 VNT.insert(&MI, CurrVN++);
600 Exps.push_back(&MI);
601 continue;
602 }
603
604 // Found a common subexpression, eliminate it.
605 unsigned CSVN = VNT.lookup(&MI);
606 MachineInstr *CSMI = Exps[CSVN];
607 LLVM_DEBUG(dbgs() << "Examining: " << MI);
608 LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
609
610 // Prevent CSE-ing non-local convergent instructions.
611 // LLVM's current definition of `isConvergent` does not necessarily prove
612 // that non-local CSE is illegal. The following check extends the definition
613 // of `isConvergent` to assume a convergent instruction is dependent not
614 // only on additional conditions, but also on fewer conditions. LLVM does
615 // not have a MachineInstr attribute which expresses this extended
616 // definition, so it's necessary to use `isConvergent` to prevent illegally
617 // CSE-ing the subset of `isConvergent` instructions which do fall into this
618 // extended definition.
619 if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
620 LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
621 "different BBs, avoid CSE!\n");
622 VNT.insert(&MI, CurrVN++);
623 Exps.push_back(&MI);
624 continue;
625 }
626
627 // Check if it's profitable to perform this CSE.
628 bool DoCSE = true;
629 unsigned NumDefs = MI.getNumDefs();
630
631 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
632 MachineOperand &MO = MI.getOperand(i);
633 if (!MO.isReg() || !MO.isDef())
634 continue;
635 Register OldReg = MO.getReg();
636 Register NewReg = CSMI->getOperand(i).getReg();
637
638 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
639 // we should make sure it is not dead at CSMI.
640 if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
641 ImplicitDefsToUpdate.push_back(i);
642
643 // Keep track of implicit defs of CSMI and MI, to clear possibly
644 // made-redundant kill flags.
645 if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
646 ImplicitDefs.push_back(OldReg);
647
648 if (OldReg == NewReg) {
649 --NumDefs;
650 continue;
651 }
652
653 assert(OldReg.isVirtual() && NewReg.isVirtual() &&
654 "Do not CSE physical register defs!");
655
656 if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
657 LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
658 DoCSE = false;
659 break;
660 }
661
662 // Don't perform CSE if the result of the new instruction cannot exist
663 // within the constraints (register class, bank, or low-level type) of
664 // the old instruction.
665 if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
667 dbgs() << "*** Not the same register constraints, avoid CSE!\n");
668 DoCSE = false;
669 break;
670 }
671
672 CSEPairs.push_back(std::make_pair(OldReg, NewReg));
673 --NumDefs;
674 }
675
676 // Actually perform the elimination.
677 if (DoCSE) {
678 for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
679 unsigned OldReg = CSEPair.first;
680 unsigned NewReg = CSEPair.second;
681 // OldReg may have been unused but is used now, clear the Dead flag
682 MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
683 assert(Def != nullptr && "CSEd register has no unique definition?");
684 Def->clearRegisterDeads(NewReg);
685 // Replace with NewReg and clear kill flags which may be wrong now.
686 MRI->replaceRegWith(OldReg, NewReg);
687 MRI->clearKillFlags(NewReg);
688 }
689
690 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
691 // we should make sure it is not dead at CSMI.
692 for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
693 CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
694 for (const auto &PhysDef : PhysDefs)
695 if (!MI.getOperand(PhysDef.first).isDead())
696 CSMI->getOperand(PhysDef.first).setIsDead(false);
697
698 // Go through implicit defs of CSMI and MI, and clear the kill flags on
699 // their uses in all the instructions between CSMI and MI.
700 // We might have made some of the kill flags redundant, consider:
701 // subs ... implicit-def %nzcv <- CSMI
702 // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
703 // subs ... implicit-def %nzcv <- MI, to be eliminated
704 // csinc ... implicit killed %nzcv
705 // Since we eliminated MI, and reused a register imp-def'd by CSMI
706 // (here %nzcv), that register, if it was killed before MI, should have
707 // that kill flag removed, because it's lifetime was extended.
708 if (CSMI->getParent() == MI.getParent()) {
709 for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
710 for (auto ImplicitDef : ImplicitDefs)
711 if (MachineOperand *MO = II->findRegisterUseOperand(
712 ImplicitDef, TRI, /*isKill=*/true))
713 MO->setIsKill(false);
714 } else {
715 // If the instructions aren't in the same BB, bail out and clear the
716 // kill flag on all uses of the imp-def'd register.
717 for (auto ImplicitDef : ImplicitDefs)
718 MRI->clearKillFlags(ImplicitDef);
719 }
720
721 if (CrossMBBPhysDef) {
722 // Add physical register defs now coming in from a predecessor to MBB
723 // livein list.
724 while (!PhysDefs.empty()) {
725 auto LiveIn = PhysDefs.pop_back_val();
726 if (!MBB->isLiveIn(LiveIn.second))
727 MBB->addLiveIn(LiveIn.second);
728 }
729 ++NumCrossBBCSEs;
730 }
731
732 MI.eraseFromParent();
733 ++NumCSEs;
734 if (!PhysRefs.empty())
735 ++NumPhysCSEs;
736 if (Commuted)
737 ++NumCommutes;
738 Changed = true;
739 } else {
740 VNT.insert(&MI, CurrVN++);
741 Exps.push_back(&MI);
742 }
743 CSEPairs.clear();
744 ImplicitDefsToUpdate.clear();
745 ImplicitDefs.clear();
746 }
747
748 return Changed;
749}
750
751/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
752/// dominator tree node if its a leaf or all of its children are done. Walk
753/// up the dominator tree to destroy ancestors which are now done.
754void
755MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
757 if (OpenChildren[Node])
758 return;
759
760 // Pop scope.
761 ExitScope(Node->getBlock());
762
763 // Now traverse upwards to pop ancestors whose offsprings are all done.
764 while (MachineDomTreeNode *Parent = Node->getIDom()) {
765 unsigned Left = --OpenChildren[Parent];
766 if (Left != 0)
767 break;
768 ExitScope(Parent->getBlock());
769 Node = Parent;
770 }
771}
772
773bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
777
778 CurrVN = 0;
779
780 // Perform a DFS walk to determine the order of visit.
781 WorkList.push_back(Node);
782 do {
783 Node = WorkList.pop_back_val();
784 Scopes.push_back(Node);
785 OpenChildren[Node] = Node->getNumChildren();
786 append_range(WorkList, Node->children());
787 } while (!WorkList.empty());
788
789 // Now perform CSE.
790 bool Changed = false;
791 for (MachineDomTreeNode *Node : Scopes) {
792 MachineBasicBlock *MBB = Node->getBlock();
793 EnterScope(MBB);
794 Changed |= ProcessBlockCSE(MBB);
795 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
796 ExitScopeIfDone(Node, OpenChildren);
797 }
798
799 return Changed;
800}
801
802// We use stronger checks for PRE candidate rather than for CSE ones to embrace
803// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
804// to exclude instrs created by PRE that won't be CSEed later.
805bool MachineCSE::isPRECandidate(MachineInstr *MI,
806 SmallSet<MCRegister, 8> &PhysRefs) {
807 if (!isCSECandidate(MI) ||
808 MI->isNotDuplicable() ||
809 MI->mayLoad() ||
811 MI->getNumDefs() != 1 ||
812 MI->getNumExplicitDefs() != 1)
813 return false;
814
815 for (const MachineOperand &MO : MI->operands()) {
816 if (MO.isReg() && !MO.getReg().isVirtual()) {
817 if (MO.isDef())
818 return false;
819 else
820 PhysRefs.insert(MO.getReg());
821 }
822 }
823
824 return true;
825}
826
827bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
829 bool Changed = false;
832 if (!isPRECandidate(&MI, PhysRefs))
833 continue;
834
835 if (!PREMap.count(&MI)) {
836 PREMap[&MI] = MBB;
837 continue;
838 }
839
840 auto MBB1 = PREMap[&MI];
841 assert(
842 !DT->properlyDominates(MBB, MBB1) &&
843 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
844 auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
845 if (!CMBB->isLegalToHoistInto())
846 continue;
847
848 if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
849 continue;
850
851 // Two instrs are partial redundant if their basic blocks are reachable
852 // from one to another but one doesn't dominate another.
853 if (CMBB != MBB1) {
854 auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
855 if (BB != nullptr && BB1 != nullptr &&
856 (isPotentiallyReachable(BB1, BB) ||
857 isPotentiallyReachable(BB, BB1))) {
858 // The following check extends the definition of `isConvergent` to
859 // assume a convergent instruction is dependent not only on additional
860 // conditions, but also on fewer conditions. LLVM does not have a
861 // MachineInstr attribute which expresses this extended definition, so
862 // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
863 // subset of `isConvergent` instructions which do fall into this
864 // extended definition.
865 if (MI.isConvergent() && CMBB != MBB)
866 continue;
867
868 // If this instruction uses physical registers then we can only do PRE
869 // if it's using the value that is live at the place we're hoisting to.
870 bool NonLocal;
871 PhysDefVector PhysDefs;
872 if (!PhysRefs.empty() &&
873 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
874 PhysDefs, NonLocal))
875 continue;
876
877 assert(MI.getOperand(0).isDef() &&
878 "First operand of instr with one explicit def must be this def");
879 Register VReg = MI.getOperand(0).getReg();
880 Register NewReg = MRI->cloneVirtualRegister(VReg);
881 if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
882 continue;
883 MachineInstr &NewMI =
884 TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
885
886 // When hoisting, make sure we don't carry the debug location of
887 // the original instruction, as that's not correct and can cause
888 // unexpected jumps when debugging optimized code.
889 auto EmptyDL = DebugLoc();
890 NewMI.setDebugLoc(EmptyDL);
891
892 NewMI.getOperand(0).setReg(NewReg);
893
894 PREMap[&MI] = CMBB;
895 ++NumPREs;
896 Changed = true;
897 }
898 }
899 }
900 return Changed;
901}
902
903// This simple PRE (partial redundancy elimination) pass doesn't actually
904// eliminate partial redundancy but transforms it to full redundancy,
905// anticipating that the next CSE step will eliminate this created redundancy.
906// If CSE doesn't eliminate this, than created instruction will remain dead
907// and eliminated later by Remove Dead Machine Instructions pass.
908bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
910
911 PREMap.clear();
912 bool Changed = false;
913 BBs.push_back(DT->getRootNode());
914 do {
915 auto Node = BBs.pop_back_val();
916 append_range(BBs, Node->children());
917
918 MachineBasicBlock *MBB = Node->getBlock();
919 Changed |= ProcessBlockPRE(DT, MBB);
920
921 } while (!BBs.empty());
922
923 return Changed;
924}
925
926bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
928 MachineBasicBlock *MBB1) {
929 if (CandidateBB->getParent()->getFunction().hasMinSize())
930 return true;
931 assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
932 assert(DT->dominates(CandidateBB, MBB1) &&
933 "CandidateBB should dominate MBB1");
934 return MBFI->getBlockFreq(CandidateBB) <=
935 MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
936}
937
938bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
939 if (skipFunction(MF.getFunction()))
940 return false;
941
944 MRI = &MF.getRegInfo();
945 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
946 DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
947 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
948 LookAheadLimit = TII->getMachineCSELookAheadLimit();
949 bool ChangedPRE, ChangedCSE;
950 ChangedPRE = PerformSimplePRE(DT);
951 ChangedCSE = PerformCSE(DT->getRootNode());
952 return ChangedPRE || ChangedCSE;
953}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
This file defines the BumpPtrAllocator interface.
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:371
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineOperand &MO, const MachineFunction &MF, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII)
Definition: MachineCSE.cpp:268
Machine Common Subexpression Elimination
Definition: MachineCSE.cpp:172
static cl::opt< int > CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), cl::desc("Threshold for the size of CSUses"))
static cl::opt< bool > AggressiveMachineCSE("aggressive-machine-cse", cl::Hidden, cl::init(false), cl::desc("Override the profitability heuristics for Machine CSE"))
#define DEBUG_TYPE
Definition: MachineCSE.cpp:50
unsigned const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
A debug info location.
Definition: DebugLoc.h:33
Base class for the actual dominator tree node.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
bool isAsCheapAsAMove(const MachineInstr &MI) const override
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
unsigned pred_size() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
MachineDomTreeNode * getRootNode() const
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
void insert(mop_iterator InsertBefore, ArrayRef< MachineOperand > Ops)
Inserts Ops BEFORE It. Can untie/retie tied operands.
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs that point to the register def in this instruction and point them to Reg instead.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Definition: Pass.cpp:102
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
ScopedHashTableScope< K, V, KInfo, AllocatorTy > ScopeTy
ScopeTy - This is a helpful typedef that allows clients to get easy access to the name of the scope f...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:436
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool empty() const
Definition: SmallSet.h:159
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:95
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2406
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition: Allocator.h:382
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:165
void initializeMachineCSEPass(PassRegistry &)
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:281
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...