LLVM 20.0.0git
MachineCSE.cpp
Go to the documentation of this file.
1//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global common subexpression elimination on machine
10// instructions using a scoped hash table based value numbering scheme. It
11// must be run while the machine function is still in SSA form.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/SmallSet.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/CFG.h"
32#include "llvm/CodeGen/Passes.h"
38#include "llvm/MC/MCRegister.h"
40#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
45#include <cassert>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "machine-cse"
52
53STATISTIC(NumCoalesces, "Number of copies coalesced");
54STATISTIC(NumCSEs, "Number of common subexpression eliminated");
55STATISTIC(NumPREs, "Number of partial redundant expression"
56 " transformed to fully redundant");
57STATISTIC(NumPhysCSEs,
58 "Number of physreg referencing common subexpr eliminated");
59STATISTIC(NumCrossBBCSEs,
60 "Number of cross-MBB physreg referencing CS eliminated");
61STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
62
63// Threshold to avoid excessive cost to compute isProfitableToCSE.
64static cl::opt<int>
65 CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
66 cl::desc("Threshold for the size of CSUses"));
67
69 "aggressive-machine-cse", cl::Hidden, cl::init(false),
70 cl::desc("Override the profitability heuristics for Machine CSE"));
71
72namespace {
73
74class MachineCSEImpl {
75 const TargetInstrInfo *TII = nullptr;
76 const TargetRegisterInfo *TRI = nullptr;
77 MachineDominatorTree *DT = nullptr;
78 MachineRegisterInfo *MRI = nullptr;
79 MachineBlockFrequencyInfo *MBFI = nullptr;
80
81public:
82 MachineCSEImpl(MachineDominatorTree *DT, MachineBlockFrequencyInfo *MBFI)
83 : DT(DT), MBFI(MBFI) {}
84 bool run(MachineFunction &MF);
85
86private:
87 using AllocatorTy =
90 using ScopedHTType =
92 AllocatorTy>;
93 using ScopeType = ScopedHTType::ScopeTy;
94 using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
95
96 unsigned LookAheadLimit = 0;
99 PREMap;
100 ScopedHTType VNT;
102 unsigned CurrVN = 0;
103
104 bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB);
105 bool isPhysDefTriviallyDead(MCRegister Reg,
108 bool hasLivePhysRegDefUses(const MachineInstr *MI,
109 const MachineBasicBlock *MBB,
110 SmallSet<MCRegister, 8> &PhysRefs,
111 PhysDefVector &PhysDefs, bool &PhysUseDef) const;
112 bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
113 SmallSet<MCRegister, 8> &PhysRefs,
114 PhysDefVector &PhysDefs, bool &NonLocal) const;
115 bool isCSECandidate(MachineInstr *MI);
116 bool isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB,
118 void EnterScope(MachineBasicBlock *MBB);
119 void ExitScope(MachineBasicBlock *MBB);
120 bool ProcessBlockCSE(MachineBasicBlock *MBB);
121 void ExitScopeIfDone(MachineDomTreeNode *Node,
123 bool PerformCSE(MachineDomTreeNode *Node);
124
125 bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
126 bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
127 bool PerformSimplePRE(MachineDominatorTree *DT);
128 /// Heuristics to see if it's profitable to move common computations of MBB
129 /// and MBB1 to CandidateBB.
130 bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
132 void releaseMemory();
133};
134
135class MachineCSELegacy : public MachineFunctionPass {
136public:
137 static char ID; // Pass identification
138
139 MachineCSELegacy() : MachineFunctionPass(ID) {
141 }
142
143 bool runOnMachineFunction(MachineFunction &MF) override;
144
145 void getAnalysisUsage(AnalysisUsage &AU) const override {
146 AU.setPreservesCFG();
153 }
154
157 MachineFunctionProperties::Property::IsSSA);
158 }
159};
160} // end anonymous namespace
161
162char MachineCSELegacy::ID = 0;
163
164char &llvm::MachineCSELegacyID = MachineCSELegacy::ID;
165
167 "Machine Common Subexpression Elimination", false, false)
170 "Machine Common Subexpression Elimination", false, false)
171
172/// The source register of a COPY machine instruction can be propagated to all
173/// its users, and this propagation could increase the probability of finding
174/// common subexpressions. If the COPY has only one user, the COPY itself can
175/// be removed.
176bool MachineCSEImpl::PerformTrivialCopyPropagation(MachineInstr *MI,
178 bool Changed = false;
179 for (MachineOperand &MO : MI->all_uses()) {
180 Register Reg = MO.getReg();
181 if (!Reg.isVirtual())
182 continue;
183 bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
184 MachineInstr *DefMI = MRI->getVRegDef(Reg);
185 if (!DefMI || !DefMI->isCopy())
186 continue;
187 Register SrcReg = DefMI->getOperand(1).getReg();
188 if (!SrcReg.isVirtual())
189 continue;
190 if (DefMI->getOperand(0).getSubReg())
191 continue;
192 // FIXME: We should trivially coalesce subregister copies to expose CSE
193 // opportunities on instructions with truncated operands (see
194 // cse-add-with-overflow.ll). This can be done here as follows:
195 // if (SrcSubReg)
196 // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
197 // SrcSubReg);
198 // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
199 //
200 // The 2-addr pass has been updated to handle coalesced subregs. However,
201 // some machine-specific code still can't handle it.
202 // To handle it properly we also need a way find a constrained subregister
203 // class given a super-reg class and subreg index.
204 if (DefMI->getOperand(1).getSubReg())
205 continue;
206 if (!MRI->constrainRegAttrs(SrcReg, Reg))
207 continue;
208 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
209 LLVM_DEBUG(dbgs() << "*** to: " << *MI);
210
211 // Propagate SrcReg of copies to MI.
212 MO.setReg(SrcReg);
213 MRI->clearKillFlags(SrcReg);
214 // Coalesce single use copies.
215 if (OnlyOneUse) {
216 // If (and only if) we've eliminated all uses of the copy, also
217 // copy-propagate to any debug-users of MI, or they'll be left using
218 // an undefined value.
220
222 ++NumCoalesces;
223 }
224 Changed = true;
225 }
226
227 return Changed;
228}
229
230bool MachineCSEImpl::isPhysDefTriviallyDead(
233 unsigned LookAheadLeft = LookAheadLimit;
234 while (LookAheadLeft) {
235 // Skip over dbg_value's.
237
238 if (I == E)
239 // Reached end of block, we don't know if register is dead or not.
240 return false;
241
242 bool SeenDef = false;
243 for (const MachineOperand &MO : I->operands()) {
244 if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
245 SeenDef = true;
246 if (!MO.isReg() || !MO.getReg())
247 continue;
248 if (!TRI->regsOverlap(MO.getReg(), Reg))
249 continue;
250 if (MO.isUse())
251 // Found a use!
252 return false;
253 SeenDef = true;
254 }
255 if (SeenDef)
256 // See a def of Reg (or an alias) before encountering any use, it's
257 // trivially dead.
258 return true;
259
260 --LookAheadLeft;
261 ++I;
262 }
263 return false;
264}
265
267 const MachineOperand &MO,
268 const MachineFunction &MF,
269 const TargetRegisterInfo &TRI,
270 const TargetInstrInfo &TII) {
271 // MachineRegisterInfo::isConstantPhysReg directly called by
272 // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
273 // reserved registers to be frozen. That doesn't cause a problem post-ISel as
274 // most (if not all) targets freeze reserved registers right after ISel.
275 //
276 // It does cause issues mid-GlobalISel, however, hence the additional
277 // reservedRegsFrozen check.
278 const MachineRegisterInfo &MRI = MF.getRegInfo();
279 return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
280 (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
281}
282
283/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
284/// physical registers (except for dead defs of physical registers). It also
285/// returns the physical register def by reference if it's the only one and the
286/// instruction does not uses a physical register.
287bool MachineCSEImpl::hasLivePhysRegDefUses(const MachineInstr *MI,
288 const MachineBasicBlock *MBB,
289 SmallSet<MCRegister, 8> &PhysRefs,
290 PhysDefVector &PhysDefs,
291 bool &PhysUseDef) const {
292 // First, add all uses to PhysRefs.
293 for (const MachineOperand &MO : MI->all_uses()) {
294 Register Reg = MO.getReg();
295 if (!Reg)
296 continue;
297 if (Reg.isVirtual())
298 continue;
299 // Reading either caller preserved or constant physregs is ok.
300 if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
301 *TII))
302 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
303 PhysRefs.insert(*AI);
304 }
305
306 // Next, collect all defs into PhysDefs. If any is already in PhysRefs
307 // (which currently contains only uses), set the PhysUseDef flag.
308 PhysUseDef = false;
309 MachineBasicBlock::const_iterator I = MI; I = std::next(I);
310 for (const auto &MOP : llvm::enumerate(MI->operands())) {
311 const MachineOperand &MO = MOP.value();
312 if (!MO.isReg() || !MO.isDef())
313 continue;
314 Register Reg = MO.getReg();
315 if (!Reg)
316 continue;
317 if (Reg.isVirtual())
318 continue;
319 // Check against PhysRefs even if the def is "dead".
320 if (PhysRefs.count(Reg.asMCReg()))
321 PhysUseDef = true;
322 // If the def is dead, it's ok. But the def may not marked "dead". That's
323 // common since this pass is run before livevariables. We can scan
324 // forward a few instructions and check if it is obviously dead.
325 if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
326 PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
327 }
328
329 // Finally, add all defs to PhysRefs as well.
330 for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
331 for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
332 ++AI)
333 PhysRefs.insert(*AI);
334
335 return !PhysRefs.empty();
336}
337
338bool MachineCSEImpl::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
339 SmallSet<MCRegister, 8> &PhysRefs,
340 PhysDefVector &PhysDefs,
341 bool &NonLocal) const {
342 // For now conservatively returns false if the common subexpression is
343 // not in the same basic block as the given instruction. The only exception
344 // is if the common subexpression is in the sole predecessor block.
345 const MachineBasicBlock *MBB = MI->getParent();
346 const MachineBasicBlock *CSMBB = CSMI->getParent();
347
348 bool CrossMBB = false;
349 if (CSMBB != MBB) {
350 if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
351 return false;
352
353 for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
354 if (MRI->isAllocatable(PhysDefs[i].second) ||
355 MRI->isReserved(PhysDefs[i].second))
356 // Avoid extending live range of physical registers if they are
357 //allocatable or reserved.
358 return false;
359 }
360 CrossMBB = true;
361 }
362 MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
365 unsigned LookAheadLeft = LookAheadLimit;
366 while (LookAheadLeft) {
367 // Skip over dbg_value's.
368 while (I != E && I != EE && I->isDebugInstr())
369 ++I;
370
371 if (I == EE) {
372 assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
373 (void)CrossMBB;
374 CrossMBB = false;
375 NonLocal = true;
376 I = MBB->begin();
377 EE = MBB->end();
378 continue;
379 }
380
381 if (I == E)
382 return true;
383
384 for (const MachineOperand &MO : I->operands()) {
385 // RegMasks go on instructions like calls that clobber lots of physregs.
386 // Don't attempt to CSE across such an instruction.
387 if (MO.isRegMask())
388 return false;
389 if (!MO.isReg() || !MO.isDef())
390 continue;
391 Register MOReg = MO.getReg();
392 if (MOReg.isVirtual())
393 continue;
394 if (PhysRefs.count(MOReg.asMCReg()))
395 return false;
396 }
397
398 --LookAheadLeft;
399 ++I;
400 }
401
402 return false;
403}
404
405bool MachineCSEImpl::isCSECandidate(MachineInstr *MI) {
406 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
407 MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() ||
408 MI->isFakeUse())
409 return false;
410
411 // Ignore copies.
412 if (MI->isCopyLike())
413 return false;
414
415 // Ignore stuff that we obviously can't move.
416 if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
417 MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
418 return false;
419
420 if (MI->mayLoad()) {
421 // Okay, this instruction does a load. As a refinement, we allow the target
422 // to decide whether the loaded value is actually a constant. If so, we can
423 // actually use it as a load.
424 if (!MI->isDereferenceableInvariantLoad())
425 // FIXME: we should be able to hoist loads with no other side effects if
426 // there are no other instructions which can change memory in this loop.
427 // This is a trivial form of alias analysis.
428 return false;
429 }
430
431 // Ignore stack guard loads, otherwise the register that holds CSEed value may
432 // be spilled and get loaded back with corrupted data.
433 if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
434 return false;
435
436 return true;
437}
438
439/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
440/// common expression that defines Reg. CSBB is basic block where CSReg is
441/// defined.
442bool MachineCSEImpl::isProfitableToCSE(Register CSReg, Register Reg,
443 MachineBasicBlock *CSBB,
444 MachineInstr *MI) {
446 return true;
447
448 // FIXME: Heuristics that works around the lack the live range splitting.
449
450 // If CSReg is used at all uses of Reg, CSE should not increase register
451 // pressure of CSReg.
452 bool MayIncreasePressure = true;
453 if (CSReg.isVirtual() && Reg.isVirtual()) {
454 MayIncreasePressure = false;
456 int NumOfUses = 0;
457 for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
458 CSUses.insert(&MI);
459 // Too costly to compute if NumOfUses is very large. Conservatively assume
460 // MayIncreasePressure to avoid spending too much time here.
461 if (++NumOfUses > CSUsesThreshold) {
462 MayIncreasePressure = true;
463 break;
464 }
465 }
466 if (!MayIncreasePressure)
467 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
468 if (!CSUses.count(&MI)) {
469 MayIncreasePressure = true;
470 break;
471 }
472 }
473 }
474 if (!MayIncreasePressure) return true;
475
476 // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
477 // an immediate predecessor. We don't want to increase register pressure and
478 // end up causing other computation to be spilled.
479 if (TII->isAsCheapAsAMove(*MI)) {
480 MachineBasicBlock *BB = MI->getParent();
481 if (CSBB != BB && !CSBB->isSuccessor(BB))
482 return false;
483 }
484
485 // Heuristics #2: If the expression doesn't not use a vr and the only use
486 // of the redundant computation are copies, do not cse.
487 bool HasVRegUse = false;
488 for (const MachineOperand &MO : MI->all_uses()) {
489 if (MO.getReg().isVirtual()) {
490 HasVRegUse = true;
491 break;
492 }
493 }
494 if (!HasVRegUse) {
495 bool HasNonCopyUse = false;
496 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
497 // Ignore copies.
498 if (!MI.isCopyLike()) {
499 HasNonCopyUse = true;
500 break;
501 }
502 }
503 if (!HasNonCopyUse)
504 return false;
505 }
506
507 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
508 // it unless the defined value is already used in the BB of the new use.
509 bool HasPHI = false;
510 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
511 HasPHI |= UseMI.isPHI();
512 if (UseMI.getParent() == MI->getParent())
513 return true;
514 }
515
516 return !HasPHI;
517}
518
519void MachineCSEImpl::EnterScope(MachineBasicBlock *MBB) {
520 LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
521 ScopeType *Scope = new ScopeType(VNT);
522 ScopeMap[MBB] = Scope;
523}
524
525void MachineCSEImpl::ExitScope(MachineBasicBlock *MBB) {
526 LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
528 assert(SI != ScopeMap.end());
529 delete SI->second;
530 ScopeMap.erase(SI);
531}
532
533bool MachineCSEImpl::ProcessBlockCSE(MachineBasicBlock *MBB) {
534 bool Changed = false;
535
537 SmallVector<unsigned, 2> ImplicitDefsToUpdate;
538 SmallVector<unsigned, 2> ImplicitDefs;
540 if (!isCSECandidate(&MI))
541 continue;
542
543 bool FoundCSE = VNT.count(&MI);
544 if (!FoundCSE) {
545 // Using trivial copy propagation to find more CSE opportunities.
546 if (PerformTrivialCopyPropagation(&MI, MBB)) {
547 Changed = true;
548
549 // After coalescing MI itself may become a copy.
550 if (MI.isCopyLike())
551 continue;
552
553 // Try again to see if CSE is possible.
554 FoundCSE = VNT.count(&MI);
555 }
556 }
557
558 // Commute commutable instructions.
559 bool Commuted = false;
560 if (!FoundCSE && MI.isCommutable()) {
561 if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
562 Commuted = true;
563 FoundCSE = VNT.count(NewMI);
564 if (NewMI != &MI) {
565 // New instruction. It doesn't need to be kept.
566 NewMI->eraseFromParent();
567 Changed = true;
568 } else if (!FoundCSE)
569 // MI was changed but it didn't help, commute it back!
570 (void)TII->commuteInstruction(MI);
571 }
572 }
573
574 // If the instruction defines physical registers and the values *may* be
575 // used, then it's not safe to replace it with a common subexpression.
576 // It's also not safe if the instruction uses physical registers.
577 bool CrossMBBPhysDef = false;
579 PhysDefVector PhysDefs;
580 bool PhysUseDef = false;
581 if (FoundCSE &&
582 hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
583 FoundCSE = false;
584
585 // ... Unless the CS is local or is in the sole predecessor block
586 // and it also defines the physical register which is not clobbered
587 // in between and the physical register uses were not clobbered.
588 // This can never be the case if the instruction both uses and
589 // defines the same physical register, which was detected above.
590 if (!PhysUseDef) {
591 unsigned CSVN = VNT.lookup(&MI);
592 MachineInstr *CSMI = Exps[CSVN];
593 if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
594 FoundCSE = true;
595 }
596 }
597
598 if (!FoundCSE) {
599 VNT.insert(&MI, CurrVN++);
600 Exps.push_back(&MI);
601 continue;
602 }
603
604 // Found a common subexpression, eliminate it.
605 unsigned CSVN = VNT.lookup(&MI);
606 MachineInstr *CSMI = Exps[CSVN];
607 LLVM_DEBUG(dbgs() << "Examining: " << MI);
608 LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
609
610 // Prevent CSE-ing non-local convergent instructions.
611 // LLVM's current definition of `isConvergent` does not necessarily prove
612 // that non-local CSE is illegal. The following check extends the definition
613 // of `isConvergent` to assume a convergent instruction is dependent not
614 // only on additional conditions, but also on fewer conditions. LLVM does
615 // not have a MachineInstr attribute which expresses this extended
616 // definition, so it's necessary to use `isConvergent` to prevent illegally
617 // CSE-ing the subset of `isConvergent` instructions which do fall into this
618 // extended definition.
619 if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
620 LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
621 "different BBs, avoid CSE!\n");
622 VNT.insert(&MI, CurrVN++);
623 Exps.push_back(&MI);
624 continue;
625 }
626
627 // Check if it's profitable to perform this CSE.
628 bool DoCSE = true;
629 unsigned NumDefs = MI.getNumDefs();
630
631 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
632 MachineOperand &MO = MI.getOperand(i);
633 if (!MO.isReg() || !MO.isDef())
634 continue;
635 Register OldReg = MO.getReg();
636 Register NewReg = CSMI->getOperand(i).getReg();
637
638 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
639 // we should make sure it is not dead at CSMI.
640 if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
641 ImplicitDefsToUpdate.push_back(i);
642
643 // Keep track of implicit defs of CSMI and MI, to clear possibly
644 // made-redundant kill flags.
645 if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
646 ImplicitDefs.push_back(OldReg);
647
648 if (OldReg == NewReg) {
649 --NumDefs;
650 continue;
651 }
652
653 assert(OldReg.isVirtual() && NewReg.isVirtual() &&
654 "Do not CSE physical register defs!");
655
656 if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
657 LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
658 DoCSE = false;
659 break;
660 }
661
662 // Don't perform CSE if the result of the new instruction cannot exist
663 // within the constraints (register class, bank, or low-level type) of
664 // the old instruction.
665 if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
667 dbgs() << "*** Not the same register constraints, avoid CSE!\n");
668 DoCSE = false;
669 break;
670 }
671
672 CSEPairs.push_back(std::make_pair(OldReg, NewReg));
673 --NumDefs;
674 }
675
676 // Actually perform the elimination.
677 if (DoCSE) {
678 for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
679 unsigned OldReg = CSEPair.first;
680 unsigned NewReg = CSEPair.second;
681 // OldReg may have been unused but is used now, clear the Dead flag
682 MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
683 assert(Def != nullptr && "CSEd register has no unique definition?");
684 Def->clearRegisterDeads(NewReg);
685 // Replace with NewReg and clear kill flags which may be wrong now.
686 MRI->replaceRegWith(OldReg, NewReg);
687 MRI->clearKillFlags(NewReg);
688 }
689
690 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
691 // we should make sure it is not dead at CSMI.
692 for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
693 CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
694 for (const auto &PhysDef : PhysDefs)
695 if (!MI.getOperand(PhysDef.first).isDead())
696 CSMI->getOperand(PhysDef.first).setIsDead(false);
697
698 // Go through implicit defs of CSMI and MI, and clear the kill flags on
699 // their uses in all the instructions between CSMI and MI.
700 // We might have made some of the kill flags redundant, consider:
701 // subs ... implicit-def %nzcv <- CSMI
702 // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
703 // subs ... implicit-def %nzcv <- MI, to be eliminated
704 // csinc ... implicit killed %nzcv
705 // Since we eliminated MI, and reused a register imp-def'd by CSMI
706 // (here %nzcv), that register, if it was killed before MI, should have
707 // that kill flag removed, because it's lifetime was extended.
708 if (CSMI->getParent() == MI.getParent()) {
709 for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
710 for (auto ImplicitDef : ImplicitDefs)
711 if (MachineOperand *MO = II->findRegisterUseOperand(
712 ImplicitDef, TRI, /*isKill=*/true))
713 MO->setIsKill(false);
714 } else {
715 // If the instructions aren't in the same BB, bail out and clear the
716 // kill flag on all uses of the imp-def'd register.
717 for (auto ImplicitDef : ImplicitDefs)
718 MRI->clearKillFlags(ImplicitDef);
719 }
720
721 if (CrossMBBPhysDef) {
722 // Add physical register defs now coming in from a predecessor to MBB
723 // livein list.
724 while (!PhysDefs.empty()) {
725 auto LiveIn = PhysDefs.pop_back_val();
726 if (!MBB->isLiveIn(LiveIn.second))
727 MBB->addLiveIn(LiveIn.second);
728 }
729 ++NumCrossBBCSEs;
730 }
731
732 MI.eraseFromParent();
733 ++NumCSEs;
734 if (!PhysRefs.empty())
735 ++NumPhysCSEs;
736 if (Commuted)
737 ++NumCommutes;
738 Changed = true;
739 } else {
740 VNT.insert(&MI, CurrVN++);
741 Exps.push_back(&MI);
742 }
743 CSEPairs.clear();
744 ImplicitDefsToUpdate.clear();
745 ImplicitDefs.clear();
746 }
747
748 return Changed;
749}
750
751/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
752/// dominator tree node if its a leaf or all of its children are done. Walk
753/// up the dominator tree to destroy ancestors which are now done.
754void MachineCSEImpl::ExitScopeIfDone(
757 if (OpenChildren[Node])
758 return;
759
760 // Pop scope.
761 ExitScope(Node->getBlock());
762
763 // Now traverse upwards to pop ancestors whose offsprings are all done.
764 while (MachineDomTreeNode *Parent = Node->getIDom()) {
765 unsigned Left = --OpenChildren[Parent];
766 if (Left != 0)
767 break;
768 ExitScope(Parent->getBlock());
769 Node = Parent;
770 }
771}
772
773bool MachineCSEImpl::PerformCSE(MachineDomTreeNode *Node) {
777
778 CurrVN = 0;
779
780 // Perform a DFS walk to determine the order of visit.
781 WorkList.push_back(Node);
782 do {
783 Node = WorkList.pop_back_val();
784 Scopes.push_back(Node);
785 OpenChildren[Node] = Node->getNumChildren();
786 append_range(WorkList, Node->children());
787 } while (!WorkList.empty());
788
789 // Now perform CSE.
790 bool Changed = false;
791 for (MachineDomTreeNode *Node : Scopes) {
792 MachineBasicBlock *MBB = Node->getBlock();
793 EnterScope(MBB);
794 Changed |= ProcessBlockCSE(MBB);
795 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
796 ExitScopeIfDone(Node, OpenChildren);
797 }
798
799 return Changed;
800}
801
802// We use stronger checks for PRE candidate rather than for CSE ones to embrace
803// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
804// to exclude instrs created by PRE that won't be CSEed later.
805bool MachineCSEImpl::isPRECandidate(MachineInstr *MI,
806 SmallSet<MCRegister, 8> &PhysRefs) {
807 if (!isCSECandidate(MI) ||
808 MI->isNotDuplicable() ||
809 MI->mayLoad() ||
811 MI->getNumDefs() != 1 ||
812 MI->getNumExplicitDefs() != 1)
813 return false;
814
815 for (const MachineOperand &MO : MI->operands()) {
816 if (MO.isReg() && !MO.getReg().isVirtual()) {
817 if (MO.isDef())
818 return false;
819 else
820 PhysRefs.insert(MO.getReg());
821 }
822 }
823
824 return true;
825}
826
827bool MachineCSEImpl::ProcessBlockPRE(MachineDominatorTree *DT,
829 bool Changed = false;
832 if (!isPRECandidate(&MI, PhysRefs))
833 continue;
834
835 if (!PREMap.count(&MI)) {
836 PREMap[&MI] = MBB;
837 continue;
838 }
839
840 auto MBB1 = PREMap[&MI];
841 assert(
842 !DT->properlyDominates(MBB, MBB1) &&
843 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
844 auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
845 if (!CMBB->isLegalToHoistInto())
846 continue;
847
848 if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
849 continue;
850
851 // Two instrs are partial redundant if their basic blocks are reachable
852 // from one to another but one doesn't dominate another.
853 if (CMBB != MBB1) {
854 auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
855 if (BB != nullptr && BB1 != nullptr &&
856 (isPotentiallyReachable(BB1, BB) ||
857 isPotentiallyReachable(BB, BB1))) {
858 // The following check extends the definition of `isConvergent` to
859 // assume a convergent instruction is dependent not only on additional
860 // conditions, but also on fewer conditions. LLVM does not have a
861 // MachineInstr attribute which expresses this extended definition, so
862 // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
863 // subset of `isConvergent` instructions which do fall into this
864 // extended definition.
865 if (MI.isConvergent() && CMBB != MBB)
866 continue;
867
868 // If this instruction uses physical registers then we can only do PRE
869 // if it's using the value that is live at the place we're hoisting to.
870 bool NonLocal;
871 PhysDefVector PhysDefs;
872 if (!PhysRefs.empty() &&
873 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
874 PhysDefs, NonLocal))
875 continue;
876
877 assert(MI.getOperand(0).isDef() &&
878 "First operand of instr with one explicit def must be this def");
879 Register VReg = MI.getOperand(0).getReg();
880 Register NewReg = MRI->cloneVirtualRegister(VReg);
881 if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
882 continue;
883 MachineInstr &NewMI =
884 TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
885
886 // When hoisting, make sure we don't carry the debug location of
887 // the original instruction, as that's not correct and can cause
888 // unexpected jumps when debugging optimized code.
889 auto EmptyDL = DebugLoc();
890 NewMI.setDebugLoc(EmptyDL);
891
892 NewMI.getOperand(0).setReg(NewReg);
893
894 PREMap[&MI] = CMBB;
895 ++NumPREs;
896 Changed = true;
897 }
898 }
899 }
900 return Changed;
901}
902
903// This simple PRE (partial redundancy elimination) pass doesn't actually
904// eliminate partial redundancy but transforms it to full redundancy,
905// anticipating that the next CSE step will eliminate this created redundancy.
906// If CSE doesn't eliminate this, than created instruction will remain dead
907// and eliminated later by Remove Dead Machine Instructions pass.
908bool MachineCSEImpl::PerformSimplePRE(MachineDominatorTree *DT) {
910
911 PREMap.clear();
912 bool Changed = false;
913 BBs.push_back(DT->getRootNode());
914 do {
915 auto Node = BBs.pop_back_val();
916 append_range(BBs, Node->children());
917
918 MachineBasicBlock *MBB = Node->getBlock();
919 Changed |= ProcessBlockPRE(DT, MBB);
920
921 } while (!BBs.empty());
922
923 return Changed;
924}
925
926bool MachineCSEImpl::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
928 MachineBasicBlock *MBB1) {
929 if (CandidateBB->getParent()->getFunction().hasMinSize())
930 return true;
931 assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
932 assert(DT->dominates(CandidateBB, MBB1) &&
933 "CandidateBB should dominate MBB1");
934 return MBFI->getBlockFreq(CandidateBB) <=
935 MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
936}
937
938void MachineCSEImpl::releaseMemory() {
939 ScopeMap.clear();
940 PREMap.clear();
941 Exps.clear();
942}
943
944bool MachineCSEImpl::run(MachineFunction &MF) {
947 MRI = &MF.getRegInfo();
948 LookAheadLimit = TII->getMachineCSELookAheadLimit();
949 bool ChangedPRE, ChangedCSE;
950 ChangedPRE = PerformSimplePRE(DT);
951 ChangedCSE = PerformCSE(DT->getRootNode());
952 releaseMemory();
953 return ChangedPRE || ChangedCSE;
954}
955
958 MFPropsModifier _(*this, MF);
959
963 MachineCSEImpl Impl(&MDT, &MBFI);
964 bool Changed = Impl.run(MF);
965 if (!Changed)
966 return PreservedAnalyses::all();
967
969 PA.preserve<MachineLoopAnalysis>();
970 PA.preserve<MachineDominatorTreeAnalysis>();
971 PA.preserve<MachineBlockFrequencyAnalysis>();
972 PA.preserveSet<CFGAnalyses>();
973 return PA;
974}
975
976bool MachineCSELegacy::runOnMachineFunction(MachineFunction &MF) {
977 if (skipFunction(MF.getFunction()))
978 return false;
979
981 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
983 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
984 MachineCSEImpl Impl(&MDT, &MBFI);
985 return Impl.run(MF);
986}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
This file defines the BumpPtrAllocator interface.
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:390
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineOperand &MO, const MachineFunction &MF, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII)
Definition: MachineCSE.cpp:266
Machine Common Subexpression Elimination
Definition: MachineCSE.cpp:170
static cl::opt< int > CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), cl::desc("Threshold for the size of CSUses"))
static cl::opt< bool > AggressiveMachineCSE("aggressive-machine-cse", cl::Hidden, cl::init(false), cl::desc("Override the profitability heuristics for Machine CSE"))
#define DEBUG_TYPE
Definition: MachineCSE.cpp:51
unsigned const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
A debug info location.
Definition: DebugLoc.h:33
Base class for the actual dominator tree node.
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:344
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
bool isAsCheapAsAMove(const MachineInstr &MI) const override
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
An RAII based helper class to modify MachineFunctionProperties when running pass.
unsigned pred_size() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition: MachineCSE.cpp:956
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
void insert(mop_iterator InsertBefore, ArrayRef< MachineOperand > Ops)
Inserts Ops BEFORE It. Can untie/retie tied operands.
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs that point to the register def in this instruction and point them to Reg instead.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
ScopedHashTableScope< K, V, KInfo, AllocatorTy > ScopeTy
ScopeTy - This is a helpful typedef that allows clients to get easy access to the name of the scope f...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
bool empty() const
Definition: SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
void initializeMachineCSELegacyPass(PassRegistry &)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition: Allocator.h:382
char & MachineCSELegacyID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:164
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:281
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...