LLVM 17.0.0git
MachineCSE.cpp
Go to the documentation of this file.
1//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global common subexpression elimination on machine
10// instructions using a scoped hash table based value numbering scheme. It
11// must be run while the machine function is still in SSA form.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/SmallSet.h"
20#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/CFG.h"
31#include "llvm/CodeGen/Passes.h"
37#include "llvm/MC/MCRegister.h"
39#include "llvm/Pass.h"
41#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <iterator>
46#include <utility>
47#include <vector>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "machine-cse"
52
53STATISTIC(NumCoalesces, "Number of copies coalesced");
54STATISTIC(NumCSEs, "Number of common subexpression eliminated");
55STATISTIC(NumPREs, "Number of partial redundant expression"
56 " transformed to fully redundant");
57STATISTIC(NumPhysCSEs,
58 "Number of physreg referencing common subexpr eliminated");
59STATISTIC(NumCrossBBCSEs,
60 "Number of cross-MBB physreg referencing CS eliminated");
61STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
62
63// Threshold to avoid excessive cost to compute isProfitableToCSE.
64static cl::opt<int>
65 CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
66 cl::desc("Threshold for the size of CSUses"));
67
68namespace {
69
70 class MachineCSE : public MachineFunctionPass {
71 const TargetInstrInfo *TII;
73 AliasAnalysis *AA;
77
78 public:
79 static char ID; // Pass identification
80
81 MachineCSE() : MachineFunctionPass(ID) {
83 }
84
85 bool runOnMachineFunction(MachineFunction &MF) override;
86
87 void getAnalysisUsage(AnalysisUsage &AU) const override {
88 AU.setPreservesCFG();
96 }
97
100 .set(MachineFunctionProperties::Property::IsSSA);
101 }
102
103 void releaseMemory() override {
104 ScopeMap.clear();
105 PREMap.clear();
106 Exps.clear();
107 }
108
109 private:
110 using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
112 using ScopedHTType =
114 AllocatorTy>;
115 using ScopeType = ScopedHTType::ScopeTy;
116 using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
117
118 unsigned LookAheadLimit = 0;
121 PREMap;
122 ScopedHTType VNT;
124 unsigned CurrVN = 0;
125
126 bool PerformTrivialCopyPropagation(MachineInstr *MI,
128 bool isPhysDefTriviallyDead(MCRegister Reg,
131 bool hasLivePhysRegDefUses(const MachineInstr *MI,
132 const MachineBasicBlock *MBB,
133 SmallSet<MCRegister, 8> &PhysRefs,
134 PhysDefVector &PhysDefs, bool &PhysUseDef) const;
135 bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
136 SmallSet<MCRegister, 8> &PhysRefs,
137 PhysDefVector &PhysDefs, bool &NonLocal) const;
138 bool isCSECandidate(MachineInstr *MI);
139 bool isProfitableToCSE(Register CSReg, Register Reg,
141 void EnterScope(MachineBasicBlock *MBB);
142 void ExitScope(MachineBasicBlock *MBB);
143 bool ProcessBlockCSE(MachineBasicBlock *MBB);
144 void ExitScopeIfDone(MachineDomTreeNode *Node,
146 bool PerformCSE(MachineDomTreeNode *Node);
147
148 bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
149 bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
150 bool PerformSimplePRE(MachineDominatorTree *DT);
151 /// Heuristics to see if it's profitable to move common computations of MBB
152 /// and MBB1 to CandidateBB.
153 bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
155 MachineBasicBlock *MBB1);
156 };
157
158} // end anonymous namespace
159
160char MachineCSE::ID = 0;
161
162char &llvm::MachineCSEID = MachineCSE::ID;
163
165 "Machine Common Subexpression Elimination", false, false)
169 "Machine Common Subexpression Elimination", false, false)
170
171/// The source register of a COPY machine instruction can be propagated to all
172/// its users, and this propagation could increase the probability of finding
173/// common subexpressions. If the COPY has only one user, the COPY itself can
174/// be removed.
175bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
177 bool Changed = false;
178 for (MachineOperand &MO : MI->operands()) {
179 if (!MO.isReg() || !MO.isUse())
180 continue;
181 Register Reg = MO.getReg();
182 if (!Reg.isVirtual())
183 continue;
184 bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
185 MachineInstr *DefMI = MRI->getVRegDef(Reg);
186 if (!DefMI->isCopy())
187 continue;
188 Register SrcReg = DefMI->getOperand(1).getReg();
189 if (!SrcReg.isVirtual())
190 continue;
191 if (DefMI->getOperand(0).getSubReg())
192 continue;
193 // FIXME: We should trivially coalesce subregister copies to expose CSE
194 // opportunities on instructions with truncated operands (see
195 // cse-add-with-overflow.ll). This can be done here as follows:
196 // if (SrcSubReg)
197 // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
198 // SrcSubReg);
199 // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
200 //
201 // The 2-addr pass has been updated to handle coalesced subregs. However,
202 // some machine-specific code still can't handle it.
203 // To handle it properly we also need a way find a constrained subregister
204 // class given a super-reg class and subreg index.
205 if (DefMI->getOperand(1).getSubReg())
206 continue;
207 if (!MRI->constrainRegAttrs(SrcReg, Reg))
208 continue;
209 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
210 LLVM_DEBUG(dbgs() << "*** to: " << *MI);
211
212 // Propagate SrcReg of copies to MI.
213 MO.setReg(SrcReg);
214 MRI->clearKillFlags(SrcReg);
215 // Coalesce single use copies.
216 if (OnlyOneUse) {
217 // If (and only if) we've eliminated all uses of the copy, also
218 // copy-propagate to any debug-users of MI, or they'll be left using
219 // an undefined value.
221
223 ++NumCoalesces;
224 }
225 Changed = true;
226 }
227
228 return Changed;
229}
230
231bool MachineCSE::isPhysDefTriviallyDead(
234 unsigned LookAheadLeft = LookAheadLimit;
235 while (LookAheadLeft) {
236 // Skip over dbg_value's.
238
239 if (I == E)
240 // Reached end of block, we don't know if register is dead or not.
241 return false;
242
243 bool SeenDef = false;
244 for (const MachineOperand &MO : I->operands()) {
245 if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
246 SeenDef = true;
247 if (!MO.isReg() || !MO.getReg())
248 continue;
249 if (!TRI->regsOverlap(MO.getReg(), Reg))
250 continue;
251 if (MO.isUse())
252 // Found a use!
253 return false;
254 SeenDef = true;
255 }
256 if (SeenDef)
257 // See a def of Reg (or an alias) before encountering any use, it's
258 // trivially dead.
259 return true;
260
261 --LookAheadLeft;
262 ++I;
263 }
264 return false;
265}
266
268 const MachineOperand &MO,
269 const MachineFunction &MF,
270 const TargetRegisterInfo &TRI,
271 const TargetInstrInfo &TII) {
272 // MachineRegisterInfo::isConstantPhysReg directly called by
273 // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
274 // reserved registers to be frozen. That doesn't cause a problem post-ISel as
275 // most (if not all) targets freeze reserved registers right after ISel.
276 //
277 // It does cause issues mid-GlobalISel, however, hence the additional
278 // reservedRegsFrozen check.
279 const MachineRegisterInfo &MRI = MF.getRegInfo();
280 return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
281 (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
282}
283
284/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
285/// physical registers (except for dead defs of physical registers). It also
286/// returns the physical register def by reference if it's the only one and the
287/// instruction does not uses a physical register.
288bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
289 const MachineBasicBlock *MBB,
290 SmallSet<MCRegister, 8> &PhysRefs,
291 PhysDefVector &PhysDefs,
292 bool &PhysUseDef) const {
293 // First, add all uses to PhysRefs.
294 for (const MachineOperand &MO : MI->operands()) {
295 if (!MO.isReg() || MO.isDef())
296 continue;
297 Register Reg = MO.getReg();
298 if (!Reg)
299 continue;
300 if (Reg.isVirtual())
301 continue;
302 // Reading either caller preserved or constant physregs is ok.
303 if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
304 *TII))
305 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
306 PhysRefs.insert(*AI);
307 }
308
309 // Next, collect all defs into PhysDefs. If any is already in PhysRefs
310 // (which currently contains only uses), set the PhysUseDef flag.
311 PhysUseDef = false;
312 MachineBasicBlock::const_iterator I = MI; I = std::next(I);
313 for (const auto &MOP : llvm::enumerate(MI->operands())) {
314 const MachineOperand &MO = MOP.value();
315 if (!MO.isReg() || !MO.isDef())
316 continue;
317 Register Reg = MO.getReg();
318 if (!Reg)
319 continue;
320 if (Reg.isVirtual())
321 continue;
322 // Check against PhysRefs even if the def is "dead".
323 if (PhysRefs.count(Reg.asMCReg()))
324 PhysUseDef = true;
325 // If the def is dead, it's ok. But the def may not marked "dead". That's
326 // common since this pass is run before livevariables. We can scan
327 // forward a few instructions and check if it is obviously dead.
328 if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
329 PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
330 }
331
332 // Finally, add all defs to PhysRefs as well.
333 for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
334 for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
335 ++AI)
336 PhysRefs.insert(*AI);
337
338 return !PhysRefs.empty();
339}
340
341bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
342 SmallSet<MCRegister, 8> &PhysRefs,
343 PhysDefVector &PhysDefs,
344 bool &NonLocal) const {
345 // For now conservatively returns false if the common subexpression is
346 // not in the same basic block as the given instruction. The only exception
347 // is if the common subexpression is in the sole predecessor block.
348 const MachineBasicBlock *MBB = MI->getParent();
349 const MachineBasicBlock *CSMBB = CSMI->getParent();
350
351 bool CrossMBB = false;
352 if (CSMBB != MBB) {
353 if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
354 return false;
355
356 for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
357 if (MRI->isAllocatable(PhysDefs[i].second) ||
358 MRI->isReserved(PhysDefs[i].second))
359 // Avoid extending live range of physical registers if they are
360 //allocatable or reserved.
361 return false;
362 }
363 CrossMBB = true;
364 }
365 MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
368 unsigned LookAheadLeft = LookAheadLimit;
369 while (LookAheadLeft) {
370 // Skip over dbg_value's.
371 while (I != E && I != EE && I->isDebugInstr())
372 ++I;
373
374 if (I == EE) {
375 assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
376 (void)CrossMBB;
377 CrossMBB = false;
378 NonLocal = true;
379 I = MBB->begin();
380 EE = MBB->end();
381 continue;
382 }
383
384 if (I == E)
385 return true;
386
387 for (const MachineOperand &MO : I->operands()) {
388 // RegMasks go on instructions like calls that clobber lots of physregs.
389 // Don't attempt to CSE across such an instruction.
390 if (MO.isRegMask())
391 return false;
392 if (!MO.isReg() || !MO.isDef())
393 continue;
394 Register MOReg = MO.getReg();
395 if (MOReg.isVirtual())
396 continue;
397 if (PhysRefs.count(MOReg.asMCReg()))
398 return false;
399 }
400
401 --LookAheadLeft;
402 ++I;
403 }
404
405 return false;
406}
407
408bool MachineCSE::isCSECandidate(MachineInstr *MI) {
409 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
410 MI->isInlineAsm() || MI->isDebugInstr())
411 return false;
412
413 // Ignore copies.
414 if (MI->isCopyLike())
415 return false;
416
417 // Ignore stuff that we obviously can't move.
418 if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
419 MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
420 return false;
421
422 if (MI->mayLoad()) {
423 // Okay, this instruction does a load. As a refinement, we allow the target
424 // to decide whether the loaded value is actually a constant. If so, we can
425 // actually use it as a load.
426 if (!MI->isDereferenceableInvariantLoad())
427 // FIXME: we should be able to hoist loads with no other side effects if
428 // there are no other instructions which can change memory in this loop.
429 // This is a trivial form of alias analysis.
430 return false;
431 }
432
433 // Ignore stack guard loads, otherwise the register that holds CSEed value may
434 // be spilled and get loaded back with corrupted data.
435 if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
436 return false;
437
438 return true;
439}
440
441/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
442/// common expression that defines Reg. CSBB is basic block where CSReg is
443/// defined.
444bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
446 // FIXME: Heuristics that works around the lack the live range splitting.
447
448 // If CSReg is used at all uses of Reg, CSE should not increase register
449 // pressure of CSReg.
450 bool MayIncreasePressure = true;
451 if (CSReg.isVirtual() && Reg.isVirtual()) {
452 MayIncreasePressure = false;
454 int NumOfUses = 0;
455 for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
456 CSUses.insert(&MI);
457 // Too costly to compute if NumOfUses is very large. Conservatively assume
458 // MayIncreasePressure to avoid spending too much time here.
459 if (++NumOfUses > CSUsesThreshold) {
460 MayIncreasePressure = true;
461 break;
462 }
463 }
464 if (!MayIncreasePressure)
465 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
466 if (!CSUses.count(&MI)) {
467 MayIncreasePressure = true;
468 break;
469 }
470 }
471 }
472 if (!MayIncreasePressure) return true;
473
474 // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
475 // an immediate predecessor. We don't want to increase register pressure and
476 // end up causing other computation to be spilled.
477 if (TII->isAsCheapAsAMove(*MI)) {
478 MachineBasicBlock *BB = MI->getParent();
479 if (CSBB != BB && !CSBB->isSuccessor(BB))
480 return false;
481 }
482
483 // Heuristics #2: If the expression doesn't not use a vr and the only use
484 // of the redundant computation are copies, do not cse.
485 bool HasVRegUse = false;
486 for (const MachineOperand &MO : MI->operands()) {
487 if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual()) {
488 HasVRegUse = true;
489 break;
490 }
491 }
492 if (!HasVRegUse) {
493 bool HasNonCopyUse = false;
494 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
495 // Ignore copies.
496 if (!MI.isCopyLike()) {
497 HasNonCopyUse = true;
498 break;
499 }
500 }
501 if (!HasNonCopyUse)
502 return false;
503 }
504
505 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
506 // it unless the defined value is already used in the BB of the new use.
507 bool HasPHI = false;
508 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
509 HasPHI |= UseMI.isPHI();
510 if (UseMI.getParent() == MI->getParent())
511 return true;
512 }
513
514 return !HasPHI;
515}
516
517void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
518 LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
519 ScopeType *Scope = new ScopeType(VNT);
520 ScopeMap[MBB] = Scope;
521}
522
523void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
524 LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
526 assert(SI != ScopeMap.end());
527 delete SI->second;
528 ScopeMap.erase(SI);
529}
530
531bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
532 bool Changed = false;
533
535 SmallVector<unsigned, 2> ImplicitDefsToUpdate;
536 SmallVector<unsigned, 2> ImplicitDefs;
538 if (!isCSECandidate(&MI))
539 continue;
540
541 bool FoundCSE = VNT.count(&MI);
542 if (!FoundCSE) {
543 // Using trivial copy propagation to find more CSE opportunities.
544 if (PerformTrivialCopyPropagation(&MI, MBB)) {
545 Changed = true;
546
547 // After coalescing MI itself may become a copy.
548 if (MI.isCopyLike())
549 continue;
550
551 // Try again to see if CSE is possible.
552 FoundCSE = VNT.count(&MI);
553 }
554 }
555
556 // Commute commutable instructions.
557 bool Commuted = false;
558 if (!FoundCSE && MI.isCommutable()) {
559 if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
560 Commuted = true;
561 FoundCSE = VNT.count(NewMI);
562 if (NewMI != &MI) {
563 // New instruction. It doesn't need to be kept.
564 NewMI->eraseFromParent();
565 Changed = true;
566 } else if (!FoundCSE)
567 // MI was changed but it didn't help, commute it back!
568 (void)TII->commuteInstruction(MI);
569 }
570 }
571
572 // If the instruction defines physical registers and the values *may* be
573 // used, then it's not safe to replace it with a common subexpression.
574 // It's also not safe if the instruction uses physical registers.
575 bool CrossMBBPhysDef = false;
577 PhysDefVector PhysDefs;
578 bool PhysUseDef = false;
579 if (FoundCSE &&
580 hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
581 FoundCSE = false;
582
583 // ... Unless the CS is local or is in the sole predecessor block
584 // and it also defines the physical register which is not clobbered
585 // in between and the physical register uses were not clobbered.
586 // This can never be the case if the instruction both uses and
587 // defines the same physical register, which was detected above.
588 if (!PhysUseDef) {
589 unsigned CSVN = VNT.lookup(&MI);
590 MachineInstr *CSMI = Exps[CSVN];
591 if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
592 FoundCSE = true;
593 }
594 }
595
596 if (!FoundCSE) {
597 VNT.insert(&MI, CurrVN++);
598 Exps.push_back(&MI);
599 continue;
600 }
601
602 // Found a common subexpression, eliminate it.
603 unsigned CSVN = VNT.lookup(&MI);
604 MachineInstr *CSMI = Exps[CSVN];
605 LLVM_DEBUG(dbgs() << "Examining: " << MI);
606 LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
607
608 // Prevent CSE-ing non-local convergent instructions.
609 // LLVM's current definition of `isConvergent` does not necessarily prove
610 // that non-local CSE is illegal. The following check extends the definition
611 // of `isConvergent` to assume a convergent instruction is dependent not
612 // only on additional conditions, but also on fewer conditions. LLVM does
613 // not have a MachineInstr attribute which expresses this extended
614 // definition, so it's necessary to use `isConvergent` to prevent illegally
615 // CSE-ing the subset of `isConvergent` instructions which do fall into this
616 // extended definition.
617 if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
618 LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
619 "different BBs, avoid CSE!\n");
620 VNT.insert(&MI, CurrVN++);
621 Exps.push_back(&MI);
622 continue;
623 }
624
625 // Check if it's profitable to perform this CSE.
626 bool DoCSE = true;
627 unsigned NumDefs = MI.getNumDefs();
628
629 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
630 MachineOperand &MO = MI.getOperand(i);
631 if (!MO.isReg() || !MO.isDef())
632 continue;
633 Register OldReg = MO.getReg();
634 Register NewReg = CSMI->getOperand(i).getReg();
635
636 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
637 // we should make sure it is not dead at CSMI.
638 if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
639 ImplicitDefsToUpdate.push_back(i);
640
641 // Keep track of implicit defs of CSMI and MI, to clear possibly
642 // made-redundant kill flags.
643 if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
644 ImplicitDefs.push_back(OldReg);
645
646 if (OldReg == NewReg) {
647 --NumDefs;
648 continue;
649 }
650
651 assert(OldReg.isVirtual() && NewReg.isVirtual() &&
652 "Do not CSE physical register defs!");
653
654 if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
655 LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
656 DoCSE = false;
657 break;
658 }
659
660 // Don't perform CSE if the result of the new instruction cannot exist
661 // within the constraints (register class, bank, or low-level type) of
662 // the old instruction.
663 if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
665 dbgs() << "*** Not the same register constraints, avoid CSE!\n");
666 DoCSE = false;
667 break;
668 }
669
670 CSEPairs.push_back(std::make_pair(OldReg, NewReg));
671 --NumDefs;
672 }
673
674 // Actually perform the elimination.
675 if (DoCSE) {
676 for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
677 unsigned OldReg = CSEPair.first;
678 unsigned NewReg = CSEPair.second;
679 // OldReg may have been unused but is used now, clear the Dead flag
680 MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
681 assert(Def != nullptr && "CSEd register has no unique definition?");
682 Def->clearRegisterDeads(NewReg);
683 // Replace with NewReg and clear kill flags which may be wrong now.
684 MRI->replaceRegWith(OldReg, NewReg);
685 MRI->clearKillFlags(NewReg);
686 }
687
688 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
689 // we should make sure it is not dead at CSMI.
690 for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
691 CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
692 for (const auto &PhysDef : PhysDefs)
693 if (!MI.getOperand(PhysDef.first).isDead())
694 CSMI->getOperand(PhysDef.first).setIsDead(false);
695
696 // Go through implicit defs of CSMI and MI, and clear the kill flags on
697 // their uses in all the instructions between CSMI and MI.
698 // We might have made some of the kill flags redundant, consider:
699 // subs ... implicit-def %nzcv <- CSMI
700 // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
701 // subs ... implicit-def %nzcv <- MI, to be eliminated
702 // csinc ... implicit killed %nzcv
703 // Since we eliminated MI, and reused a register imp-def'd by CSMI
704 // (here %nzcv), that register, if it was killed before MI, should have
705 // that kill flag removed, because it's lifetime was extended.
706 if (CSMI->getParent() == MI.getParent()) {
707 for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
708 for (auto ImplicitDef : ImplicitDefs)
709 if (MachineOperand *MO = II->findRegisterUseOperand(
710 ImplicitDef, /*isKill=*/true, TRI))
711 MO->setIsKill(false);
712 } else {
713 // If the instructions aren't in the same BB, bail out and clear the
714 // kill flag on all uses of the imp-def'd register.
715 for (auto ImplicitDef : ImplicitDefs)
716 MRI->clearKillFlags(ImplicitDef);
717 }
718
719 if (CrossMBBPhysDef) {
720 // Add physical register defs now coming in from a predecessor to MBB
721 // livein list.
722 while (!PhysDefs.empty()) {
723 auto LiveIn = PhysDefs.pop_back_val();
724 if (!MBB->isLiveIn(LiveIn.second))
725 MBB->addLiveIn(LiveIn.second);
726 }
727 ++NumCrossBBCSEs;
728 }
729
730 MI.eraseFromParent();
731 ++NumCSEs;
732 if (!PhysRefs.empty())
733 ++NumPhysCSEs;
734 if (Commuted)
735 ++NumCommutes;
736 Changed = true;
737 } else {
738 VNT.insert(&MI, CurrVN++);
739 Exps.push_back(&MI);
740 }
741 CSEPairs.clear();
742 ImplicitDefsToUpdate.clear();
743 ImplicitDefs.clear();
744 }
745
746 return Changed;
747}
748
749/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
750/// dominator tree node if its a leaf or all of its children are done. Walk
751/// up the dominator tree to destroy ancestors which are now done.
752void
753MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
755 if (OpenChildren[Node])
756 return;
757
758 // Pop scope.
759 ExitScope(Node->getBlock());
760
761 // Now traverse upwards to pop ancestors whose offsprings are all done.
762 while (MachineDomTreeNode *Parent = Node->getIDom()) {
763 unsigned Left = --OpenChildren[Parent];
764 if (Left != 0)
765 break;
766 ExitScope(Parent->getBlock());
767 Node = Parent;
768 }
769}
770
771bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
775
776 CurrVN = 0;
777
778 // Perform a DFS walk to determine the order of visit.
779 WorkList.push_back(Node);
780 do {
781 Node = WorkList.pop_back_val();
782 Scopes.push_back(Node);
783 OpenChildren[Node] = Node->getNumChildren();
784 append_range(WorkList, Node->children());
785 } while (!WorkList.empty());
786
787 // Now perform CSE.
788 bool Changed = false;
789 for (MachineDomTreeNode *Node : Scopes) {
790 MachineBasicBlock *MBB = Node->getBlock();
791 EnterScope(MBB);
792 Changed |= ProcessBlockCSE(MBB);
793 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
794 ExitScopeIfDone(Node, OpenChildren);
795 }
796
797 return Changed;
798}
799
800// We use stronger checks for PRE candidate rather than for CSE ones to embrace
801// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
802// to exclude instrs created by PRE that won't be CSEed later.
803bool MachineCSE::isPRECandidate(MachineInstr *MI,
804 SmallSet<MCRegister, 8> &PhysRefs) {
805 if (!isCSECandidate(MI) ||
806 MI->isNotDuplicable() ||
807 MI->mayLoad() ||
809 MI->getNumDefs() != 1 ||
810 MI->getNumExplicitDefs() != 1)
811 return false;
812
813 for (const MachineOperand &MO : MI->operands()) {
814 if (MO.isReg() && !MO.getReg().isVirtual()) {
815 if (MO.isDef())
816 return false;
817 else
818 PhysRefs.insert(MO.getReg());
819 }
820 }
821
822 return true;
823}
824
825bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
827 bool Changed = false;
830 if (!isPRECandidate(&MI, PhysRefs))
831 continue;
832
833 if (!PREMap.count(&MI)) {
834 PREMap[&MI] = MBB;
835 continue;
836 }
837
838 auto MBB1 = PREMap[&MI];
839 assert(
840 !DT->properlyDominates(MBB, MBB1) &&
841 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
842 auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
843 if (!CMBB->isLegalToHoistInto())
844 continue;
845
846 if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
847 continue;
848
849 // Two instrs are partial redundant if their basic blocks are reachable
850 // from one to another but one doesn't dominate another.
851 if (CMBB != MBB1) {
852 auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
853 if (BB != nullptr && BB1 != nullptr &&
854 (isPotentiallyReachable(BB1, BB) ||
855 isPotentiallyReachable(BB, BB1))) {
856 // The following check extends the definition of `isConvergent` to
857 // assume a convergent instruction is dependent not only on additional
858 // conditions, but also on fewer conditions. LLVM does not have a
859 // MachineInstr attribute which expresses this extended definition, so
860 // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
861 // subset of `isConvergent` instructions which do fall into this
862 // extended definition.
863 if (MI.isConvergent() && CMBB != MBB)
864 continue;
865
866 // If this instruction uses physical registers then we can only do PRE
867 // if it's using the value that is live at the place we're hoisting to.
868 bool NonLocal;
869 PhysDefVector PhysDefs;
870 if (!PhysRefs.empty() &&
871 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
872 PhysDefs, NonLocal))
873 continue;
874
875 assert(MI.getOperand(0).isDef() &&
876 "First operand of instr with one explicit def must be this def");
877 Register VReg = MI.getOperand(0).getReg();
878 Register NewReg = MRI->cloneVirtualRegister(VReg);
879 if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
880 continue;
881 MachineInstr &NewMI =
882 TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
883
884 // When hoisting, make sure we don't carry the debug location of
885 // the original instruction, as that's not correct and can cause
886 // unexpected jumps when debugging optimized code.
887 auto EmptyDL = DebugLoc();
888 NewMI.setDebugLoc(EmptyDL);
889
890 NewMI.getOperand(0).setReg(NewReg);
891
892 PREMap[&MI] = CMBB;
893 ++NumPREs;
894 Changed = true;
895 }
896 }
897 }
898 return Changed;
899}
900
901// This simple PRE (partial redundancy elimination) pass doesn't actually
902// eliminate partial redundancy but transforms it to full redundancy,
903// anticipating that the next CSE step will eliminate this created redundancy.
904// If CSE doesn't eliminate this, than created instruction will remain dead
905// and eliminated later by Remove Dead Machine Instructions pass.
906bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
908
909 PREMap.clear();
910 bool Changed = false;
911 BBs.push_back(DT->getRootNode());
912 do {
913 auto Node = BBs.pop_back_val();
914 append_range(BBs, Node->children());
915
916 MachineBasicBlock *MBB = Node->getBlock();
917 Changed |= ProcessBlockPRE(DT, MBB);
918
919 } while (!BBs.empty());
920
921 return Changed;
922}
923
924bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
926 MachineBasicBlock *MBB1) {
927 if (CandidateBB->getParent()->getFunction().hasMinSize())
928 return true;
929 assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
930 assert(DT->dominates(CandidateBB, MBB1) &&
931 "CandidateBB should dominate MBB1");
932 return MBFI->getBlockFreq(CandidateBB) <=
933 MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
934}
935
936bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
937 if (skipFunction(MF.getFunction()))
938 return false;
939
942 MRI = &MF.getRegInfo();
943 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
944 DT = &getAnalysis<MachineDominatorTree>();
945 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
946 LookAheadLimit = TII->getMachineCSELookAheadLimit();
947 bool ChangedPRE, ChangedCSE;
948 ChangedPRE = PerformSimplePRE(DT);
949 ChangedCSE = PerformCSE(DT->getRootNode());
950 return ChangedPRE || ChangedCSE;
951}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineOperand &MO, const MachineFunction &MF, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII)
Definition: MachineCSE.cpp:267
Machine Common Subexpression Elimination
Definition: MachineCSE.cpp:169
static cl::opt< int > CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), cl::desc("Threshold for the size of CSUses"))
#define DEBUG_TYPE
Definition: MachineCSE.cpp:51
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
A debug info location.
Definition: DebugLoc.h:33
Base class for the actual dominator tree node.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:646
bool isAsCheapAsAMove(const MachineInstr &MI) const override
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
unsigned pred_size() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
MachineDomTreeNode * getRootNode() const
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs that point to the register def in this instruction and point them to Reg instead.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Definition: Pass.cpp:102
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:120
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
ScopedHashTableScope< K, V, KInfo, AllocatorTy > ScopeTy
ScopeTy - This is a helpful typedef that allows clients to get easy access to the name of the scope f...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
bool empty() const
Definition: SmallSet.h:157
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:177
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
Definition: STLExtras.h:2424
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2129
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition: Allocator.h:375
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:162
void initializeMachineCSEPass(PassRegistry &)
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:231
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...