LLVM 20.0.0git
PeepholeOptimizer.cpp
Go to the documentation of this file.
1//===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Perform peephole optimizations on the machine code:
10//
11// - Optimize Extensions
12//
13// Optimization of sign / zero extension instructions. It may be extended to
14// handle other instructions with similar properties.
15//
16// On some targets, some instructions, e.g. X86 sign / zero extension, may
17// leave the source value in the lower part of the result. This optimization
18// will replace some uses of the pre-extension value with uses of the
19// sub-register of the results.
20//
21// - Optimize Comparisons
22//
23// Optimization of comparison instructions. For instance, in this code:
24//
25// sub r1, 1
26// cmp r1, 0
27// bz L1
28//
29// If the "sub" instruction all ready sets (or could be modified to set) the
30// same flag that the "cmp" instruction sets and that "bz" uses, then we can
31// eliminate the "cmp" instruction.
32//
33// Another instance, in this code:
34//
35// sub r1, r3 | sub r1, imm
36// cmp r3, r1 or cmp r1, r3 | cmp r1, imm
37// bge L1
38//
39// If the branch instruction can use flag from "sub", then we can replace
40// "sub" with "subs" and eliminate the "cmp" instruction.
41//
42// - Optimize Loads:
43//
44// Loads that can be folded into a later instruction. A load is foldable
45// if it loads to virtual registers and the virtual register defined has
46// a single use.
47//
48// - Optimize Copies and Bitcast (more generally, target specific copies):
49//
50// Rewrite copies and bitcasts to avoid cross register bank copies
51// when possible.
52// E.g., Consider the following example, where capital and lower
53// letters denote different register file:
54// b = copy A <-- cross-bank copy
55// C = copy b <-- cross-bank copy
56// =>
57// b = copy A <-- cross-bank copy
58// C = copy A <-- same-bank copy
59//
60// E.g., for bitcast:
61// b = bitcast A <-- cross-bank copy
62// C = bitcast b <-- cross-bank copy
63// =>
64// b = bitcast A <-- cross-bank copy
65// C = copy A <-- same-bank copy
66//===----------------------------------------------------------------------===//
67
69#include "llvm/ADT/DenseMap.h"
71#include "llvm/ADT/SmallSet.h"
73#include "llvm/ADT/Statistic.h"
89#include "llvm/MC/LaneBitmask.h"
90#include "llvm/MC/MCInstrDesc.h"
91#include "llvm/Pass.h"
93#include "llvm/Support/Debug.h"
95#include <cassert>
96#include <cstdint>
97#include <memory>
98#include <utility>
99
100using namespace llvm;
103
104#define DEBUG_TYPE "peephole-opt"
105
106// Optimize Extensions
107static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
108 cl::desc("Aggressive extension optimization"));
109
110static cl::opt<bool>
111 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
112 cl::desc("Disable the peephole optimizer"));
113
114/// Specifiy whether or not the value tracking looks through
115/// complex instructions. When this is true, the value tracker
116/// bails on everything that is not a copy or a bitcast.
117static cl::opt<bool>
118 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
119 cl::desc("Disable advanced copy optimization"));
120
122 "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
123 cl::desc("Disable non-allocatable physical register copy optimization"));
124
125// Limit the number of PHI instructions to process
126// in PeepholeOptimizer::getNextSource.
128 RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10),
129 cl::desc("Limit the length of PHI chains to lookup"));
130
131// Limit the length of recurrence chain when evaluating the benefit of
132// commuting operands.
134 "recurrence-chain-limit", cl::Hidden, cl::init(3),
135 cl::desc("Maximum length of recurrence chain when evaluating the benefit "
136 "of commuting operands"));
137
138STATISTIC(NumReuse, "Number of extension results reused");
139STATISTIC(NumCmps, "Number of compares eliminated");
140STATISTIC(NumImmFold, "Number of move immediate folded");
141STATISTIC(NumLoadFold, "Number of loads folded");
142STATISTIC(NumSelects, "Number of selects optimized");
143STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
144STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
145STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
146
147namespace {
148
149class ValueTrackerResult;
150class RecurrenceInstr;
151
152class PeepholeOptimizer : private MachineFunction::Delegate {
153 const TargetInstrInfo *TII = nullptr;
154 const TargetRegisterInfo *TRI = nullptr;
155 MachineRegisterInfo *MRI = nullptr;
156 MachineDominatorTree *DT = nullptr; // Machine dominator tree
157 MachineLoopInfo *MLI = nullptr;
158
159public:
160 PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
161 : DT(DT), MLI(MLI) {}
162
163 bool run(MachineFunction &MF);
164 /// Track Def -> Use info used for rewriting copies.
166
167 /// Sequence of instructions that formulate recurrence cycle.
168 using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
169
170private:
171 bool optimizeCmpInstr(MachineInstr &MI);
172 bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
174 bool optimizeSelect(MachineInstr &MI,
176 bool optimizeCondBranch(MachineInstr &MI);
177 bool optimizeCoalescableCopy(MachineInstr &MI);
178 bool optimizeUncoalescableCopy(MachineInstr &MI,
180 bool optimizeRecurrence(MachineInstr &PHI);
181 bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
182 bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
184 bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
186 bool &Deleted);
187
188 /// Finds recurrence cycles, but only ones that formulated around
189 /// a def operand and a use operand that are tied. If there is a use
190 /// operand commutable with the tied use operand, find recurrence cycle
191 /// along that operand as well.
192 bool findTargetRecurrence(Register Reg,
193 const SmallSet<Register, 2> &TargetReg,
194 RecurrenceCycle &RC);
195
196 /// If copy instruction \p MI is a virtual register copy or a copy of a
197 /// constant physical register to a virtual register, track it in the
198 /// set CopySrcMIs. If this virtual register was previously seen as a
199 /// copy, replace the uses of this copy with the previously seen copy's
200 /// destination register.
201 bool foldRedundantCopy(MachineInstr &MI);
202
203 /// Is the register \p Reg a non-allocatable physical register?
204 bool isNAPhysCopy(Register Reg);
205
206 /// If copy instruction \p MI is a non-allocatable virtual<->physical
207 /// register copy, track it in the \p NAPhysToVirtMIs map. If this
208 /// non-allocatable physical register was previously copied to a virtual
209 /// registered and hasn't been clobbered, the virt->phys copy can be
210 /// deleted.
211 bool
212 foldRedundantNAPhysCopy(MachineInstr &MI,
213 DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
214
215 bool isLoadFoldable(MachineInstr &MI,
216 SmallSet<Register, 16> &FoldAsLoadDefCandidates);
217
218 /// Check whether \p MI is understood by the register coalescer
219 /// but may require some rewriting.
220 bool isCoalescableCopy(const MachineInstr &MI) {
221 // SubregToRegs are not interesting, because they are already register
222 // coalescer friendly.
223 return MI.isCopy() ||
224 (!DisableAdvCopyOpt && (MI.isRegSequence() || MI.isInsertSubreg() ||
225 MI.isExtractSubreg()));
226 }
227
228 /// Check whether \p MI is a copy like instruction that is
229 /// not recognized by the register coalescer.
230 bool isUncoalescableCopy(const MachineInstr &MI) {
231 return MI.isBitcast() || (!DisableAdvCopyOpt && (MI.isRegSequenceLike() ||
232 MI.isInsertSubregLike() ||
233 MI.isExtractSubregLike()));
234 }
235
236 MachineInstr &rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def,
237 RewriteMapTy &RewriteMap);
238
239 // Set of copies to virtual registers keyed by source register. Never
240 // holds any physreg which requires def tracking.
242
243 // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs.
244 void MF_HandleInsertion(MachineInstr &MI) override { return; }
245
246 bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) {
247 if (!MI.isCopy())
248 return false;
249
250 Register SrcReg = MI.getOperand(1).getReg();
251 unsigned SrcSubReg = MI.getOperand(1).getSubReg();
252 if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
253 return false;
254
255 SrcPair = RegSubRegPair(SrcReg, SrcSubReg);
256 return true;
257 }
258
259 // If a COPY instruction is to be deleted or changed, we should also remove
260 // it from CopySrcMIs.
261 void deleteChangedCopy(MachineInstr &MI) {
262 RegSubRegPair SrcPair;
263 if (!getCopySrc(MI, SrcPair))
264 return;
265
266 auto It = CopySrcMIs.find(SrcPair);
267 if (It != CopySrcMIs.end() && It->second == &MI)
268 CopySrcMIs.erase(It);
269 }
270
271 void MF_HandleRemoval(MachineInstr &MI) override { deleteChangedCopy(MI); }
272
273 void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override {
274 deleteChangedCopy(MI);
275 }
276};
277
278class PeepholeOptimizerLegacy : public MachineFunctionPass {
279public:
280 static char ID; // Pass identification
281
282 PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {
284 }
285
286 bool runOnMachineFunction(MachineFunction &MF) override;
287
288 void getAnalysisUsage(AnalysisUsage &AU) const override {
289 AU.setPreservesCFG();
293 if (Aggressive) {
296 }
297 }
298
301 MachineFunctionProperties::Property::IsSSA);
302 }
303};
304
305/// Helper class to hold instructions that are inside recurrence cycles.
306/// The recurrence cycle is formulated around 1) a def operand and its
307/// tied use operand, or 2) a def operand and a use operand that is commutable
308/// with another use operand which is tied to the def operand. In the latter
309/// case, index of the tied use operand and the commutable use operand are
310/// maintained with CommutePair.
311class RecurrenceInstr {
312public:
313 using IndexPair = std::pair<unsigned, unsigned>;
314
315 RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
316 RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
317 : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
318
319 MachineInstr *getMI() const { return MI; }
320 std::optional<IndexPair> getCommutePair() const { return CommutePair; }
321
322private:
324 std::optional<IndexPair> CommutePair;
325};
326
327/// Helper class to hold a reply for ValueTracker queries.
328/// Contains the returned sources for a given search and the instructions
329/// where the sources were tracked from.
330class ValueTrackerResult {
331private:
332 /// Track all sources found by one ValueTracker query.
334
335 /// Instruction using the sources in 'RegSrcs'.
336 const MachineInstr *Inst = nullptr;
337
338public:
339 ValueTrackerResult() = default;
340
341 ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); }
342
343 bool isValid() const { return getNumSources() > 0; }
344
345 void setInst(const MachineInstr *I) { Inst = I; }
346 const MachineInstr *getInst() const { return Inst; }
347
348 void clear() {
349 RegSrcs.clear();
350 Inst = nullptr;
351 }
352
353 void addSource(Register SrcReg, unsigned SrcSubReg) {
354 RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
355 }
356
357 void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
358 assert(Idx < getNumSources() && "Reg pair source out of index");
359 RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
360 }
361
362 int getNumSources() const { return RegSrcs.size(); }
363
364 RegSubRegPair getSrc(int Idx) const { return RegSrcs[Idx]; }
365
366 Register getSrcReg(int Idx) const {
367 assert(Idx < getNumSources() && "Reg source out of index");
368 return RegSrcs[Idx].Reg;
369 }
370
371 unsigned getSrcSubReg(int Idx) const {
372 assert(Idx < getNumSources() && "SubReg source out of index");
373 return RegSrcs[Idx].SubReg;
374 }
375
376 bool operator==(const ValueTrackerResult &Other) const {
377 if (Other.getInst() != getInst())
378 return false;
379
380 if (Other.getNumSources() != getNumSources())
381 return false;
382
383 for (int i = 0, e = Other.getNumSources(); i != e; ++i)
384 if (Other.getSrcReg(i) != getSrcReg(i) ||
385 Other.getSrcSubReg(i) != getSrcSubReg(i))
386 return false;
387 return true;
388 }
389};
390
391/// Helper class to track the possible sources of a value defined by
392/// a (chain of) copy related instructions.
393/// Given a definition (instruction and definition index), this class
394/// follows the use-def chain to find successive suitable sources.
395/// The given source can be used to rewrite the definition into
396/// def = COPY src.
397///
398/// For instance, let us consider the following snippet:
399/// v0 =
400/// v2 = INSERT_SUBREG v1, v0, sub0
401/// def = COPY v2.sub0
402///
403/// Using a ValueTracker for def = COPY v2.sub0 will give the following
404/// suitable sources:
405/// v2.sub0 and v0.
406/// Then, def can be rewritten into def = COPY v0.
407class ValueTracker {
408private:
409 /// The current point into the use-def chain.
410 const MachineInstr *Def = nullptr;
411
412 /// The index of the definition in Def.
413 unsigned DefIdx = 0;
414
415 /// The sub register index of the definition.
416 unsigned DefSubReg;
417
418 /// The register where the value can be found.
420
421 /// MachineRegisterInfo used to perform tracking.
423
424 /// Optional TargetInstrInfo used to perform some complex tracking.
425 const TargetInstrInfo *TII;
426
427 /// Dispatcher to the right underlying implementation of getNextSource.
428 ValueTrackerResult getNextSourceImpl();
429
430 /// Specialized version of getNextSource for Copy instructions.
431 ValueTrackerResult getNextSourceFromCopy();
432
433 /// Specialized version of getNextSource for Bitcast instructions.
434 ValueTrackerResult getNextSourceFromBitcast();
435
436 /// Specialized version of getNextSource for RegSequence instructions.
437 ValueTrackerResult getNextSourceFromRegSequence();
438
439 /// Specialized version of getNextSource for InsertSubreg instructions.
440 ValueTrackerResult getNextSourceFromInsertSubreg();
441
442 /// Specialized version of getNextSource for ExtractSubreg instructions.
443 ValueTrackerResult getNextSourceFromExtractSubreg();
444
445 /// Specialized version of getNextSource for SubregToReg instructions.
446 ValueTrackerResult getNextSourceFromSubregToReg();
447
448 /// Specialized version of getNextSource for PHI instructions.
449 ValueTrackerResult getNextSourceFromPHI();
450
451public:
452 /// Create a ValueTracker instance for the value defined by \p Reg.
453 /// \p DefSubReg represents the sub register index the value tracker will
454 /// track. It does not need to match the sub register index used in the
455 /// definition of \p Reg.
456 /// If \p Reg is a physical register, a value tracker constructed with
457 /// this constructor will not find any alternative source.
458 /// Indeed, when \p Reg is a physical register that constructor does not
459 /// know which definition of \p Reg it should track.
460 /// Use the next constructor to track a physical register.
461 ValueTracker(Register Reg, unsigned DefSubReg, const MachineRegisterInfo &MRI,
462 const TargetInstrInfo *TII = nullptr)
463 : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
464 if (!Reg.isPhysical()) {
465 Def = MRI.getVRegDef(Reg);
466 DefIdx = MRI.def_begin(Reg).getOperandNo();
467 }
468 }
469
470 /// Following the use-def chain, get the next available source
471 /// for the tracked value.
472 /// \return A ValueTrackerResult containing a set of registers
473 /// and sub registers with tracked values. A ValueTrackerResult with
474 /// an empty set of registers means no source was found.
475 ValueTrackerResult getNextSource();
476};
477
478} // end anonymous namespace
479
480char PeepholeOptimizerLegacy::ID = 0;
481
482char &llvm::PeepholeOptimizerLegacyID = PeepholeOptimizerLegacy::ID;
483
484INITIALIZE_PASS_BEGIN(PeepholeOptimizerLegacy, DEBUG_TYPE,
485 "Peephole Optimizations", false, false)
488INITIALIZE_PASS_END(PeepholeOptimizerLegacy, DEBUG_TYPE,
490
491/// If instruction is a copy-like instruction, i.e. it reads a single register
492/// and writes a single register and it does not modify the source, and if the
493/// source value is preserved as a sub-register of the result, then replace all
494/// reachable uses of the source with the subreg of the result.
495///
496/// Do not generate an EXTRACT that is used only in a debug use, as this changes
497/// the code. Since this code does not currently share EXTRACTs, just ignore all
498/// debug uses.
499bool PeepholeOptimizer::optimizeExtInstr(
501 SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
502 Register SrcReg, DstReg;
503 unsigned SubIdx;
504 if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
505 return false;
506
507 if (DstReg.isPhysical() || SrcReg.isPhysical())
508 return false;
509
510 if (MRI->hasOneNonDBGUse(SrcReg))
511 // No other uses.
512 return false;
513
514 // Ensure DstReg can get a register class that actually supports
515 // sub-registers. Don't change the class until we commit.
516 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
517 DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
518 if (!DstRC)
519 return false;
520
521 // The ext instr may be operating on a sub-register of SrcReg as well.
522 // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
523 // register.
524 // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
525 // SrcReg:SubIdx should be replaced.
526 bool UseSrcSubIdx =
527 TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
528
529 // The source has other uses. See if we can replace the other uses with use of
530 // the result of the extension.
532 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
533 ReachedBBs.insert(UI.getParent());
534
535 // Uses that are in the same BB of uses of the result of the instruction.
537
538 // Uses that the result of the instruction can reach.
540
541 bool ExtendLife = true;
542 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
543 MachineInstr *UseMI = UseMO.getParent();
544 if (UseMI == &MI)
545 continue;
546
547 if (UseMI->isPHI()) {
548 ExtendLife = false;
549 continue;
550 }
551
552 // Only accept uses of SrcReg:SubIdx.
553 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
554 continue;
555
556 // It's an error to translate this:
557 //
558 // %reg1025 = <sext> %reg1024
559 // ...
560 // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
561 //
562 // into this:
563 //
564 // %reg1025 = <sext> %reg1024
565 // ...
566 // %reg1027 = COPY %reg1025:4
567 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
568 //
569 // The problem here is that SUBREG_TO_REG is there to assert that an
570 // implicit zext occurs. It doesn't insert a zext instruction. If we allow
571 // the COPY here, it will give us the value after the <sext>, not the
572 // original value of %reg1024 before <sext>.
573 if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
574 continue;
575
576 MachineBasicBlock *UseMBB = UseMI->getParent();
577 if (UseMBB == &MBB) {
578 // Local uses that come after the extension.
579 if (!LocalMIs.count(UseMI))
580 Uses.push_back(&UseMO);
581 } else if (ReachedBBs.count(UseMBB)) {
582 // Non-local uses where the result of the extension is used. Always
583 // replace these unless it's a PHI.
584 Uses.push_back(&UseMO);
585 } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
586 // We may want to extend the live range of the extension result in order
587 // to replace these uses.
588 ExtendedUses.push_back(&UseMO);
589 } else {
590 // Both will be live out of the def MBB anyway. Don't extend live range of
591 // the extension result.
592 ExtendLife = false;
593 break;
594 }
595 }
596
597 if (ExtendLife && !ExtendedUses.empty())
598 // Extend the liveness of the extension result.
599 Uses.append(ExtendedUses.begin(), ExtendedUses.end());
600
601 // Now replace all uses.
602 bool Changed = false;
603 if (!Uses.empty()) {
605
606 // Look for PHI uses of the extended result, we don't want to extend the
607 // liveness of a PHI input. It breaks all kinds of assumptions down
608 // stream. A PHI use is expected to be the kill of its source values.
609 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
610 if (UI.isPHI())
611 PHIBBs.insert(UI.getParent());
612
613 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
614 for (MachineOperand *UseMO : Uses) {
615 MachineInstr *UseMI = UseMO->getParent();
616 MachineBasicBlock *UseMBB = UseMI->getParent();
617 if (PHIBBs.count(UseMBB))
618 continue;
619
620 // About to add uses of DstReg, clear DstReg's kill flags.
621 if (!Changed) {
622 MRI->clearKillFlags(DstReg);
623 MRI->constrainRegClass(DstReg, DstRC);
624 }
625
626 // SubReg defs are illegal in machine SSA phase,
627 // we should not generate SubReg defs.
628 //
629 // For example, for the instructions:
630 //
631 // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
632 // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc
633 //
634 // We should generate:
635 //
636 // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
637 // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0
638 // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0
639 //
640 if (UseSrcSubIdx)
641 RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
642
643 Register NewVR = MRI->createVirtualRegister(RC);
644 BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
645 TII->get(TargetOpcode::COPY), NewVR)
646 .addReg(DstReg, 0, SubIdx);
647 if (UseSrcSubIdx)
648 UseMO->setSubReg(0);
649
650 UseMO->setReg(NewVR);
651 ++NumReuse;
652 Changed = true;
653 }
654 }
655
656 return Changed;
657}
658
659/// If the instruction is a compare and the previous instruction it's comparing
660/// against already sets (or could be modified to set) the same flag as the
661/// compare, then we can remove the comparison and use the flag from the
662/// previous instruction.
663bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
664 // If this instruction is a comparison against zero and isn't comparing a
665 // physical register, we can try to optimize it.
666 Register SrcReg, SrcReg2;
667 int64_t CmpMask, CmpValue;
668 if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
669 SrcReg.isPhysical() || SrcReg2.isPhysical())
670 return false;
671
672 // Attempt to optimize the comparison instruction.
673 LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI);
674 if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
675 LLVM_DEBUG(dbgs() << " -> Successfully optimized compare!\n");
676 ++NumCmps;
677 return true;
678 }
679
680 return false;
681}
682
683/// Optimize a select instruction.
684bool PeepholeOptimizer::optimizeSelect(
686 unsigned TrueOp = 0;
687 unsigned FalseOp = 0;
688 bool Optimizable = false;
690 if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
691 return false;
692 if (!Optimizable)
693 return false;
694 if (!TII->optimizeSelect(MI, LocalMIs))
695 return false;
696 LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
697 MI.eraseFromParent();
698 ++NumSelects;
699 return true;
700}
701
702/// Check if a simpler conditional branch can be generated.
703bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
704 return TII->optimizeCondBranch(MI);
705}
706
707/// Try to find the next source that share the same register file
708/// for the value defined by \p Reg and \p SubReg.
709/// When true is returned, the \p RewriteMap can be used by the client to
710/// retrieve all Def -> Use along the way up to the next source. Any found
711/// Use that is not itself a key for another entry, is the next source to
712/// use. During the search for the next source, multiple sources can be found
713/// given multiple incoming sources of a PHI instruction. In this case, we
714/// look in each PHI source for the next source; all found next sources must
715/// share the same register file as \p Reg and \p SubReg. The client should
716/// then be capable to rewrite all intermediate PHIs to get the next source.
717/// \return False if no alternative sources are available. True otherwise.
718bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
719 RewriteMapTy &RewriteMap) {
720 // Do not try to find a new source for a physical register.
721 // So far we do not have any motivating example for doing that.
722 // Thus, instead of maintaining untested code, we will revisit that if
723 // that changes at some point.
724 Register Reg = RegSubReg.Reg;
725 if (Reg.isPhysical())
726 return false;
727 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
728
730 RegSubRegPair CurSrcPair = RegSubReg;
731 SrcToLook.push_back(CurSrcPair);
732
733 unsigned PHICount = 0;
734 do {
735 CurSrcPair = SrcToLook.pop_back_val();
736 // As explained above, do not handle physical registers
737 if (CurSrcPair.Reg.isPhysical())
738 return false;
739
740 ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
741
742 // Follow the chain of copies until we find a more suitable source, a phi
743 // or have to abort.
744 while (true) {
745 ValueTrackerResult Res = ValTracker.getNextSource();
746 // Abort at the end of a chain (without finding a suitable source).
747 if (!Res.isValid())
748 return false;
749
750 // Insert the Def -> Use entry for the recently found source.
751 ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
752 if (CurSrcRes.isValid()) {
753 assert(CurSrcRes == Res && "ValueTrackerResult found must match");
754 // An existent entry with multiple sources is a PHI cycle we must avoid.
755 // Otherwise it's an entry with a valid next source we already found.
756 if (CurSrcRes.getNumSources() > 1) {
758 << "findNextSource: found PHI cycle, aborting...\n");
759 return false;
760 }
761 break;
762 }
763 RewriteMap.insert(std::make_pair(CurSrcPair, Res));
764
765 // ValueTrackerResult usually have one source unless it's the result from
766 // a PHI instruction. Add the found PHI edges to be looked up further.
767 unsigned NumSrcs = Res.getNumSources();
768 if (NumSrcs > 1) {
769 PHICount++;
770 if (PHICount >= RewritePHILimit) {
771 LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
772 return false;
773 }
774
775 for (unsigned i = 0; i < NumSrcs; ++i)
776 SrcToLook.push_back(Res.getSrc(i));
777 break;
778 }
779
780 CurSrcPair = Res.getSrc(0);
781 // Do not extend the live-ranges of physical registers as they add
782 // constraints to the register allocator. Moreover, if we want to extend
783 // the live-range of a physical register, unlike SSA virtual register,
784 // we will have to check that they aren't redefine before the related use.
785 if (CurSrcPair.Reg.isPhysical())
786 return false;
787
788 // Keep following the chain if the value isn't any better yet.
789 const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
790 if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC,
791 CurSrcPair.SubReg))
792 continue;
793
794 // We currently cannot deal with subreg operands on PHI instructions
795 // (see insertPHI()).
796 if (PHICount > 0 && CurSrcPair.SubReg != 0)
797 continue;
798
799 // We found a suitable source, and are done with this chain.
800 break;
801 }
802 } while (!SrcToLook.empty());
803
804 // If we did not find a more suitable source, there is nothing to optimize.
805 return CurSrcPair.Reg != Reg;
806}
807
808/// Insert a PHI instruction with incoming edges \p SrcRegs that are
809/// guaranteed to have the same register class. This is necessary whenever we
810/// successfully traverse a PHI instruction and find suitable sources coming
811/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
812/// suitable to be used in a new COPY instruction.
814 const TargetInstrInfo &TII,
815 const SmallVectorImpl<RegSubRegPair> &SrcRegs,
816 MachineInstr &OrigPHI) {
817 assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
818
819 const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
820 // NewRC is only correct if no subregisters are involved. findNextSource()
821 // should have rejected those cases already.
822 assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
823 Register NewVR = MRI.createVirtualRegister(NewRC);
824 MachineBasicBlock *MBB = OrigPHI.getParent();
825 MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
826 TII.get(TargetOpcode::PHI), NewVR);
827
828 unsigned MBBOpIdx = 2;
829 for (const RegSubRegPair &RegPair : SrcRegs) {
830 MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
831 MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
832 // Since we're extended the lifetime of RegPair.Reg, clear the
833 // kill flags to account for that and make RegPair.Reg reaches
834 // the new PHI.
835 MRI.clearKillFlags(RegPair.Reg);
836 MBBOpIdx += 2;
837 }
838
839 return *MIB;
840}
841
842namespace {
843
844/// Interface to query instructions amenable to copy rewriting.
845class Rewriter {
846protected:
847 MachineInstr &CopyLike;
848 unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten.
849public:
850 Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
851 virtual ~Rewriter() = default;
852
853 /// Get the next rewritable source (SrcReg, SrcSubReg) and
854 /// the related value that it affects (DstReg, DstSubReg).
855 /// A source is considered rewritable if its register class and the
856 /// register class of the related DstReg may not be register
857 /// coalescer friendly. In other words, given a copy-like instruction
858 /// not all the arguments may be returned at rewritable source, since
859 /// some arguments are none to be register coalescer friendly.
860 ///
861 /// Each call of this method moves the current source to the next
862 /// rewritable source.
863 /// For instance, let CopyLike be the instruction to rewrite.
864 /// CopyLike has one definition and one source:
865 /// dst.dstSubIdx = CopyLike src.srcSubIdx.
866 ///
867 /// The first call will give the first rewritable source, i.e.,
868 /// the only source this instruction has:
869 /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
870 /// This source defines the whole definition, i.e.,
871 /// (DstReg, DstSubReg) = (dst, dstSubIdx).
872 ///
873 /// The second and subsequent calls will return false, as there is only one
874 /// rewritable source.
875 ///
876 /// \return True if a rewritable source has been found, false otherwise.
877 /// The output arguments are valid if and only if true is returned.
878 virtual bool getNextRewritableSource(RegSubRegPair &Src,
879 RegSubRegPair &Dst) = 0;
880
881 /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
882 /// \return True if the rewriting was possible, false otherwise.
883 virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
884};
885
886/// Rewriter for COPY instructions.
887class CopyRewriter : public Rewriter {
888public:
889 CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
890 assert(MI.isCopy() && "Expected copy instruction");
891 }
892 virtual ~CopyRewriter() = default;
893
894 bool getNextRewritableSource(RegSubRegPair &Src,
895 RegSubRegPair &Dst) override {
896 // CurrentSrcIdx > 0 means this function has already been called.
897 if (CurrentSrcIdx > 0)
898 return false;
899 // This is the first call to getNextRewritableSource.
900 // Move the CurrentSrcIdx to remember that we made that call.
901 CurrentSrcIdx = 1;
902 // The rewritable source is the argument.
903 const MachineOperand &MOSrc = CopyLike.getOperand(1);
904 Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
905 // What we track are the alternative sources of the definition.
906 const MachineOperand &MODef = CopyLike.getOperand(0);
907 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
908 return true;
909 }
910
911 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
912 if (CurrentSrcIdx != 1)
913 return false;
914 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
915 MOSrc.setReg(NewReg);
916 MOSrc.setSubReg(NewSubReg);
917 return true;
918 }
919};
920
921/// Helper class to rewrite uncoalescable copy like instructions
922/// into new COPY (coalescable friendly) instructions.
923class UncoalescableRewriter : public Rewriter {
924 unsigned NumDefs; ///< Number of defs in the bitcast.
925
926public:
927 UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
928 NumDefs = MI.getDesc().getNumDefs();
929 }
930
931 /// \see See Rewriter::getNextRewritableSource()
932 /// All such sources need to be considered rewritable in order to
933 /// rewrite a uncoalescable copy-like instruction. This method return
934 /// each definition that must be checked if rewritable.
935 bool getNextRewritableSource(RegSubRegPair &Src,
936 RegSubRegPair &Dst) override {
937 // Find the next non-dead definition and continue from there.
938 if (CurrentSrcIdx == NumDefs)
939 return false;
940
941 while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
942 ++CurrentSrcIdx;
943 if (CurrentSrcIdx == NumDefs)
944 return false;
945 }
946
947 // What we track are the alternative sources of the definition.
948 Src = RegSubRegPair(0, 0);
949 const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
950 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
951
952 CurrentSrcIdx++;
953 return true;
954 }
955
956 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
957 return false;
958 }
959};
960
961/// Specialized rewriter for INSERT_SUBREG instruction.
962class InsertSubregRewriter : public Rewriter {
963public:
964 InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
965 assert(MI.isInsertSubreg() && "Invalid instruction");
966 }
967
968 /// \see See Rewriter::getNextRewritableSource()
969 /// Here CopyLike has the following form:
970 /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
971 /// Src1 has the same register class has dst, hence, there is
972 /// nothing to rewrite.
973 /// Src2.src2SubIdx, may not be register coalescer friendly.
974 /// Therefore, the first call to this method returns:
975 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
976 /// (DstReg, DstSubReg) = (dst, subIdx).
977 ///
978 /// Subsequence calls will return false.
979 bool getNextRewritableSource(RegSubRegPair &Src,
980 RegSubRegPair &Dst) override {
981 // If we already get the only source we can rewrite, return false.
982 if (CurrentSrcIdx == 2)
983 return false;
984 // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
985 CurrentSrcIdx = 2;
986 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
987 Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
988 const MachineOperand &MODef = CopyLike.getOperand(0);
989
990 // We want to track something that is compatible with the
991 // partial definition.
992 if (MODef.getSubReg())
993 // Bail if we have to compose sub-register indices.
994 return false;
995 Dst = RegSubRegPair(MODef.getReg(),
996 (unsigned)CopyLike.getOperand(3).getImm());
997 return true;
998 }
999
1000 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
1001 if (CurrentSrcIdx != 2)
1002 return false;
1003 // We are rewriting the inserted reg.
1004 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
1005 MO.setReg(NewReg);
1006 MO.setSubReg(NewSubReg);
1007 return true;
1008 }
1009};
1010
1011/// Specialized rewriter for EXTRACT_SUBREG instruction.
1012class ExtractSubregRewriter : public Rewriter {
1013 const TargetInstrInfo &TII;
1014
1015public:
1016 ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
1017 : Rewriter(MI), TII(TII) {
1018 assert(MI.isExtractSubreg() && "Invalid instruction");
1019 }
1020
1021 /// \see Rewriter::getNextRewritableSource()
1022 /// Here CopyLike has the following form:
1023 /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
1024 /// There is only one rewritable source: Src.subIdx,
1025 /// which defines dst.dstSubIdx.
1026 bool getNextRewritableSource(RegSubRegPair &Src,
1027 RegSubRegPair &Dst) override {
1028 // If we already get the only source we can rewrite, return false.
1029 if (CurrentSrcIdx == 1)
1030 return false;
1031 // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
1032 CurrentSrcIdx = 1;
1033 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
1034 // If we have to compose sub-register indices, bail out.
1035 if (MOExtractedReg.getSubReg())
1036 return false;
1037
1038 Src =
1039 RegSubRegPair(MOExtractedReg.getReg(), CopyLike.getOperand(2).getImm());
1040
1041 // We want to track something that is compatible with the definition.
1042 const MachineOperand &MODef = CopyLike.getOperand(0);
1043 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
1044 return true;
1045 }
1046
1047 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
1048 // The only source we can rewrite is the input register.
1049 if (CurrentSrcIdx != 1)
1050 return false;
1051
1052 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
1053
1054 // If we find a source that does not require to extract something,
1055 // rewrite the operation with a copy.
1056 if (!NewSubReg) {
1057 // Move the current index to an invalid position.
1058 // We do not want another call to this method to be able
1059 // to do any change.
1060 CurrentSrcIdx = -1;
1061 // Rewrite the operation as a COPY.
1062 // Get rid of the sub-register index.
1063 CopyLike.removeOperand(2);
1064 // Morph the operation into a COPY.
1065 CopyLike.setDesc(TII.get(TargetOpcode::COPY));
1066 return true;
1067 }
1068 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
1069 return true;
1070 }
1071};
1072
1073/// Specialized rewriter for REG_SEQUENCE instruction.
1074class RegSequenceRewriter : public Rewriter {
1075public:
1076 RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
1077 assert(MI.isRegSequence() && "Invalid instruction");
1078 }
1079
1080 /// \see Rewriter::getNextRewritableSource()
1081 /// Here CopyLike has the following form:
1082 /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
1083 /// Each call will return a different source, walking all the available
1084 /// source.
1085 ///
1086 /// The first call returns:
1087 /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
1088 /// (DstReg, DstSubReg) = (dst, subIdx1).
1089 ///
1090 /// The second call returns:
1091 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
1092 /// (DstReg, DstSubReg) = (dst, subIdx2).
1093 ///
1094 /// And so on, until all the sources have been traversed, then
1095 /// it returns false.
1096 bool getNextRewritableSource(RegSubRegPair &Src,
1097 RegSubRegPair &Dst) override {
1098 // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
1099
1100 // If this is the first call, move to the first argument.
1101 if (CurrentSrcIdx == 0) {
1102 CurrentSrcIdx = 1;
1103 } else {
1104 // Otherwise, move to the next argument and check that it is valid.
1105 CurrentSrcIdx += 2;
1106 if (CurrentSrcIdx >= CopyLike.getNumOperands())
1107 return false;
1108 }
1109 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
1110 Src.Reg = MOInsertedReg.getReg();
1111 // If we have to compose sub-register indices, bail out.
1112 if ((Src.SubReg = MOInsertedReg.getSubReg()))
1113 return false;
1114
1115 // We want to track something that is compatible with the related
1116 // partial definition.
1117 Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
1118
1119 const MachineOperand &MODef = CopyLike.getOperand(0);
1120 Dst.Reg = MODef.getReg();
1121 // If we have to compose sub-registers, bail.
1122 return MODef.getSubReg() == 0;
1123 }
1124
1125 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
1126 // We cannot rewrite out of bound operands.
1127 // Moreover, rewritable sources are at odd positions.
1128 if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
1129 return false;
1130
1131 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
1132 MO.setReg(NewReg);
1133 MO.setSubReg(NewSubReg);
1134 return true;
1135 }
1136};
1137
1138} // end anonymous namespace
1139
1140/// Get the appropriated Rewriter for \p MI.
1141/// \return A pointer to a dynamically allocated Rewriter or nullptr if no
1142/// rewriter works for \p MI.
1144 // Handle uncoalescable copy-like instructions.
1145 if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
1146 MI.isExtractSubregLike())
1147 return new UncoalescableRewriter(MI);
1148
1149 switch (MI.getOpcode()) {
1150 default:
1151 return nullptr;
1152 case TargetOpcode::COPY:
1153 return new CopyRewriter(MI);
1154 case TargetOpcode::INSERT_SUBREG:
1155 return new InsertSubregRewriter(MI);
1156 case TargetOpcode::EXTRACT_SUBREG:
1157 return new ExtractSubregRewriter(MI, TII);
1158 case TargetOpcode::REG_SEQUENCE:
1159 return new RegSequenceRewriter(MI);
1160 }
1161}
1162
1163/// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
1164/// the new source to use for rewrite. If \p HandleMultipleSources is true and
1165/// multiple sources for a given \p Def are found along the way, we found a
1166/// PHI instructions that needs to be rewritten.
1167/// TODO: HandleMultipleSources should be removed once we test PHI handling
1168/// with coalescable copies.
1169static RegSubRegPair
1171 RegSubRegPair Def,
1172 const PeepholeOptimizer::RewriteMapTy &RewriteMap,
1173 bool HandleMultipleSources = true) {
1174 RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
1175 while (true) {
1176 ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
1177 // If there are no entries on the map, LookupSrc is the new source.
1178 if (!Res.isValid())
1179 return LookupSrc;
1180
1181 // There's only one source for this definition, keep searching...
1182 unsigned NumSrcs = Res.getNumSources();
1183 if (NumSrcs == 1) {
1184 LookupSrc.Reg = Res.getSrcReg(0);
1185 LookupSrc.SubReg = Res.getSrcSubReg(0);
1186 continue;
1187 }
1188
1189 // TODO: Remove once multiple srcs w/ coalescable copies are supported.
1190 if (!HandleMultipleSources)
1191 break;
1192
1193 // Multiple sources, recurse into each source to find a new source
1194 // for it. Then, rewrite the PHI accordingly to its new edges.
1196 for (unsigned i = 0; i < NumSrcs; ++i) {
1197 RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
1198 NewPHISrcs.push_back(
1199 getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
1200 }
1201
1202 // Build the new PHI node and return its def register as the new source.
1203 MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
1204 MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
1205 LLVM_DEBUG(dbgs() << "-- getNewSource\n");
1206 LLVM_DEBUG(dbgs() << " Replacing: " << OrigPHI);
1207 LLVM_DEBUG(dbgs() << " With: " << NewPHI);
1208 const MachineOperand &MODef = NewPHI.getOperand(0);
1209 return RegSubRegPair(MODef.getReg(), MODef.getSubReg());
1210 }
1211
1212 return RegSubRegPair(0, 0);
1213}
1214
1215/// Optimize generic copy instructions to avoid cross register bank copy.
1216/// The optimization looks through a chain of copies and tries to find a source
1217/// that has a compatible register class.
1218/// Two register classes are considered to be compatible if they share the same
1219/// register bank.
1220/// New copies issued by this optimization are register allocator
1221/// friendly. This optimization does not remove any copy as it may
1222/// overconstrain the register allocator, but replaces some operands
1223/// when possible.
1224/// \pre isCoalescableCopy(*MI) is true.
1225/// \return True, when \p MI has been rewritten. False otherwise.
1226bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
1227 assert(isCoalescableCopy(MI) && "Invalid argument");
1228 assert(MI.getDesc().getNumDefs() == 1 &&
1229 "Coalescer can understand multiple defs?!");
1230 const MachineOperand &MODef = MI.getOperand(0);
1231 // Do not rewrite physical definitions.
1232 if (MODef.getReg().isPhysical())
1233 return false;
1234
1235 bool Changed = false;
1236 // Get the right rewriter for the current copy.
1237 std::unique_ptr<Rewriter> CpyRewriter(getCopyRewriter(MI, *TII));
1238 // If none exists, bail out.
1239 if (!CpyRewriter)
1240 return false;
1241 // Rewrite each rewritable source.
1242 RegSubRegPair Src;
1243 RegSubRegPair TrackPair;
1244 while (CpyRewriter->getNextRewritableSource(Src, TrackPair)) {
1245 // Keep track of PHI nodes and its incoming edges when looking for sources.
1246 RewriteMapTy RewriteMap;
1247 // Try to find a more suitable source. If we failed to do so, or get the
1248 // actual source, move to the next source.
1249 if (!findNextSource(TrackPair, RewriteMap))
1250 continue;
1251
1252 // Get the new source to rewrite. TODO: Only enable handling of multiple
1253 // sources (PHIs) once we have a motivating example and testcases for it.
1254 RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
1255 /*HandleMultipleSources=*/false);
1256 if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0)
1257 continue;
1258
1259 // Rewrite source.
1260 if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
1261 // We may have extended the live-range of NewSrc, account for that.
1262 MRI->clearKillFlags(NewSrc.Reg);
1263 Changed = true;
1264 }
1265 }
1266 // TODO: We could have a clean-up method to tidy the instruction.
1267 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
1268 // => v0 = COPY v1
1269 // Currently we haven't seen motivating example for that and we
1270 // want to avoid untested code.
1271 NumRewrittenCopies += Changed;
1272 return Changed;
1273}
1274
1275/// Rewrite the source found through \p Def, by using the \p RewriteMap
1276/// and create a new COPY instruction. More info about RewriteMap in
1277/// PeepholeOptimizer::findNextSource. Right now this is only used to handle
1278/// Uncoalescable copies, since they are copy like instructions that aren't
1279/// recognized by the register allocator.
1280MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
1281 RegSubRegPair Def,
1282 RewriteMapTy &RewriteMap) {
1283 assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers");
1284
1285 // Find the new source to use in the COPY rewrite.
1286 RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);
1287
1288 // Insert the COPY.
1289 const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
1290 Register NewVReg = MRI->createVirtualRegister(DefRC);
1291
1292 MachineInstr *NewCopy =
1293 BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
1294 TII->get(TargetOpcode::COPY), NewVReg)
1295 .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
1296
1297 if (Def.SubReg) {
1298 NewCopy->getOperand(0).setSubReg(Def.SubReg);
1299 NewCopy->getOperand(0).setIsUndef();
1300 }
1301
1302 LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
1303 LLVM_DEBUG(dbgs() << " Replacing: " << CopyLike);
1304 LLVM_DEBUG(dbgs() << " With: " << *NewCopy);
1305 MRI->replaceRegWith(Def.Reg, NewVReg);
1306 MRI->clearKillFlags(NewVReg);
1307
1308 // We extended the lifetime of NewSrc.Reg, clear the kill flags to
1309 // account for that.
1310 MRI->clearKillFlags(NewSrc.Reg);
1311
1312 return *NewCopy;
1313}
1314
1315/// Optimize copy-like instructions to create
1316/// register coalescer friendly instruction.
1317/// The optimization tries to kill-off the \p MI by looking
1318/// through a chain of copies to find a source that has a compatible
1319/// register class.
1320/// If such a source is found, it replace \p MI by a generic COPY
1321/// operation.
1322/// \pre isUncoalescableCopy(*MI) is true.
1323/// \return True, when \p MI has been optimized. In that case, \p MI has
1324/// been removed from its parent.
1325/// All COPY instructions created, are inserted in \p LocalMIs.
1326bool PeepholeOptimizer::optimizeUncoalescableCopy(
1328 assert(isUncoalescableCopy(MI) && "Invalid argument");
1329 UncoalescableRewriter CpyRewriter(MI);
1330
1331 // Rewrite each rewritable source by generating new COPYs. This works
1332 // differently from optimizeCoalescableCopy since it first makes sure that all
1333 // definitions can be rewritten.
1334 RewriteMapTy RewriteMap;
1335 RegSubRegPair Src;
1337 SmallVector<RegSubRegPair, 4> RewritePairs;
1338 while (CpyRewriter.getNextRewritableSource(Src, Def)) {
1339 // If a physical register is here, this is probably for a good reason.
1340 // Do not rewrite that.
1341 if (Def.Reg.isPhysical())
1342 return false;
1343
1344 // If we do not know how to rewrite this definition, there is no point
1345 // in trying to kill this instruction.
1346 if (!findNextSource(Def, RewriteMap))
1347 return false;
1348
1349 RewritePairs.push_back(Def);
1350 }
1351
1352 // The change is possible for all defs, do it.
1353 for (const RegSubRegPair &Def : RewritePairs) {
1354 // Rewrite the "copy" in a way the register coalescer understands.
1355 MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
1356 LocalMIs.insert(&NewCopy);
1357 }
1358
1359 // MI is now dead.
1360 LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI);
1361 MI.eraseFromParent();
1362 ++NumUncoalescableCopies;
1363 return true;
1364}
1365
1366/// Check whether MI is a candidate for folding into a later instruction.
1367/// We only fold loads to virtual registers and the virtual register defined
1368/// has a single user.
1369bool PeepholeOptimizer::isLoadFoldable(
1370 MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
1371 if (!MI.canFoldAsLoad() || !MI.mayLoad())
1372 return false;
1373 const MCInstrDesc &MCID = MI.getDesc();
1374 if (MCID.getNumDefs() != 1)
1375 return false;
1376
1377 Register Reg = MI.getOperand(0).getReg();
1378 // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
1379 // loads. It should be checked when processing uses of the load, since
1380 // uses can be removed during peephole.
1381 if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
1382 MRI->hasOneNonDBGUser(Reg)) {
1383 FoldAsLoadDefCandidates.insert(Reg);
1384 return true;
1385 }
1386 return false;
1387}
1388
1389bool PeepholeOptimizer::isMoveImmediate(
1392 const MCInstrDesc &MCID = MI.getDesc();
1393 if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg())
1394 return false;
1395 Register Reg = MI.getOperand(0).getReg();
1396 if (!Reg.isVirtual())
1397 return false;
1398
1399 int64_t ImmVal;
1400 if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal))
1401 return false;
1402
1403 ImmDefMIs.insert(std::make_pair(Reg, &MI));
1404 ImmDefRegs.insert(Reg);
1405 return true;
1406}
1407
1408/// Try folding register operands that are defined by move immediate
1409/// instructions, i.e. a trivial constant folding optimization, if
1410/// and only if the def and use are in the same BB.
1411bool PeepholeOptimizer::foldImmediate(
1413 DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) {
1414 Deleted = false;
1415 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1416 MachineOperand &MO = MI.getOperand(i);
1417 if (!MO.isReg() || MO.isDef())
1418 continue;
1419 Register Reg = MO.getReg();
1420 if (!Reg.isVirtual())
1421 continue;
1422 if (ImmDefRegs.count(Reg) == 0)
1423 continue;
1425 assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
1426 if (TII->foldImmediate(MI, *II->second, Reg, MRI)) {
1427 ++NumImmFold;
1428 // foldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI
1429 // is not deleted, and we happened to get a same MI, we can delete MI and
1430 // replace its users.
1431 if (MRI->getVRegDef(Reg) &&
1432 MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) {
1433 Register DstReg = MI.getOperand(0).getReg();
1434 if (DstReg.isVirtual() &&
1435 MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) {
1436 MRI->replaceRegWith(DstReg, Reg);
1437 MI.eraseFromParent();
1438 Deleted = true;
1439 }
1440 }
1441 return true;
1442 }
1443 }
1444 return false;
1445}
1446
1447// FIXME: This is very simple and misses some cases which should be handled when
1448// motivating examples are found.
1449//
1450// The copy rewriting logic should look at uses as well as defs and be able to
1451// eliminate copies across blocks.
1452//
1453// Later copies that are subregister extracts will also not be eliminated since
1454// only the first copy is considered.
1455//
1456// e.g.
1457// %1 = COPY %0
1458// %2 = COPY %0:sub1
1459//
1460// Should replace %2 uses with %1:sub1
1461bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI) {
1462 assert(MI.isCopy() && "expected a COPY machine instruction");
1463
1464 RegSubRegPair SrcPair;
1465 if (!getCopySrc(MI, SrcPair))
1466 return false;
1467
1468 Register DstReg = MI.getOperand(0).getReg();
1469 if (!DstReg.isVirtual())
1470 return false;
1471
1472 if (CopySrcMIs.insert(std::make_pair(SrcPair, &MI)).second) {
1473 // First copy of this reg seen.
1474 return false;
1475 }
1476
1477 MachineInstr *PrevCopy = CopySrcMIs.find(SrcPair)->second;
1478
1479 assert(SrcPair.SubReg == PrevCopy->getOperand(1).getSubReg() &&
1480 "Unexpected mismatching subreg!");
1481
1482 Register PrevDstReg = PrevCopy->getOperand(0).getReg();
1483
1484 // Only replace if the copy register class is the same.
1485 //
1486 // TODO: If we have multiple copies to different register classes, we may want
1487 // to track multiple copies of the same source register.
1488 if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
1489 return false;
1490
1491 MRI->replaceRegWith(DstReg, PrevDstReg);
1492
1493 // Lifetime of the previous copy has been extended.
1494 MRI->clearKillFlags(PrevDstReg);
1495 return true;
1496}
1497
1498bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
1499 return Reg.isPhysical() && !MRI->isAllocatable(Reg);
1500}
1501
1502bool PeepholeOptimizer::foldRedundantNAPhysCopy(
1504 assert(MI.isCopy() && "expected a COPY machine instruction");
1505
1507 return false;
1508
1509 Register DstReg = MI.getOperand(0).getReg();
1510 Register SrcReg = MI.getOperand(1).getReg();
1511 if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) {
1512 // %vreg = COPY $physreg
1513 // Avoid using a datastructure which can track multiple live non-allocatable
1514 // phys->virt copies since LLVM doesn't seem to do this.
1515 NAPhysToVirtMIs.insert({SrcReg, &MI});
1516 return false;
1517 }
1518
1519 if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
1520 return false;
1521
1522 // $physreg = COPY %vreg
1523 auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
1524 if (PrevCopy == NAPhysToVirtMIs.end()) {
1525 // We can't remove the copy: there was an intervening clobber of the
1526 // non-allocatable physical register after the copy to virtual.
1527 LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
1528 << MI);
1529 return false;
1530 }
1531
1532 Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
1533 if (PrevDstReg == SrcReg) {
1534 // Remove the virt->phys copy: we saw the virtual register definition, and
1535 // the non-allocatable physical register's state hasn't changed since then.
1536 LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
1537 ++NumNAPhysCopies;
1538 return true;
1539 }
1540
1541 // Potential missed optimization opportunity: we saw a different virtual
1542 // register get a copy of the non-allocatable physical register, and we only
1543 // track one such copy. Avoid getting confused by this new non-allocatable
1544 // physical register definition, and remove it from the tracked copies.
1545 LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
1546 NAPhysToVirtMIs.erase(PrevCopy);
1547 return false;
1548}
1549
1550/// \bried Returns true if \p MO is a virtual register operand.
1552 return MO.isReg() && MO.getReg().isVirtual();
1553}
1554
1555bool PeepholeOptimizer::findTargetRecurrence(
1556 Register Reg, const SmallSet<Register, 2> &TargetRegs,
1557 RecurrenceCycle &RC) {
1558 // Recurrence found if Reg is in TargetRegs.
1559 if (TargetRegs.count(Reg))
1560 return true;
1561
1562 // TODO: Curerntly, we only allow the last instruction of the recurrence
1563 // cycle (the instruction that feeds the PHI instruction) to have more than
1564 // one uses to guarantee that commuting operands does not tie registers
1565 // with overlapping live range. Once we have actual live range info of
1566 // each register, this constraint can be relaxed.
1567 if (!MRI->hasOneNonDBGUse(Reg))
1568 return false;
1569
1570 // Give up if the reccurrence chain length is longer than the limit.
1571 if (RC.size() >= MaxRecurrenceChain)
1572 return false;
1573
1574 MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
1575 unsigned Idx = MI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr);
1576
1577 // Only interested in recurrences whose instructions have only one def, which
1578 // is a virtual register.
1579 if (MI.getDesc().getNumDefs() != 1)
1580 return false;
1581
1582 MachineOperand &DefOp = MI.getOperand(0);
1583 if (!isVirtualRegisterOperand(DefOp))
1584 return false;
1585
1586 // Check if def operand of MI is tied to any use operand. We are only
1587 // interested in the case that all the instructions in the recurrence chain
1588 // have there def operand tied with one of the use operand.
1589 unsigned TiedUseIdx;
1590 if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
1591 return false;
1592
1593 if (Idx == TiedUseIdx) {
1594 RC.push_back(RecurrenceInstr(&MI));
1595 return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
1596 } else {
1597 // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
1598 unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
1599 if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
1600 RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
1601 return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
1602 }
1603 }
1604
1605 return false;
1606}
1607
1608/// Phi instructions will eventually be lowered to copy instructions.
1609/// If phi is in a loop header, a recurrence may formulated around the source
1610/// and destination of the phi. For such case commuting operands of the
1611/// instructions in the recurrence may enable coalescing of the copy instruction
1612/// generated from the phi. For example, if there is a recurrence of
1613///
1614/// LoopHeader:
1615/// %1 = phi(%0, %100)
1616/// LoopLatch:
1617/// %0<def, tied1> = ADD %2<def, tied0>, %1
1618///
1619/// , the fact that %0 and %2 are in the same tied operands set makes
1620/// the coalescing of copy instruction generated from the phi in
1621/// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and
1622/// %2 have overlapping live range. This introduces additional move
1623/// instruction to the final assembly. However, if we commute %2 and
1624/// %1 of ADD instruction, the redundant move instruction can be
1625/// avoided.
1626bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
1627 SmallSet<Register, 2> TargetRegs;
1628 for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
1629 MachineOperand &MO = PHI.getOperand(Idx);
1630 assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
1631 TargetRegs.insert(MO.getReg());
1632 }
1633
1634 bool Changed = false;
1635 RecurrenceCycle RC;
1636 if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
1637 // Commutes operands of instructions in RC if necessary so that the copy to
1638 // be generated from PHI can be coalesced.
1639 LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
1640 for (auto &RI : RC) {
1641 LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
1642 auto CP = RI.getCommutePair();
1643 if (CP) {
1644 Changed = true;
1645 TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
1646 (*CP).second);
1647 LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
1648 }
1649 }
1650 }
1651
1652 return Changed;
1653}
1654
1658 MFPropsModifier _(*this, MF);
1659 auto *DT =
1660 Aggressive ? &MFAM.getResult<MachineDominatorTreeAnalysis>(MF) : nullptr;
1661 auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
1662 PeepholeOptimizer Impl(DT, MLI);
1663 bool Changed = Impl.run(MF);
1664 if (!Changed)
1665 return PreservedAnalyses::all();
1666
1668 PA.preserve<MachineDominatorTreeAnalysis>();
1669 PA.preserve<MachineLoopAnalysis>();
1670 PA.preserveSet<CFGAnalyses>();
1671 return PA;
1672}
1673
1674bool PeepholeOptimizerLegacy::runOnMachineFunction(MachineFunction &MF) {
1675 if (skipFunction(MF.getFunction()))
1676 return false;
1677 auto *DT = Aggressive
1678 ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
1679 : nullptr;
1680 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1681 PeepholeOptimizer Impl(DT, MLI);
1682 return Impl.run(MF);
1683}
1684
1685bool PeepholeOptimizer::run(MachineFunction &MF) {
1686
1687 LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
1688 LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
1689
1690 if (DisablePeephole)
1691 return false;
1692
1693 TII = MF.getSubtarget().getInstrInfo();
1695 MRI = &MF.getRegInfo();
1696 MF.setDelegate(this);
1697
1698 bool Changed = false;
1699
1700 for (MachineBasicBlock &MBB : MF) {
1701 bool SeenMoveImm = false;
1702
1703 // During this forward scan, at some point it needs to answer the question
1704 // "given a pointer to an MI in the current BB, is it located before or
1705 // after the current instruction".
1706 // To perform this, the following set keeps track of the MIs already seen
1707 // during the scan, if a MI is not in the set, it is assumed to be located
1708 // after. Newly created MIs have to be inserted in the set as well.
1710 SmallSet<Register, 4> ImmDefRegs;
1712 SmallSet<Register, 16> FoldAsLoadDefCandidates;
1713
1714 // Track when a non-allocatable physical register is copied to a virtual
1715 // register so that useless moves can be removed.
1716 //
1717 // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg`
1718 // without any intervening re-definition of $physreg.
1719 DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
1720
1721 CopySrcMIs.clear();
1722
1723 bool IsLoopHeader = MLI->isLoopHeader(&MBB);
1724
1725 for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
1726 MII != MIE;) {
1727 MachineInstr *MI = &*MII;
1728 // We may be erasing MI below, increment MII now.
1729 ++MII;
1730 LocalMIs.insert(MI);
1731
1732 // Skip debug instructions. They should not affect this peephole
1733 // optimization.
1734 if (MI->isDebugInstr())
1735 continue;
1736
1737 if (MI->isPosition())
1738 continue;
1739
1740 if (IsLoopHeader && MI->isPHI()) {
1741 if (optimizeRecurrence(*MI)) {
1742 Changed = true;
1743 continue;
1744 }
1745 }
1746
1747 if (!MI->isCopy()) {
1748 for (const MachineOperand &MO : MI->operands()) {
1749 // Visit all operands: definitions can be implicit or explicit.
1750 if (MO.isReg()) {
1751 Register Reg = MO.getReg();
1752 if (MO.isDef() && isNAPhysCopy(Reg)) {
1753 const auto &Def = NAPhysToVirtMIs.find(Reg);
1754 if (Def != NAPhysToVirtMIs.end()) {
1755 // A new definition of the non-allocatable physical register
1756 // invalidates previous copies.
1758 << "NAPhysCopy: invalidating because of " << *MI);
1759 NAPhysToVirtMIs.erase(Def);
1760 }
1761 }
1762 } else if (MO.isRegMask()) {
1763 const uint32_t *RegMask = MO.getRegMask();
1764 for (auto &RegMI : NAPhysToVirtMIs) {
1765 Register Def = RegMI.first;
1766 if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
1768 << "NAPhysCopy: invalidating because of " << *MI);
1769 NAPhysToVirtMIs.erase(Def);
1770 }
1771 }
1772 }
1773 }
1774 }
1775
1776 if (MI->isImplicitDef() || MI->isKill())
1777 continue;
1778
1779 if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
1780 // Blow away all non-allocatable physical registers knowledge since we
1781 // don't know what's correct anymore.
1782 //
1783 // FIXME: handle explicit asm clobbers.
1784 LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
1785 << *MI);
1786 NAPhysToVirtMIs.clear();
1787 }
1788
1789 if ((isUncoalescableCopy(*MI) &&
1790 optimizeUncoalescableCopy(*MI, LocalMIs)) ||
1791 (MI->isCompare() && optimizeCmpInstr(*MI)) ||
1792 (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
1793 // MI is deleted.
1794 LocalMIs.erase(MI);
1795 Changed = true;
1796 continue;
1797 }
1798
1799 if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
1800 Changed = true;
1801 continue;
1802 }
1803
1804 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
1805 // MI is just rewritten.
1806 Changed = true;
1807 continue;
1808 }
1809
1810 if (MI->isCopy() && (foldRedundantCopy(*MI) ||
1811 foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
1812 LocalMIs.erase(MI);
1813 LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
1814 MI->eraseFromParent();
1815 Changed = true;
1816 continue;
1817 }
1818
1819 if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
1820 SeenMoveImm = true;
1821 } else {
1822 Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
1823 // optimizeExtInstr might have created new instructions after MI
1824 // and before the already incremented MII. Adjust MII so that the
1825 // next iteration sees the new instructions.
1826 MII = MI;
1827 ++MII;
1828 if (SeenMoveImm) {
1829 bool Deleted;
1830 Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted);
1831 if (Deleted) {
1832 LocalMIs.erase(MI);
1833 continue;
1834 }
1835 }
1836 }
1837
1838 // Check whether MI is a load candidate for folding into a later
1839 // instruction. If MI is not a candidate, check whether we can fold an
1840 // earlier load into MI.
1841 if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
1842 !FoldAsLoadDefCandidates.empty()) {
1843
1844 // We visit each operand even after successfully folding a previous
1845 // one. This allows us to fold multiple loads into a single
1846 // instruction. We do assume that optimizeLoadInstr doesn't insert
1847 // foldable uses earlier in the argument list. Since we don't restart
1848 // iteration, we'd miss such cases.
1849 const MCInstrDesc &MIDesc = MI->getDesc();
1850 for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands(); ++i) {
1851 const MachineOperand &MOp = MI->getOperand(i);
1852 if (!MOp.isReg())
1853 continue;
1854 Register FoldAsLoadDefReg = MOp.getReg();
1855 if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
1856 // We need to fold load after optimizeCmpInstr, since
1857 // optimizeCmpInstr can enable folding by converting SUB to CMP.
1858 // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
1859 // we need it for markUsesInDebugValueAsUndef().
1860 Register FoldedReg = FoldAsLoadDefReg;
1861 MachineInstr *DefMI = nullptr;
1862 if (MachineInstr *FoldMI =
1863 TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
1864 // Update LocalMIs since we replaced MI with FoldMI and deleted
1865 // DefMI.
1866 LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
1867 LLVM_DEBUG(dbgs() << " With: " << *FoldMI);
1868 LocalMIs.erase(MI);
1869 LocalMIs.erase(DefMI);
1870 LocalMIs.insert(FoldMI);
1871 // Update the call site info.
1872 if (MI->shouldUpdateCallSiteInfo())
1873 MI->getMF()->moveCallSiteInfo(MI, FoldMI);
1874 MI->eraseFromParent();
1876 MRI->markUsesInDebugValueAsUndef(FoldedReg);
1877 FoldAsLoadDefCandidates.erase(FoldedReg);
1878 ++NumLoadFold;
1879
1880 // MI is replaced with FoldMI so we can continue trying to fold
1881 Changed = true;
1882 MI = FoldMI;
1883 }
1884 }
1885 }
1886 }
1887
1888 // If we run into an instruction we can't fold across, discard
1889 // the load candidates. Note: We might be able to fold *into* this
1890 // instruction, so this needs to be after the folding logic.
1891 if (MI->isLoadFoldBarrier()) {
1892 LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
1893 FoldAsLoadDefCandidates.clear();
1894 }
1895 }
1896 }
1897
1898 MF.resetDelegate(this);
1899 return Changed;
1900}
1901
1902ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
1903 assert(Def->isCopy() && "Invalid definition");
1904 // Copy instruction are supposed to be: Def = Src.
1905 // If someone breaks this assumption, bad things will happen everywhere.
1906 // There may be implicit uses preventing the copy to be moved across
1907 // some target specific register definitions
1908 assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
1909 "Invalid number of operands");
1910 assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
1911
1912 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
1913 // If we look for a different subreg, it means we want a subreg of src.
1914 // Bails as we do not support composing subregs yet.
1915 return ValueTrackerResult();
1916 // Otherwise, we want the whole source.
1917 const MachineOperand &Src = Def->getOperand(1);
1918 if (Src.isUndef())
1919 return ValueTrackerResult();
1920 return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1921}
1922
1923ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
1924 assert(Def->isBitcast() && "Invalid definition");
1925
1926 // Bail if there are effects that a plain copy will not expose.
1927 if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
1928 return ValueTrackerResult();
1929
1930 // Bitcasts with more than one def are not supported.
1931 if (Def->getDesc().getNumDefs() != 1)
1932 return ValueTrackerResult();
1933 const MachineOperand DefOp = Def->getOperand(DefIdx);
1934 if (DefOp.getSubReg() != DefSubReg)
1935 // If we look for a different subreg, it means we want a subreg of the src.
1936 // Bails as we do not support composing subregs yet.
1937 return ValueTrackerResult();
1938
1939 unsigned SrcIdx = Def->getNumOperands();
1940 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
1941 ++OpIdx) {
1942 const MachineOperand &MO = Def->getOperand(OpIdx);
1943 if (!MO.isReg() || !MO.getReg())
1944 continue;
1945 // Ignore dead implicit defs.
1946 if (MO.isImplicit() && MO.isDead())
1947 continue;
1948 assert(!MO.isDef() && "We should have skipped all the definitions by now");
1949 if (SrcIdx != EndOpIdx)
1950 // Multiple sources?
1951 return ValueTrackerResult();
1952 SrcIdx = OpIdx;
1953 }
1954
1955 // In some rare case, Def has no input, SrcIdx is out of bound,
1956 // getOperand(SrcIdx) will fail below.
1957 if (SrcIdx >= Def->getNumOperands())
1958 return ValueTrackerResult();
1959
1960 // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
1961 // will break the assumed guarantees for the upper bits.
1962 for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
1963 if (UseMI.isSubregToReg())
1964 return ValueTrackerResult();
1965 }
1966
1967 const MachineOperand &Src = Def->getOperand(SrcIdx);
1968 if (Src.isUndef())
1969 return ValueTrackerResult();
1970 return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1971}
1972
1973ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
1974 assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
1975 "Invalid definition");
1976
1977 if (Def->getOperand(DefIdx).getSubReg())
1978 // If we are composing subregs, bail out.
1979 // The case we are checking is Def.<subreg> = REG_SEQUENCE.
1980 // This should almost never happen as the SSA property is tracked at
1981 // the register level (as opposed to the subreg level).
1982 // I.e.,
1983 // Def.sub0 =
1984 // Def.sub1 =
1985 // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
1986 // Def. Thus, it must not be generated.
1987 // However, some code could theoretically generates a single
1988 // Def.sub0 (i.e, not defining the other subregs) and we would
1989 // have this case.
1990 // If we can ascertain (or force) that this never happens, we could
1991 // turn that into an assertion.
1992 return ValueTrackerResult();
1993
1994 if (!TII)
1995 // We could handle the REG_SEQUENCE here, but we do not want to
1996 // duplicate the code from the generic TII.
1997 return ValueTrackerResult();
1998
2000 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
2001 return ValueTrackerResult();
2002
2003 // We are looking at:
2004 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
2005 // Check if one of the operand defines the subreg we are interested in.
2006 for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
2007 if (RegSeqInput.SubIdx == DefSubReg)
2008 return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
2009 }
2010
2011 // If the subreg we are tracking is super-defined by another subreg,
2012 // we could follow this value. However, this would require to compose
2013 // the subreg and we do not do that for now.
2014 return ValueTrackerResult();
2015}
2016
2017ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
2018 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
2019 "Invalid definition");
2020
2021 if (Def->getOperand(DefIdx).getSubReg())
2022 // If we are composing subreg, bail out.
2023 // Same remark as getNextSourceFromRegSequence.
2024 // I.e., this may be turned into an assert.
2025 return ValueTrackerResult();
2026
2027 if (!TII)
2028 // We could handle the REG_SEQUENCE here, but we do not want to
2029 // duplicate the code from the generic TII.
2030 return ValueTrackerResult();
2031
2032 RegSubRegPair BaseReg;
2033 RegSubRegPairAndIdx InsertedReg;
2034 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
2035 return ValueTrackerResult();
2036
2037 // We are looking at:
2038 // Def = INSERT_SUBREG v0, v1, sub1
2039 // There are two cases:
2040 // 1. DefSubReg == sub1, get v1.
2041 // 2. DefSubReg != sub1, the value may be available through v0.
2042
2043 // #1 Check if the inserted register matches the required sub index.
2044 if (InsertedReg.SubIdx == DefSubReg) {
2045 return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
2046 }
2047 // #2 Otherwise, if the sub register we are looking for is not partial
2048 // defined by the inserted element, we can look through the main
2049 // register (v0).
2050 const MachineOperand &MODef = Def->getOperand(DefIdx);
2051 // If the result register (Def) and the base register (v0) do not
2052 // have the same register class or if we have to compose
2053 // subregisters, bail out.
2054 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
2055 BaseReg.SubReg)
2056 return ValueTrackerResult();
2057
2058 // Get the TRI and check if the inserted sub-register overlaps with the
2059 // sub-register we are tracking.
2060 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
2061 if (!TRI || !(TRI->getSubRegIndexLaneMask(DefSubReg) &
2062 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx))
2063 .none())
2064 return ValueTrackerResult();
2065 // At this point, the value is available in v0 via the same subreg
2066 // we used for Def.
2067 return ValueTrackerResult(BaseReg.Reg, DefSubReg);
2068}
2069
2070ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
2071 assert((Def->isExtractSubreg() || Def->isExtractSubregLike()) &&
2072 "Invalid definition");
2073 // We are looking at:
2074 // Def = EXTRACT_SUBREG v0, sub0
2075
2076 // Bail if we have to compose sub registers.
2077 // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
2078 if (DefSubReg)
2079 return ValueTrackerResult();
2080
2081 if (!TII)
2082 // We could handle the EXTRACT_SUBREG here, but we do not want to
2083 // duplicate the code from the generic TII.
2084 return ValueTrackerResult();
2085
2086 RegSubRegPairAndIdx ExtractSubregInputReg;
2087 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
2088 return ValueTrackerResult();
2089
2090 // Bail if we have to compose sub registers.
2091 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
2092 if (ExtractSubregInputReg.SubReg)
2093 return ValueTrackerResult();
2094 // Otherwise, the value is available in the v0.sub0.
2095 return ValueTrackerResult(ExtractSubregInputReg.Reg,
2096 ExtractSubregInputReg.SubIdx);
2097}
2098
2099ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
2100 assert(Def->isSubregToReg() && "Invalid definition");
2101 // We are looking at:
2102 // Def = SUBREG_TO_REG Imm, v0, sub0
2103
2104 // Bail if we have to compose sub registers.
2105 // If DefSubReg != sub0, we would have to check that all the bits
2106 // we track are included in sub0 and if yes, we would have to
2107 // determine the right subreg in v0.
2108 if (DefSubReg != Def->getOperand(3).getImm())
2109 return ValueTrackerResult();
2110 // Bail if we have to compose sub registers.
2111 // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
2112 if (Def->getOperand(2).getSubReg())
2113 return ValueTrackerResult();
2114
2115 return ValueTrackerResult(Def->getOperand(2).getReg(),
2116 Def->getOperand(3).getImm());
2117}
2118
2119/// Explore each PHI incoming operand and return its sources.
2120ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
2121 assert(Def->isPHI() && "Invalid definition");
2122 ValueTrackerResult Res;
2123
2124 // If we look for a different subreg, bail as we do not support composing
2125 // subregs yet.
2126 if (Def->getOperand(0).getSubReg() != DefSubReg)
2127 return ValueTrackerResult();
2128
2129 // Return all register sources for PHI instructions.
2130 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
2131 const MachineOperand &MO = Def->getOperand(i);
2132 assert(MO.isReg() && "Invalid PHI instruction");
2133 // We have no code to deal with undef operands. They shouldn't happen in
2134 // normal programs anyway.
2135 if (MO.isUndef())
2136 return ValueTrackerResult();
2137 Res.addSource(MO.getReg(), MO.getSubReg());
2138 }
2139
2140 return Res;
2141}
2142
2143ValueTrackerResult ValueTracker::getNextSourceImpl() {
2144 assert(Def && "This method needs a valid definition");
2145
2146 assert(((Def->getOperand(DefIdx).isDef() &&
2147 (DefIdx < Def->getDesc().getNumDefs() ||
2148 Def->getDesc().isVariadic())) ||
2149 Def->getOperand(DefIdx).isImplicit()) &&
2150 "Invalid DefIdx");
2151 if (Def->isCopy())
2152 return getNextSourceFromCopy();
2153 if (Def->isBitcast())
2154 return getNextSourceFromBitcast();
2155 // All the remaining cases involve "complex" instructions.
2156 // Bail if we did not ask for the advanced tracking.
2158 return ValueTrackerResult();
2159 if (Def->isRegSequence() || Def->isRegSequenceLike())
2160 return getNextSourceFromRegSequence();
2161 if (Def->isInsertSubreg() || Def->isInsertSubregLike())
2162 return getNextSourceFromInsertSubreg();
2163 if (Def->isExtractSubreg() || Def->isExtractSubregLike())
2164 return getNextSourceFromExtractSubreg();
2165 if (Def->isSubregToReg())
2166 return getNextSourceFromSubregToReg();
2167 if (Def->isPHI())
2168 return getNextSourceFromPHI();
2169 return ValueTrackerResult();
2170}
2171
2172ValueTrackerResult ValueTracker::getNextSource() {
2173 // If we reach a point where we cannot move up in the use-def chain,
2174 // there is nothing we can get.
2175 if (!Def)
2176 return ValueTrackerResult();
2177
2178 ValueTrackerResult Res = getNextSourceImpl();
2179 if (Res.isValid()) {
2180 // Update definition, definition index, and subregister for the
2181 // next call of getNextSource.
2182 // Update the current register.
2183 bool OneRegSrc = Res.getNumSources() == 1;
2184 if (OneRegSrc)
2185 Reg = Res.getSrcReg(0);
2186 // Update the result before moving up in the use-def chain
2187 // with the instruction containing the last found sources.
2188 Res.setInst(Def);
2189
2190 // If we can still move up in the use-def chain, move to the next
2191 // definition.
2192 if (!Reg.isPhysical() && OneRegSrc) {
2193 MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
2194 if (DI != MRI.def_end()) {
2195 Def = DI->getParent();
2196 DefIdx = DI.getOperandNo();
2197 DefSubReg = Res.getSrcSubReg(0);
2198 } else {
2199 Def = nullptr;
2200 }
2201 return Res;
2202 }
2203 }
2204 // If we end up here, this means we will not be able to find another source
2205 // for the next iteration. Make sure any new call to getNextSource bails out
2206 // early by cutting the use-def chain.
2207 Def = nullptr;
2208 return Res;
2209}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Rewrite undef for PHI
MachineBasicBlock & MBB
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
TargetInstrInfo::RegSubRegPair RegSubRegPair
static cl::opt< unsigned > RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10), cl::desc("Limit the length of PHI chains to lookup"))
static cl::opt< bool > DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer"))
static cl::opt< unsigned > MaxRecurrenceChain("recurrence-chain-limit", cl::Hidden, cl::init(3), cl::desc("Maximum length of recurrence chain when evaluating the benefit " "of commuting operands"))
static cl::opt< bool > DisableNAPhysCopyOpt("disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable non-allocatable physical register copy optimization"))
static bool isVirtualRegisterOperand(MachineOperand &MO)
\bried Returns true if MO is a virtual register operand.
static MachineInstr & insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const SmallVectorImpl< RegSubRegPair > &SrcRegs, MachineInstr &OrigPHI)
Insert a PHI instruction with incoming edges SrcRegs that are guaranteed to have the same register cl...
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
static cl::opt< bool > DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization"))
Specifiy whether or not the value tracking looks through complex instructions.
static Rewriter * getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
Get the appropriated Rewriter for MI.
Peephole Optimizations
#define DEBUG_TYPE
static RegSubRegPair getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, RegSubRegPair Def, const PeepholeOptimizer::RewriteMapTy &RewriteMap, bool HandleMultipleSources=true)
Given a Def.Reg and Def.SubReg pair, use RewriteMap to find the new source to use for rewrite.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Virtual Register Rewriter
Definition: VirtRegMap.cpp:261
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
bool isLoopHeader(const BlockT *BB) const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
An RAII based helper class to modify MachineFunctionProperties when running pass.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
virtual void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID)
Callback before changing MCInstrDesc.
virtual void MF_HandleRemoval(MachineInstr &MI)=0
Callback before a removal. This should not modify the MI directly.
virtual void MF_HandleInsertion(MachineInstr &MI)=0
Callback after an insertion. This should not modify the MI directly.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void setDelegate(Delegate *delegate)
Set the delegate.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
bool empty() const
Definition: SmallSet.h:168
bool erase(const T &V)
Definition: SmallSet.h:193
void clear()
Definition: SmallSet.h:204
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
TargetInstrInfo - Interface to description of machine instruction set.
static const unsigned CommuteAnyOperandIndex
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
MCInstrDesc const & getDesc(MCInstrInfo const &MCII, MCInst const &MCI)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::pair< unsigned, unsigned > IndexPair
The pair of an instruction index and a operand index.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
char & PeepholeOptimizerLegacyID
PeepholeOptimizer - This pass performs peephole optimizations - like extension and comparison elimina...
PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializePeepholeOptimizerLegacyPass(PassRegistry &)
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
A pair composed of a pair of a register and a sub-register index, and another sub-register index.
A pair composed of a register and a sub-register index.