LLVM 20.0.0git
PeepholeOptimizer.cpp
Go to the documentation of this file.
1//===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Perform peephole optimizations on the machine code:
10//
11// - Optimize Extensions
12//
13// Optimization of sign / zero extension instructions. It may be extended to
14// handle other instructions with similar properties.
15//
16// On some targets, some instructions, e.g. X86 sign / zero extension, may
17// leave the source value in the lower part of the result. This optimization
18// will replace some uses of the pre-extension value with uses of the
19// sub-register of the results.
20//
21// - Optimize Comparisons
22//
23// Optimization of comparison instructions. For instance, in this code:
24//
25// sub r1, 1
26// cmp r1, 0
27// bz L1
28//
29// If the "sub" instruction all ready sets (or could be modified to set) the
30// same flag that the "cmp" instruction sets and that "bz" uses, then we can
31// eliminate the "cmp" instruction.
32//
33// Another instance, in this code:
34//
35// sub r1, r3 | sub r1, imm
36// cmp r3, r1 or cmp r1, r3 | cmp r1, imm
37// bge L1
38//
39// If the branch instruction can use flag from "sub", then we can replace
40// "sub" with "subs" and eliminate the "cmp" instruction.
41//
42// - Optimize Loads:
43//
44// Loads that can be folded into a later instruction. A load is foldable
45// if it loads to virtual registers and the virtual register defined has
46// a single use.
47//
48// - Optimize Copies and Bitcast (more generally, target specific copies):
49//
50// Rewrite copies and bitcasts to avoid cross register bank copies
51// when possible.
52// E.g., Consider the following example, where capital and lower
53// letters denote different register file:
54// b = copy A <-- cross-bank copy
55// C = copy b <-- cross-bank copy
56// =>
57// b = copy A <-- cross-bank copy
58// C = copy A <-- same-bank copy
59//
60// E.g., for bitcast:
61// b = bitcast A <-- cross-bank copy
62// C = bitcast b <-- cross-bank copy
63// =>
64// b = bitcast A <-- cross-bank copy
65// C = copy A <-- same-bank copy
66//===----------------------------------------------------------------------===//
67
69#include "llvm/ADT/DenseMap.h"
71#include "llvm/ADT/SmallSet.h"
73#include "llvm/ADT/Statistic.h"
89#include "llvm/MC/LaneBitmask.h"
90#include "llvm/MC/MCInstrDesc.h"
91#include "llvm/Pass.h"
93#include "llvm/Support/Debug.h"
95#include <cassert>
96#include <cstdint>
97#include <memory>
98#include <utility>
99
100using namespace llvm;
103
104#define DEBUG_TYPE "peephole-opt"
105
106// Optimize Extensions
107static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
108 cl::desc("Aggressive extension optimization"));
109
110static cl::opt<bool>
111 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
112 cl::desc("Disable the peephole optimizer"));
113
114/// Specifiy whether or not the value tracking looks through
115/// complex instructions. When this is true, the value tracker
116/// bails on everything that is not a copy or a bitcast.
117static cl::opt<bool>
118 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
119 cl::desc("Disable advanced copy optimization"));
120
122 "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
123 cl::desc("Disable non-allocatable physical register copy optimization"));
124
125// Limit the number of PHI instructions to process
126// in PeepholeOptimizer::getNextSource.
128 RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10),
129 cl::desc("Limit the length of PHI chains to lookup"));
130
131// Limit the length of recurrence chain when evaluating the benefit of
132// commuting operands.
134 "recurrence-chain-limit", cl::Hidden, cl::init(3),
135 cl::desc("Maximum length of recurrence chain when evaluating the benefit "
136 "of commuting operands"));
137
138STATISTIC(NumReuse, "Number of extension results reused");
139STATISTIC(NumCmps, "Number of compares eliminated");
140STATISTIC(NumImmFold, "Number of move immediate folded");
141STATISTIC(NumLoadFold, "Number of loads folded");
142STATISTIC(NumSelects, "Number of selects optimized");
143STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
144STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
145STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
146
147namespace {
148
149class ValueTrackerResult;
150class RecurrenceInstr;
151
152/// Interface to query instructions amenable to copy rewriting.
153class Rewriter {
154protected:
155 MachineInstr &CopyLike;
156 unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten.
157public:
158 Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
159 virtual ~Rewriter() = default;
160
161 /// Get the next rewritable source (SrcReg, SrcSubReg) and
162 /// the related value that it affects (DstReg, DstSubReg).
163 /// A source is considered rewritable if its register class and the
164 /// register class of the related DstReg may not be register
165 /// coalescer friendly. In other words, given a copy-like instruction
166 /// not all the arguments may be returned at rewritable source, since
167 /// some arguments are none to be register coalescer friendly.
168 ///
169 /// Each call of this method moves the current source to the next
170 /// rewritable source.
171 /// For instance, let CopyLike be the instruction to rewrite.
172 /// CopyLike has one definition and one source:
173 /// dst.dstSubIdx = CopyLike src.srcSubIdx.
174 ///
175 /// The first call will give the first rewritable source, i.e.,
176 /// the only source this instruction has:
177 /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
178 /// This source defines the whole definition, i.e.,
179 /// (DstReg, DstSubReg) = (dst, dstSubIdx).
180 ///
181 /// The second and subsequent calls will return false, as there is only one
182 /// rewritable source.
183 ///
184 /// \return True if a rewritable source has been found, false otherwise.
185 /// The output arguments are valid if and only if true is returned.
186 virtual bool getNextRewritableSource(RegSubRegPair &Src,
187 RegSubRegPair &Dst) = 0;
188
189 /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
190 /// \return True if the rewriting was possible, false otherwise.
191 virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
192};
193
194/// Rewriter for COPY instructions.
195class CopyRewriter : public Rewriter {
196public:
197 CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
198 assert(MI.isCopy() && "Expected copy instruction");
199 }
200 virtual ~CopyRewriter() = default;
201
202 bool getNextRewritableSource(RegSubRegPair &Src,
203 RegSubRegPair &Dst) override {
204 // CurrentSrcIdx > 0 means this function has already been called.
205 if (CurrentSrcIdx > 0)
206 return false;
207 // This is the first call to getNextRewritableSource.
208 // Move the CurrentSrcIdx to remember that we made that call.
209 CurrentSrcIdx = 1;
210 // The rewritable source is the argument.
211 const MachineOperand &MOSrc = CopyLike.getOperand(1);
212 Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
213 // What we track are the alternative sources of the definition.
214 const MachineOperand &MODef = CopyLike.getOperand(0);
215 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
216 return true;
217 }
218
219 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
220 if (CurrentSrcIdx != 1)
221 return false;
222 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
223 MOSrc.setReg(NewReg);
224 MOSrc.setSubReg(NewSubReg);
225 return true;
226 }
227};
228
229/// Helper class to rewrite uncoalescable copy like instructions
230/// into new COPY (coalescable friendly) instructions.
231class UncoalescableRewriter : public Rewriter {
232 unsigned NumDefs; ///< Number of defs in the bitcast.
233
234public:
235 UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
236 NumDefs = MI.getDesc().getNumDefs();
237 }
238
239 /// \see See Rewriter::getNextRewritableSource()
240 /// All such sources need to be considered rewritable in order to
241 /// rewrite a uncoalescable copy-like instruction. This method return
242 /// each definition that must be checked if rewritable.
243 bool getNextRewritableSource(RegSubRegPair &Src,
244 RegSubRegPair &Dst) override {
245 // Find the next non-dead definition and continue from there.
246 if (CurrentSrcIdx == NumDefs)
247 return false;
248
249 while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
250 ++CurrentSrcIdx;
251 if (CurrentSrcIdx == NumDefs)
252 return false;
253 }
254
255 // What we track are the alternative sources of the definition.
256 Src = RegSubRegPair(0, 0);
257 const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
258 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
259
260 CurrentSrcIdx++;
261 return true;
262 }
263
264 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
265 return false;
266 }
267};
268
269/// Specialized rewriter for INSERT_SUBREG instruction.
270class InsertSubregRewriter : public Rewriter {
271public:
272 InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
273 assert(MI.isInsertSubreg() && "Invalid instruction");
274 }
275
276 /// \see See Rewriter::getNextRewritableSource()
277 /// Here CopyLike has the following form:
278 /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
279 /// Src1 has the same register class has dst, hence, there is
280 /// nothing to rewrite.
281 /// Src2.src2SubIdx, may not be register coalescer friendly.
282 /// Therefore, the first call to this method returns:
283 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
284 /// (DstReg, DstSubReg) = (dst, subIdx).
285 ///
286 /// Subsequence calls will return false.
287 bool getNextRewritableSource(RegSubRegPair &Src,
288 RegSubRegPair &Dst) override {
289 // If we already get the only source we can rewrite, return false.
290 if (CurrentSrcIdx == 2)
291 return false;
292 // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
293 CurrentSrcIdx = 2;
294 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
295 Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
296 const MachineOperand &MODef = CopyLike.getOperand(0);
297
298 // We want to track something that is compatible with the
299 // partial definition.
300 if (MODef.getSubReg())
301 // Bail if we have to compose sub-register indices.
302 return false;
303 Dst = RegSubRegPair(MODef.getReg(),
304 (unsigned)CopyLike.getOperand(3).getImm());
305 return true;
306 }
307
308 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
309 if (CurrentSrcIdx != 2)
310 return false;
311 // We are rewriting the inserted reg.
312 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
313 MO.setReg(NewReg);
314 MO.setSubReg(NewSubReg);
315 return true;
316 }
317};
318
319/// Specialized rewriter for EXTRACT_SUBREG instruction.
320class ExtractSubregRewriter : public Rewriter {
321 const TargetInstrInfo &TII;
322
323public:
324 ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
325 : Rewriter(MI), TII(TII) {
326 assert(MI.isExtractSubreg() && "Invalid instruction");
327 }
328
329 /// \see Rewriter::getNextRewritableSource()
330 /// Here CopyLike has the following form:
331 /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
332 /// There is only one rewritable source: Src.subIdx,
333 /// which defines dst.dstSubIdx.
334 bool getNextRewritableSource(RegSubRegPair &Src,
335 RegSubRegPair &Dst) override {
336 // If we already get the only source we can rewrite, return false.
337 if (CurrentSrcIdx == 1)
338 return false;
339 // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
340 CurrentSrcIdx = 1;
341 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
342 // If we have to compose sub-register indices, bail out.
343 if (MOExtractedReg.getSubReg())
344 return false;
345
346 Src =
347 RegSubRegPair(MOExtractedReg.getReg(), CopyLike.getOperand(2).getImm());
348
349 // We want to track something that is compatible with the definition.
350 const MachineOperand &MODef = CopyLike.getOperand(0);
351 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
352 return true;
353 }
354
355 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
356 // The only source we can rewrite is the input register.
357 if (CurrentSrcIdx != 1)
358 return false;
359
360 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
361
362 // If we find a source that does not require to extract something,
363 // rewrite the operation with a copy.
364 if (!NewSubReg) {
365 // Move the current index to an invalid position.
366 // We do not want another call to this method to be able
367 // to do any change.
368 CurrentSrcIdx = -1;
369 // Rewrite the operation as a COPY.
370 // Get rid of the sub-register index.
371 CopyLike.removeOperand(2);
372 // Morph the operation into a COPY.
373 CopyLike.setDesc(TII.get(TargetOpcode::COPY));
374 return true;
375 }
376 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
377 return true;
378 }
379};
380
381/// Specialized rewriter for REG_SEQUENCE instruction.
382class RegSequenceRewriter : public Rewriter {
383public:
384 RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
385 assert(MI.isRegSequence() && "Invalid instruction");
386 }
387
388 /// \see Rewriter::getNextRewritableSource()
389 /// Here CopyLike has the following form:
390 /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
391 /// Each call will return a different source, walking all the available
392 /// source.
393 ///
394 /// The first call returns:
395 /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
396 /// (DstReg, DstSubReg) = (dst, subIdx1).
397 ///
398 /// The second call returns:
399 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
400 /// (DstReg, DstSubReg) = (dst, subIdx2).
401 ///
402 /// And so on, until all the sources have been traversed, then
403 /// it returns false.
404 bool getNextRewritableSource(RegSubRegPair &Src,
405 RegSubRegPair &Dst) override {
406 // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
407
408 // If this is the first call, move to the first argument.
409 if (CurrentSrcIdx == 0) {
410 CurrentSrcIdx = 1;
411 } else {
412 // Otherwise, move to the next argument and check that it is valid.
413 CurrentSrcIdx += 2;
414 if (CurrentSrcIdx >= CopyLike.getNumOperands())
415 return false;
416 }
417 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
418 Src.Reg = MOInsertedReg.getReg();
419 // If we have to compose sub-register indices, bail out.
420 if ((Src.SubReg = MOInsertedReg.getSubReg()))
421 return false;
422
423 // We want to track something that is compatible with the related
424 // partial definition.
425 Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
426
427 const MachineOperand &MODef = CopyLike.getOperand(0);
428 Dst.Reg = MODef.getReg();
429 assert(MODef.getSubReg() == 0 && "cannot have subregister def in SSA");
430 return true;
431 }
432
433 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
434 // We cannot rewrite out of bound operands.
435 // Moreover, rewritable sources are at odd positions.
436 if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
437 return false;
438
439 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
440 MO.setReg(NewReg);
441 MO.setSubReg(NewSubReg);
442 return true;
443 }
444};
445
446class PeepholeOptimizer : private MachineFunction::Delegate {
447 const TargetInstrInfo *TII = nullptr;
448 const TargetRegisterInfo *TRI = nullptr;
449 MachineRegisterInfo *MRI = nullptr;
450 MachineDominatorTree *DT = nullptr; // Machine dominator tree
451 MachineLoopInfo *MLI = nullptr;
452
453public:
454 PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
455 : DT(DT), MLI(MLI) {}
456
457 bool run(MachineFunction &MF);
458 /// Track Def -> Use info used for rewriting copies.
460
461 /// Sequence of instructions that formulate recurrence cycle.
462 using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
463
464private:
465 bool optimizeCmpInstr(MachineInstr &MI);
466 bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
468 bool optimizeSelect(MachineInstr &MI,
470 bool optimizeCondBranch(MachineInstr &MI);
471
472 bool optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter);
473 bool optimizeCoalescableCopy(MachineInstr &MI);
474 bool optimizeUncoalescableCopy(MachineInstr &MI,
476 bool optimizeRecurrence(MachineInstr &PHI);
477 bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
478 bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
480 bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
482 bool &Deleted);
483
484 /// Finds recurrence cycles, but only ones that formulated around
485 /// a def operand and a use operand that are tied. If there is a use
486 /// operand commutable with the tied use operand, find recurrence cycle
487 /// along that operand as well.
488 bool findTargetRecurrence(Register Reg,
489 const SmallSet<Register, 2> &TargetReg,
490 RecurrenceCycle &RC);
491
492 /// If copy instruction \p MI is a virtual register copy or a copy of a
493 /// constant physical register to a virtual register, track it in the
494 /// set CopySrcMIs. If this virtual register was previously seen as a
495 /// copy, replace the uses of this copy with the previously seen copy's
496 /// destination register.
497 bool foldRedundantCopy(MachineInstr &MI);
498
499 /// Is the register \p Reg a non-allocatable physical register?
500 bool isNAPhysCopy(Register Reg);
501
502 /// If copy instruction \p MI is a non-allocatable virtual<->physical
503 /// register copy, track it in the \p NAPhysToVirtMIs map. If this
504 /// non-allocatable physical register was previously copied to a virtual
505 /// registered and hasn't been clobbered, the virt->phys copy can be
506 /// deleted.
507 bool
508 foldRedundantNAPhysCopy(MachineInstr &MI,
509 DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
510
511 bool isLoadFoldable(MachineInstr &MI,
512 SmallSet<Register, 16> &FoldAsLoadDefCandidates);
513
514 /// Check whether \p MI is understood by the register coalescer
515 /// but may require some rewriting.
516 bool isCoalescableCopy(const MachineInstr &MI) {
517 // SubregToRegs are not interesting, because they are already register
518 // coalescer friendly.
519 return MI.isCopy() ||
520 (!DisableAdvCopyOpt && (MI.isRegSequence() || MI.isInsertSubreg() ||
521 MI.isExtractSubreg()));
522 }
523
524 /// Check whether \p MI is a copy like instruction that is
525 /// not recognized by the register coalescer.
526 bool isUncoalescableCopy(const MachineInstr &MI) {
527 return MI.isBitcast() || (!DisableAdvCopyOpt && (MI.isRegSequenceLike() ||
528 MI.isInsertSubregLike() ||
529 MI.isExtractSubregLike()));
530 }
531
532 MachineInstr &rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def,
533 RewriteMapTy &RewriteMap);
534
535 // Set of copies to virtual registers keyed by source register. Never
536 // holds any physreg which requires def tracking.
538
539 // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs.
540 void MF_HandleInsertion(MachineInstr &MI) override { return; }
541
542 bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) {
543 if (!MI.isCopy())
544 return false;
545
546 Register SrcReg = MI.getOperand(1).getReg();
547 unsigned SrcSubReg = MI.getOperand(1).getSubReg();
548 if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
549 return false;
550
551 SrcPair = RegSubRegPair(SrcReg, SrcSubReg);
552 return true;
553 }
554
555 // If a COPY instruction is to be deleted or changed, we should also remove
556 // it from CopySrcMIs.
557 void deleteChangedCopy(MachineInstr &MI) {
558 RegSubRegPair SrcPair;
559 if (!getCopySrc(MI, SrcPair))
560 return;
561
562 auto It = CopySrcMIs.find(SrcPair);
563 if (It != CopySrcMIs.end() && It->second == &MI)
564 CopySrcMIs.erase(It);
565 }
566
567 void MF_HandleRemoval(MachineInstr &MI) override { deleteChangedCopy(MI); }
568
569 void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override {
570 deleteChangedCopy(MI);
571 }
572};
573
574class PeepholeOptimizerLegacy : public MachineFunctionPass {
575public:
576 static char ID; // Pass identification
577
578 PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {
580 }
581
582 bool runOnMachineFunction(MachineFunction &MF) override;
583
584 void getAnalysisUsage(AnalysisUsage &AU) const override {
585 AU.setPreservesCFG();
589 if (Aggressive) {
592 }
593 }
594
597 MachineFunctionProperties::Property::IsSSA);
598 }
599};
600
601/// Helper class to hold instructions that are inside recurrence cycles.
602/// The recurrence cycle is formulated around 1) a def operand and its
603/// tied use operand, or 2) a def operand and a use operand that is commutable
604/// with another use operand which is tied to the def operand. In the latter
605/// case, index of the tied use operand and the commutable use operand are
606/// maintained with CommutePair.
607class RecurrenceInstr {
608public:
609 using IndexPair = std::pair<unsigned, unsigned>;
610
611 RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
612 RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
613 : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
614
615 MachineInstr *getMI() const { return MI; }
616 std::optional<IndexPair> getCommutePair() const { return CommutePair; }
617
618private:
620 std::optional<IndexPair> CommutePair;
621};
622
623/// Helper class to hold a reply for ValueTracker queries.
624/// Contains the returned sources for a given search and the instructions
625/// where the sources were tracked from.
626class ValueTrackerResult {
627private:
628 /// Track all sources found by one ValueTracker query.
630
631 /// Instruction using the sources in 'RegSrcs'.
632 const MachineInstr *Inst = nullptr;
633
634public:
635 ValueTrackerResult() = default;
636
637 ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); }
638
639 bool isValid() const { return getNumSources() > 0; }
640
641 void setInst(const MachineInstr *I) { Inst = I; }
642 const MachineInstr *getInst() const { return Inst; }
643
644 void clear() {
645 RegSrcs.clear();
646 Inst = nullptr;
647 }
648
649 void addSource(Register SrcReg, unsigned SrcSubReg) {
650 RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
651 }
652
653 void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
654 assert(Idx < getNumSources() && "Reg pair source out of index");
655 RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
656 }
657
658 int getNumSources() const { return RegSrcs.size(); }
659
660 RegSubRegPair getSrc(int Idx) const { return RegSrcs[Idx]; }
661
662 Register getSrcReg(int Idx) const {
663 assert(Idx < getNumSources() && "Reg source out of index");
664 return RegSrcs[Idx].Reg;
665 }
666
667 unsigned getSrcSubReg(int Idx) const {
668 assert(Idx < getNumSources() && "SubReg source out of index");
669 return RegSrcs[Idx].SubReg;
670 }
671
672 bool operator==(const ValueTrackerResult &Other) const {
673 if (Other.getInst() != getInst())
674 return false;
675
676 if (Other.getNumSources() != getNumSources())
677 return false;
678
679 for (int i = 0, e = Other.getNumSources(); i != e; ++i)
680 if (Other.getSrcReg(i) != getSrcReg(i) ||
681 Other.getSrcSubReg(i) != getSrcSubReg(i))
682 return false;
683 return true;
684 }
685};
686
687/// Helper class to track the possible sources of a value defined by
688/// a (chain of) copy related instructions.
689/// Given a definition (instruction and definition index), this class
690/// follows the use-def chain to find successive suitable sources.
691/// The given source can be used to rewrite the definition into
692/// def = COPY src.
693///
694/// For instance, let us consider the following snippet:
695/// v0 =
696/// v2 = INSERT_SUBREG v1, v0, sub0
697/// def = COPY v2.sub0
698///
699/// Using a ValueTracker for def = COPY v2.sub0 will give the following
700/// suitable sources:
701/// v2.sub0 and v0.
702/// Then, def can be rewritten into def = COPY v0.
703class ValueTracker {
704private:
705 /// The current point into the use-def chain.
706 const MachineInstr *Def = nullptr;
707
708 /// The index of the definition in Def.
709 unsigned DefIdx = 0;
710
711 /// The sub register index of the definition.
712 unsigned DefSubReg;
713
714 /// The register where the value can be found.
716
717 /// MachineRegisterInfo used to perform tracking.
719
720 /// Optional TargetInstrInfo used to perform some complex tracking.
721 const TargetInstrInfo *TII;
722
723 /// Dispatcher to the right underlying implementation of getNextSource.
724 ValueTrackerResult getNextSourceImpl();
725
726 /// Specialized version of getNextSource for Copy instructions.
727 ValueTrackerResult getNextSourceFromCopy();
728
729 /// Specialized version of getNextSource for Bitcast instructions.
730 ValueTrackerResult getNextSourceFromBitcast();
731
732 /// Specialized version of getNextSource for RegSequence instructions.
733 ValueTrackerResult getNextSourceFromRegSequence();
734
735 /// Specialized version of getNextSource for InsertSubreg instructions.
736 ValueTrackerResult getNextSourceFromInsertSubreg();
737
738 /// Specialized version of getNextSource for ExtractSubreg instructions.
739 ValueTrackerResult getNextSourceFromExtractSubreg();
740
741 /// Specialized version of getNextSource for SubregToReg instructions.
742 ValueTrackerResult getNextSourceFromSubregToReg();
743
744 /// Specialized version of getNextSource for PHI instructions.
745 ValueTrackerResult getNextSourceFromPHI();
746
747public:
748 /// Create a ValueTracker instance for the value defined by \p Reg.
749 /// \p DefSubReg represents the sub register index the value tracker will
750 /// track. It does not need to match the sub register index used in the
751 /// definition of \p Reg.
752 /// If \p Reg is a physical register, a value tracker constructed with
753 /// this constructor will not find any alternative source.
754 /// Indeed, when \p Reg is a physical register that constructor does not
755 /// know which definition of \p Reg it should track.
756 /// Use the next constructor to track a physical register.
757 ValueTracker(Register Reg, unsigned DefSubReg, const MachineRegisterInfo &MRI,
758 const TargetInstrInfo *TII = nullptr)
759 : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
760 if (!Reg.isPhysical()) {
761 Def = MRI.getVRegDef(Reg);
762 DefIdx = MRI.def_begin(Reg).getOperandNo();
763 }
764 }
765
766 /// Following the use-def chain, get the next available source
767 /// for the tracked value.
768 /// \return A ValueTrackerResult containing a set of registers
769 /// and sub registers with tracked values. A ValueTrackerResult with
770 /// an empty set of registers means no source was found.
771 ValueTrackerResult getNextSource();
772};
773
774} // end anonymous namespace
775
776char PeepholeOptimizerLegacy::ID = 0;
777
778char &llvm::PeepholeOptimizerLegacyID = PeepholeOptimizerLegacy::ID;
779
780INITIALIZE_PASS_BEGIN(PeepholeOptimizerLegacy, DEBUG_TYPE,
781 "Peephole Optimizations", false, false)
784INITIALIZE_PASS_END(PeepholeOptimizerLegacy, DEBUG_TYPE,
786
787/// If instruction is a copy-like instruction, i.e. it reads a single register
788/// and writes a single register and it does not modify the source, and if the
789/// source value is preserved as a sub-register of the result, then replace all
790/// reachable uses of the source with the subreg of the result.
791///
792/// Do not generate an EXTRACT that is used only in a debug use, as this changes
793/// the code. Since this code does not currently share EXTRACTs, just ignore all
794/// debug uses.
795bool PeepholeOptimizer::optimizeExtInstr(
797 SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
798 Register SrcReg, DstReg;
799 unsigned SubIdx;
800 if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
801 return false;
802
803 if (DstReg.isPhysical() || SrcReg.isPhysical())
804 return false;
805
806 if (MRI->hasOneNonDBGUse(SrcReg))
807 // No other uses.
808 return false;
809
810 // Ensure DstReg can get a register class that actually supports
811 // sub-registers. Don't change the class until we commit.
812 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
813 DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
814 if (!DstRC)
815 return false;
816
817 // The ext instr may be operating on a sub-register of SrcReg as well.
818 // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
819 // register.
820 // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
821 // SrcReg:SubIdx should be replaced.
822 bool UseSrcSubIdx =
823 TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
824
825 // The source has other uses. See if we can replace the other uses with use of
826 // the result of the extension.
828 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
829 ReachedBBs.insert(UI.getParent());
830
831 // Uses that are in the same BB of uses of the result of the instruction.
833
834 // Uses that the result of the instruction can reach.
836
837 bool ExtendLife = true;
838 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
839 MachineInstr *UseMI = UseMO.getParent();
840 if (UseMI == &MI)
841 continue;
842
843 if (UseMI->isPHI()) {
844 ExtendLife = false;
845 continue;
846 }
847
848 // Only accept uses of SrcReg:SubIdx.
849 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
850 continue;
851
852 // It's an error to translate this:
853 //
854 // %reg1025 = <sext> %reg1024
855 // ...
856 // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
857 //
858 // into this:
859 //
860 // %reg1025 = <sext> %reg1024
861 // ...
862 // %reg1027 = COPY %reg1025:4
863 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
864 //
865 // The problem here is that SUBREG_TO_REG is there to assert that an
866 // implicit zext occurs. It doesn't insert a zext instruction. If we allow
867 // the COPY here, it will give us the value after the <sext>, not the
868 // original value of %reg1024 before <sext>.
869 if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
870 continue;
871
872 MachineBasicBlock *UseMBB = UseMI->getParent();
873 if (UseMBB == &MBB) {
874 // Local uses that come after the extension.
875 if (!LocalMIs.count(UseMI))
876 Uses.push_back(&UseMO);
877 } else if (ReachedBBs.count(UseMBB)) {
878 // Non-local uses where the result of the extension is used. Always
879 // replace these unless it's a PHI.
880 Uses.push_back(&UseMO);
881 } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
882 // We may want to extend the live range of the extension result in order
883 // to replace these uses.
884 ExtendedUses.push_back(&UseMO);
885 } else {
886 // Both will be live out of the def MBB anyway. Don't extend live range of
887 // the extension result.
888 ExtendLife = false;
889 break;
890 }
891 }
892
893 if (ExtendLife && !ExtendedUses.empty())
894 // Extend the liveness of the extension result.
895 Uses.append(ExtendedUses.begin(), ExtendedUses.end());
896
897 // Now replace all uses.
898 bool Changed = false;
899 if (!Uses.empty()) {
901
902 // Look for PHI uses of the extended result, we don't want to extend the
903 // liveness of a PHI input. It breaks all kinds of assumptions down
904 // stream. A PHI use is expected to be the kill of its source values.
905 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
906 if (UI.isPHI())
907 PHIBBs.insert(UI.getParent());
908
909 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
910 for (MachineOperand *UseMO : Uses) {
911 MachineInstr *UseMI = UseMO->getParent();
912 MachineBasicBlock *UseMBB = UseMI->getParent();
913 if (PHIBBs.count(UseMBB))
914 continue;
915
916 // About to add uses of DstReg, clear DstReg's kill flags.
917 if (!Changed) {
918 MRI->clearKillFlags(DstReg);
919 MRI->constrainRegClass(DstReg, DstRC);
920 }
921
922 // SubReg defs are illegal in machine SSA phase,
923 // we should not generate SubReg defs.
924 //
925 // For example, for the instructions:
926 //
927 // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
928 // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc
929 //
930 // We should generate:
931 //
932 // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
933 // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0
934 // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0
935 //
936 if (UseSrcSubIdx)
937 RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
938
939 Register NewVR = MRI->createVirtualRegister(RC);
940 BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
941 TII->get(TargetOpcode::COPY), NewVR)
942 .addReg(DstReg, 0, SubIdx);
943 if (UseSrcSubIdx)
944 UseMO->setSubReg(0);
945
946 UseMO->setReg(NewVR);
947 ++NumReuse;
948 Changed = true;
949 }
950 }
951
952 return Changed;
953}
954
955/// If the instruction is a compare and the previous instruction it's comparing
956/// against already sets (or could be modified to set) the same flag as the
957/// compare, then we can remove the comparison and use the flag from the
958/// previous instruction.
959bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
960 // If this instruction is a comparison against zero and isn't comparing a
961 // physical register, we can try to optimize it.
962 Register SrcReg, SrcReg2;
963 int64_t CmpMask, CmpValue;
964 if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
965 SrcReg.isPhysical() || SrcReg2.isPhysical())
966 return false;
967
968 // Attempt to optimize the comparison instruction.
969 LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI);
970 if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
971 LLVM_DEBUG(dbgs() << " -> Successfully optimized compare!\n");
972 ++NumCmps;
973 return true;
974 }
975
976 return false;
977}
978
979/// Optimize a select instruction.
980bool PeepholeOptimizer::optimizeSelect(
982 unsigned TrueOp = 0;
983 unsigned FalseOp = 0;
984 bool Optimizable = false;
986 if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
987 return false;
988 if (!Optimizable)
989 return false;
990 if (!TII->optimizeSelect(MI, LocalMIs))
991 return false;
992 LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
993 MI.eraseFromParent();
994 ++NumSelects;
995 return true;
996}
997
998/// Check if a simpler conditional branch can be generated.
999bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
1000 return TII->optimizeCondBranch(MI);
1001}
1002
1003/// Try to find the next source that share the same register file
1004/// for the value defined by \p Reg and \p SubReg.
1005/// When true is returned, the \p RewriteMap can be used by the client to
1006/// retrieve all Def -> Use along the way up to the next source. Any found
1007/// Use that is not itself a key for another entry, is the next source to
1008/// use. During the search for the next source, multiple sources can be found
1009/// given multiple incoming sources of a PHI instruction. In this case, we
1010/// look in each PHI source for the next source; all found next sources must
1011/// share the same register file as \p Reg and \p SubReg. The client should
1012/// then be capable to rewrite all intermediate PHIs to get the next source.
1013/// \return False if no alternative sources are available. True otherwise.
1014bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
1015 RewriteMapTy &RewriteMap) {
1016 // Do not try to find a new source for a physical register.
1017 // So far we do not have any motivating example for doing that.
1018 // Thus, instead of maintaining untested code, we will revisit that if
1019 // that changes at some point.
1020 Register Reg = RegSubReg.Reg;
1021 if (Reg.isPhysical())
1022 return false;
1023 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
1024
1026 RegSubRegPair CurSrcPair = RegSubReg;
1027 SrcToLook.push_back(CurSrcPair);
1028
1029 unsigned PHICount = 0;
1030 do {
1031 CurSrcPair = SrcToLook.pop_back_val();
1032 // As explained above, do not handle physical registers
1033 if (CurSrcPair.Reg.isPhysical())
1034 return false;
1035
1036 ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
1037
1038 // Follow the chain of copies until we find a more suitable source, a phi
1039 // or have to abort.
1040 while (true) {
1041 ValueTrackerResult Res = ValTracker.getNextSource();
1042 // Abort at the end of a chain (without finding a suitable source).
1043 if (!Res.isValid())
1044 return false;
1045
1046 // Insert the Def -> Use entry for the recently found source.
1047 ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
1048 if (CurSrcRes.isValid()) {
1049 assert(CurSrcRes == Res && "ValueTrackerResult found must match");
1050 // An existent entry with multiple sources is a PHI cycle we must avoid.
1051 // Otherwise it's an entry with a valid next source we already found.
1052 if (CurSrcRes.getNumSources() > 1) {
1054 << "findNextSource: found PHI cycle, aborting...\n");
1055 return false;
1056 }
1057 break;
1058 }
1059 RewriteMap.insert(std::make_pair(CurSrcPair, Res));
1060
1061 // ValueTrackerResult usually have one source unless it's the result from
1062 // a PHI instruction. Add the found PHI edges to be looked up further.
1063 unsigned NumSrcs = Res.getNumSources();
1064 if (NumSrcs > 1) {
1065 PHICount++;
1066 if (PHICount >= RewritePHILimit) {
1067 LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
1068 return false;
1069 }
1070
1071 for (unsigned i = 0; i < NumSrcs; ++i)
1072 SrcToLook.push_back(Res.getSrc(i));
1073 break;
1074 }
1075
1076 CurSrcPair = Res.getSrc(0);
1077 // Do not extend the live-ranges of physical registers as they add
1078 // constraints to the register allocator. Moreover, if we want to extend
1079 // the live-range of a physical register, unlike SSA virtual register,
1080 // we will have to check that they aren't redefine before the related use.
1081 if (CurSrcPair.Reg.isPhysical())
1082 return false;
1083
1084 // Keep following the chain if the value isn't any better yet.
1085 const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
1086 if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC,
1087 CurSrcPair.SubReg))
1088 continue;
1089
1090 // We currently cannot deal with subreg operands on PHI instructions
1091 // (see insertPHI()).
1092 if (PHICount > 0 && CurSrcPair.SubReg != 0)
1093 continue;
1094
1095 // We found a suitable source, and are done with this chain.
1096 break;
1097 }
1098 } while (!SrcToLook.empty());
1099
1100 // If we did not find a more suitable source, there is nothing to optimize.
1101 return CurSrcPair.Reg != Reg;
1102}
1103
1104/// Insert a PHI instruction with incoming edges \p SrcRegs that are
1105/// guaranteed to have the same register class. This is necessary whenever we
1106/// successfully traverse a PHI instruction and find suitable sources coming
1107/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
1108/// suitable to be used in a new COPY instruction.
1110 const TargetInstrInfo &TII,
1111 const SmallVectorImpl<RegSubRegPair> &SrcRegs,
1112 MachineInstr &OrigPHI) {
1113 assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
1114
1115 const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
1116 // NewRC is only correct if no subregisters are involved. findNextSource()
1117 // should have rejected those cases already.
1118 assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
1119 Register NewVR = MRI.createVirtualRegister(NewRC);
1120 MachineBasicBlock *MBB = OrigPHI.getParent();
1121 MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
1122 TII.get(TargetOpcode::PHI), NewVR);
1123
1124 unsigned MBBOpIdx = 2;
1125 for (const RegSubRegPair &RegPair : SrcRegs) {
1126 MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
1127 MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
1128 // Since we're extended the lifetime of RegPair.Reg, clear the
1129 // kill flags to account for that and make RegPair.Reg reaches
1130 // the new PHI.
1131 MRI.clearKillFlags(RegPair.Reg);
1132 MBBOpIdx += 2;
1133 }
1134
1135 return *MIB;
1136}
1137
1138/// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
1139/// the new source to use for rewrite. If \p HandleMultipleSources is true and
1140/// multiple sources for a given \p Def are found along the way, we found a
1141/// PHI instructions that needs to be rewritten.
1142/// TODO: HandleMultipleSources should be removed once we test PHI handling
1143/// with coalescable copies.
1144static RegSubRegPair
1146 RegSubRegPair Def,
1147 const PeepholeOptimizer::RewriteMapTy &RewriteMap,
1148 bool HandleMultipleSources = true) {
1149 RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
1150 while (true) {
1151 ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
1152 // If there are no entries on the map, LookupSrc is the new source.
1153 if (!Res.isValid())
1154 return LookupSrc;
1155
1156 // There's only one source for this definition, keep searching...
1157 unsigned NumSrcs = Res.getNumSources();
1158 if (NumSrcs == 1) {
1159 LookupSrc.Reg = Res.getSrcReg(0);
1160 LookupSrc.SubReg = Res.getSrcSubReg(0);
1161 continue;
1162 }
1163
1164 // TODO: Remove once multiple srcs w/ coalescable copies are supported.
1165 if (!HandleMultipleSources)
1166 break;
1167
1168 // Multiple sources, recurse into each source to find a new source
1169 // for it. Then, rewrite the PHI accordingly to its new edges.
1171 for (unsigned i = 0; i < NumSrcs; ++i) {
1172 RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
1173 NewPHISrcs.push_back(
1174 getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
1175 }
1176
1177 // Build the new PHI node and return its def register as the new source.
1178 MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
1179 MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
1180 LLVM_DEBUG(dbgs() << "-- getNewSource\n");
1181 LLVM_DEBUG(dbgs() << " Replacing: " << OrigPHI);
1182 LLVM_DEBUG(dbgs() << " With: " << NewPHI);
1183 const MachineOperand &MODef = NewPHI.getOperand(0);
1184 return RegSubRegPair(MODef.getReg(), MODef.getSubReg());
1185 }
1186
1187 return RegSubRegPair(0, 0);
1188}
1189
1190bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) {
1191 bool Changed = false;
1192 // Get the right rewriter for the current copy.
1193 // Rewrite each rewritable source.
1194 RegSubRegPair Src;
1195 RegSubRegPair TrackPair;
1196 while (CpyRewriter.getNextRewritableSource(Src, TrackPair)) {
1197 // Keep track of PHI nodes and its incoming edges when looking for sources.
1198 RewriteMapTy RewriteMap;
1199 // Try to find a more suitable source. If we failed to do so, or get the
1200 // actual source, move to the next source.
1201 if (!findNextSource(TrackPair, RewriteMap))
1202 continue;
1203
1204 // Get the new source to rewrite. TODO: Only enable handling of multiple
1205 // sources (PHIs) once we have a motivating example and testcases for it.
1206 RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
1207 /*HandleMultipleSources=*/false);
1208 if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0)
1209 continue;
1210
1211 // Rewrite source.
1212 if (CpyRewriter.RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
1213 // We may have extended the live-range of NewSrc, account for that.
1214 MRI->clearKillFlags(NewSrc.Reg);
1215 Changed = true;
1216 }
1217 }
1218
1219 // TODO: We could have a clean-up method to tidy the instruction.
1220 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
1221 // => v0 = COPY v1
1222 // Currently we haven't seen motivating example for that and we
1223 // want to avoid untested code.
1224 NumRewrittenCopies += Changed;
1225 return Changed;
1226}
1227
1228/// Optimize generic copy instructions to avoid cross register bank copy.
1229/// The optimization looks through a chain of copies and tries to find a source
1230/// that has a compatible register class.
1231/// Two register classes are considered to be compatible if they share the same
1232/// register bank.
1233/// New copies issued by this optimization are register allocator
1234/// friendly. This optimization does not remove any copy as it may
1235/// overconstrain the register allocator, but replaces some operands
1236/// when possible.
1237/// \pre isCoalescableCopy(*MI) is true.
1238/// \return True, when \p MI has been rewritten. False otherwise.
1239bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
1240 assert(isCoalescableCopy(MI) && "Invalid argument");
1241 assert(MI.getDesc().getNumDefs() == 1 &&
1242 "Coalescer can understand multiple defs?!");
1243 const MachineOperand &MODef = MI.getOperand(0);
1244 // Do not rewrite physical definitions.
1245 if (MODef.getReg().isPhysical())
1246 return false;
1247
1248 switch (MI.getOpcode()) {
1249 case TargetOpcode::COPY:
1250 return optimizeCoalescableCopyImpl(CopyRewriter(MI));
1251 case TargetOpcode::INSERT_SUBREG:
1252 return optimizeCoalescableCopyImpl(InsertSubregRewriter(MI));
1253 case TargetOpcode::EXTRACT_SUBREG:
1254 return optimizeCoalescableCopyImpl(ExtractSubregRewriter(MI, *TII));
1255 case TargetOpcode::REG_SEQUENCE:
1256 return optimizeCoalescableCopyImpl(RegSequenceRewriter(MI));
1257 default:
1258 // Handle uncoalescable copy-like instructions.
1259 if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
1260 MI.isExtractSubregLike())
1261 return optimizeCoalescableCopyImpl(UncoalescableRewriter(MI));
1262 return false;
1263 }
1264}
1265
1266/// Rewrite the source found through \p Def, by using the \p RewriteMap
1267/// and create a new COPY instruction. More info about RewriteMap in
1268/// PeepholeOptimizer::findNextSource. Right now this is only used to handle
1269/// Uncoalescable copies, since they are copy like instructions that aren't
1270/// recognized by the register allocator.
1271MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
1272 RegSubRegPair Def,
1273 RewriteMapTy &RewriteMap) {
1274 assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers");
1275
1276 // Find the new source to use in the COPY rewrite.
1277 RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);
1278
1279 // Insert the COPY.
1280 const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
1281 Register NewVReg = MRI->createVirtualRegister(DefRC);
1282
1283 MachineInstr *NewCopy =
1284 BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
1285 TII->get(TargetOpcode::COPY), NewVReg)
1286 .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
1287
1288 if (Def.SubReg) {
1289 NewCopy->getOperand(0).setSubReg(Def.SubReg);
1290 NewCopy->getOperand(0).setIsUndef();
1291 }
1292
1293 LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
1294 LLVM_DEBUG(dbgs() << " Replacing: " << CopyLike);
1295 LLVM_DEBUG(dbgs() << " With: " << *NewCopy);
1296 MRI->replaceRegWith(Def.Reg, NewVReg);
1297 MRI->clearKillFlags(NewVReg);
1298
1299 // We extended the lifetime of NewSrc.Reg, clear the kill flags to
1300 // account for that.
1301 MRI->clearKillFlags(NewSrc.Reg);
1302
1303 return *NewCopy;
1304}
1305
1306/// Optimize copy-like instructions to create
1307/// register coalescer friendly instruction.
1308/// The optimization tries to kill-off the \p MI by looking
1309/// through a chain of copies to find a source that has a compatible
1310/// register class.
1311/// If such a source is found, it replace \p MI by a generic COPY
1312/// operation.
1313/// \pre isUncoalescableCopy(*MI) is true.
1314/// \return True, when \p MI has been optimized. In that case, \p MI has
1315/// been removed from its parent.
1316/// All COPY instructions created, are inserted in \p LocalMIs.
1317bool PeepholeOptimizer::optimizeUncoalescableCopy(
1319 assert(isUncoalescableCopy(MI) && "Invalid argument");
1320 UncoalescableRewriter CpyRewriter(MI);
1321
1322 // Rewrite each rewritable source by generating new COPYs. This works
1323 // differently from optimizeCoalescableCopy since it first makes sure that all
1324 // definitions can be rewritten.
1325 RewriteMapTy RewriteMap;
1326 RegSubRegPair Src;
1328 SmallVector<RegSubRegPair, 4> RewritePairs;
1329 while (CpyRewriter.getNextRewritableSource(Src, Def)) {
1330 // If a physical register is here, this is probably for a good reason.
1331 // Do not rewrite that.
1332 if (Def.Reg.isPhysical())
1333 return false;
1334
1335 // If we do not know how to rewrite this definition, there is no point
1336 // in trying to kill this instruction.
1337 if (!findNextSource(Def, RewriteMap))
1338 return false;
1339
1340 RewritePairs.push_back(Def);
1341 }
1342
1343 // The change is possible for all defs, do it.
1344 for (const RegSubRegPair &Def : RewritePairs) {
1345 // Rewrite the "copy" in a way the register coalescer understands.
1346 MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
1347 LocalMIs.insert(&NewCopy);
1348 }
1349
1350 // MI is now dead.
1351 LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI);
1352 MI.eraseFromParent();
1353 ++NumUncoalescableCopies;
1354 return true;
1355}
1356
1357/// Check whether MI is a candidate for folding into a later instruction.
1358/// We only fold loads to virtual registers and the virtual register defined
1359/// has a single user.
1360bool PeepholeOptimizer::isLoadFoldable(
1361 MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
1362 if (!MI.canFoldAsLoad() || !MI.mayLoad())
1363 return false;
1364 const MCInstrDesc &MCID = MI.getDesc();
1365 if (MCID.getNumDefs() != 1)
1366 return false;
1367
1368 Register Reg = MI.getOperand(0).getReg();
1369 // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
1370 // loads. It should be checked when processing uses of the load, since
1371 // uses can be removed during peephole.
1372 if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
1373 MRI->hasOneNonDBGUser(Reg)) {
1374 FoldAsLoadDefCandidates.insert(Reg);
1375 return true;
1376 }
1377 return false;
1378}
1379
1380bool PeepholeOptimizer::isMoveImmediate(
1383 const MCInstrDesc &MCID = MI.getDesc();
1384 if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg())
1385 return false;
1386 Register Reg = MI.getOperand(0).getReg();
1387 if (!Reg.isVirtual())
1388 return false;
1389
1390 int64_t ImmVal;
1391 if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal))
1392 return false;
1393
1394 ImmDefMIs.insert(std::make_pair(Reg, &MI));
1395 ImmDefRegs.insert(Reg);
1396 return true;
1397}
1398
1399/// Try folding register operands that are defined by move immediate
1400/// instructions, i.e. a trivial constant folding optimization, if
1401/// and only if the def and use are in the same BB.
1402bool PeepholeOptimizer::foldImmediate(
1404 DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) {
1405 Deleted = false;
1406 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1407 MachineOperand &MO = MI.getOperand(i);
1408 if (!MO.isReg() || MO.isDef())
1409 continue;
1410 Register Reg = MO.getReg();
1411 if (!Reg.isVirtual())
1412 continue;
1413 if (ImmDefRegs.count(Reg) == 0)
1414 continue;
1416 assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
1417 if (TII->foldImmediate(MI, *II->second, Reg, MRI)) {
1418 ++NumImmFold;
1419 // foldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI
1420 // is not deleted, and we happened to get a same MI, we can delete MI and
1421 // replace its users.
1422 if (MRI->getVRegDef(Reg) &&
1423 MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) {
1424 Register DstReg = MI.getOperand(0).getReg();
1425 if (DstReg.isVirtual() &&
1426 MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) {
1427 MRI->replaceRegWith(DstReg, Reg);
1428 MI.eraseFromParent();
1429 Deleted = true;
1430 }
1431 }
1432 return true;
1433 }
1434 }
1435 return false;
1436}
1437
1438// FIXME: This is very simple and misses some cases which should be handled when
1439// motivating examples are found.
1440//
1441// The copy rewriting logic should look at uses as well as defs and be able to
1442// eliminate copies across blocks.
1443//
1444// Later copies that are subregister extracts will also not be eliminated since
1445// only the first copy is considered.
1446//
1447// e.g.
1448// %1 = COPY %0
1449// %2 = COPY %0:sub1
1450//
1451// Should replace %2 uses with %1:sub1
1452bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI) {
1453 assert(MI.isCopy() && "expected a COPY machine instruction");
1454
1455 RegSubRegPair SrcPair;
1456 if (!getCopySrc(MI, SrcPair))
1457 return false;
1458
1459 Register DstReg = MI.getOperand(0).getReg();
1460 if (!DstReg.isVirtual())
1461 return false;
1462
1463 if (CopySrcMIs.insert(std::make_pair(SrcPair, &MI)).second) {
1464 // First copy of this reg seen.
1465 return false;
1466 }
1467
1468 MachineInstr *PrevCopy = CopySrcMIs.find(SrcPair)->second;
1469
1470 assert(SrcPair.SubReg == PrevCopy->getOperand(1).getSubReg() &&
1471 "Unexpected mismatching subreg!");
1472
1473 Register PrevDstReg = PrevCopy->getOperand(0).getReg();
1474
1475 // Only replace if the copy register class is the same.
1476 //
1477 // TODO: If we have multiple copies to different register classes, we may want
1478 // to track multiple copies of the same source register.
1479 if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
1480 return false;
1481
1482 MRI->replaceRegWith(DstReg, PrevDstReg);
1483
1484 // Lifetime of the previous copy has been extended.
1485 MRI->clearKillFlags(PrevDstReg);
1486 return true;
1487}
1488
1489bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
1490 return Reg.isPhysical() && !MRI->isAllocatable(Reg);
1491}
1492
1493bool PeepholeOptimizer::foldRedundantNAPhysCopy(
1495 assert(MI.isCopy() && "expected a COPY machine instruction");
1496
1498 return false;
1499
1500 Register DstReg = MI.getOperand(0).getReg();
1501 Register SrcReg = MI.getOperand(1).getReg();
1502 if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) {
1503 // %vreg = COPY $physreg
1504 // Avoid using a datastructure which can track multiple live non-allocatable
1505 // phys->virt copies since LLVM doesn't seem to do this.
1506 NAPhysToVirtMIs.insert({SrcReg, &MI});
1507 return false;
1508 }
1509
1510 if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
1511 return false;
1512
1513 // $physreg = COPY %vreg
1514 auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
1515 if (PrevCopy == NAPhysToVirtMIs.end()) {
1516 // We can't remove the copy: there was an intervening clobber of the
1517 // non-allocatable physical register after the copy to virtual.
1518 LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
1519 << MI);
1520 return false;
1521 }
1522
1523 Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
1524 if (PrevDstReg == SrcReg) {
1525 // Remove the virt->phys copy: we saw the virtual register definition, and
1526 // the non-allocatable physical register's state hasn't changed since then.
1527 LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
1528 ++NumNAPhysCopies;
1529 return true;
1530 }
1531
1532 // Potential missed optimization opportunity: we saw a different virtual
1533 // register get a copy of the non-allocatable physical register, and we only
1534 // track one such copy. Avoid getting confused by this new non-allocatable
1535 // physical register definition, and remove it from the tracked copies.
1536 LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
1537 NAPhysToVirtMIs.erase(PrevCopy);
1538 return false;
1539}
1540
1541/// \bried Returns true if \p MO is a virtual register operand.
1543 return MO.isReg() && MO.getReg().isVirtual();
1544}
1545
1546bool PeepholeOptimizer::findTargetRecurrence(
1547 Register Reg, const SmallSet<Register, 2> &TargetRegs,
1548 RecurrenceCycle &RC) {
1549 // Recurrence found if Reg is in TargetRegs.
1550 if (TargetRegs.count(Reg))
1551 return true;
1552
1553 // TODO: Curerntly, we only allow the last instruction of the recurrence
1554 // cycle (the instruction that feeds the PHI instruction) to have more than
1555 // one uses to guarantee that commuting operands does not tie registers
1556 // with overlapping live range. Once we have actual live range info of
1557 // each register, this constraint can be relaxed.
1558 if (!MRI->hasOneNonDBGUse(Reg))
1559 return false;
1560
1561 // Give up if the reccurrence chain length is longer than the limit.
1562 if (RC.size() >= MaxRecurrenceChain)
1563 return false;
1564
1565 MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
1566 unsigned Idx = MI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr);
1567
1568 // Only interested in recurrences whose instructions have only one def, which
1569 // is a virtual register.
1570 if (MI.getDesc().getNumDefs() != 1)
1571 return false;
1572
1573 MachineOperand &DefOp = MI.getOperand(0);
1574 if (!isVirtualRegisterOperand(DefOp))
1575 return false;
1576
1577 // Check if def operand of MI is tied to any use operand. We are only
1578 // interested in the case that all the instructions in the recurrence chain
1579 // have there def operand tied with one of the use operand.
1580 unsigned TiedUseIdx;
1581 if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
1582 return false;
1583
1584 if (Idx == TiedUseIdx) {
1585 RC.push_back(RecurrenceInstr(&MI));
1586 return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
1587 } else {
1588 // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
1589 unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
1590 if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
1591 RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
1592 return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
1593 }
1594 }
1595
1596 return false;
1597}
1598
1599/// Phi instructions will eventually be lowered to copy instructions.
1600/// If phi is in a loop header, a recurrence may formulated around the source
1601/// and destination of the phi. For such case commuting operands of the
1602/// instructions in the recurrence may enable coalescing of the copy instruction
1603/// generated from the phi. For example, if there is a recurrence of
1604///
1605/// LoopHeader:
1606/// %1 = phi(%0, %100)
1607/// LoopLatch:
1608/// %0<def, tied1> = ADD %2<def, tied0>, %1
1609///
1610/// , the fact that %0 and %2 are in the same tied operands set makes
1611/// the coalescing of copy instruction generated from the phi in
1612/// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and
1613/// %2 have overlapping live range. This introduces additional move
1614/// instruction to the final assembly. However, if we commute %2 and
1615/// %1 of ADD instruction, the redundant move instruction can be
1616/// avoided.
1617bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
1618 SmallSet<Register, 2> TargetRegs;
1619 for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
1620 MachineOperand &MO = PHI.getOperand(Idx);
1621 assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
1622 TargetRegs.insert(MO.getReg());
1623 }
1624
1625 bool Changed = false;
1626 RecurrenceCycle RC;
1627 if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
1628 // Commutes operands of instructions in RC if necessary so that the copy to
1629 // be generated from PHI can be coalesced.
1630 LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
1631 for (auto &RI : RC) {
1632 LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
1633 auto CP = RI.getCommutePair();
1634 if (CP) {
1635 Changed = true;
1636 TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
1637 (*CP).second);
1638 LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
1639 }
1640 }
1641 }
1642
1643 return Changed;
1644}
1645
1649 MFPropsModifier _(*this, MF);
1650 auto *DT =
1651 Aggressive ? &MFAM.getResult<MachineDominatorTreeAnalysis>(MF) : nullptr;
1652 auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
1653 PeepholeOptimizer Impl(DT, MLI);
1654 bool Changed = Impl.run(MF);
1655 if (!Changed)
1656 return PreservedAnalyses::all();
1657
1659 PA.preserve<MachineDominatorTreeAnalysis>();
1660 PA.preserve<MachineLoopAnalysis>();
1661 PA.preserveSet<CFGAnalyses>();
1662 return PA;
1663}
1664
1665bool PeepholeOptimizerLegacy::runOnMachineFunction(MachineFunction &MF) {
1666 if (skipFunction(MF.getFunction()))
1667 return false;
1668 auto *DT = Aggressive
1669 ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
1670 : nullptr;
1671 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1672 PeepholeOptimizer Impl(DT, MLI);
1673 return Impl.run(MF);
1674}
1675
1676bool PeepholeOptimizer::run(MachineFunction &MF) {
1677
1678 LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
1679 LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
1680
1681 if (DisablePeephole)
1682 return false;
1683
1684 TII = MF.getSubtarget().getInstrInfo();
1686 MRI = &MF.getRegInfo();
1687 MF.setDelegate(this);
1688
1689 bool Changed = false;
1690
1691 for (MachineBasicBlock &MBB : MF) {
1692 bool SeenMoveImm = false;
1693
1694 // During this forward scan, at some point it needs to answer the question
1695 // "given a pointer to an MI in the current BB, is it located before or
1696 // after the current instruction".
1697 // To perform this, the following set keeps track of the MIs already seen
1698 // during the scan, if a MI is not in the set, it is assumed to be located
1699 // after. Newly created MIs have to be inserted in the set as well.
1701 SmallSet<Register, 4> ImmDefRegs;
1703 SmallSet<Register, 16> FoldAsLoadDefCandidates;
1704
1705 // Track when a non-allocatable physical register is copied to a virtual
1706 // register so that useless moves can be removed.
1707 //
1708 // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg`
1709 // without any intervening re-definition of $physreg.
1710 DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
1711
1712 CopySrcMIs.clear();
1713
1714 bool IsLoopHeader = MLI->isLoopHeader(&MBB);
1715
1716 for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
1717 MII != MIE;) {
1718 MachineInstr *MI = &*MII;
1719 // We may be erasing MI below, increment MII now.
1720 ++MII;
1721 LocalMIs.insert(MI);
1722
1723 // Skip debug instructions. They should not affect this peephole
1724 // optimization.
1725 if (MI->isDebugInstr())
1726 continue;
1727
1728 if (MI->isPosition())
1729 continue;
1730
1731 if (IsLoopHeader && MI->isPHI()) {
1732 if (optimizeRecurrence(*MI)) {
1733 Changed = true;
1734 continue;
1735 }
1736 }
1737
1738 if (!MI->isCopy()) {
1739 for (const MachineOperand &MO : MI->operands()) {
1740 // Visit all operands: definitions can be implicit or explicit.
1741 if (MO.isReg()) {
1742 Register Reg = MO.getReg();
1743 if (MO.isDef() && isNAPhysCopy(Reg)) {
1744 const auto &Def = NAPhysToVirtMIs.find(Reg);
1745 if (Def != NAPhysToVirtMIs.end()) {
1746 // A new definition of the non-allocatable physical register
1747 // invalidates previous copies.
1749 << "NAPhysCopy: invalidating because of " << *MI);
1750 NAPhysToVirtMIs.erase(Def);
1751 }
1752 }
1753 } else if (MO.isRegMask()) {
1754 const uint32_t *RegMask = MO.getRegMask();
1755 for (auto &RegMI : NAPhysToVirtMIs) {
1756 Register Def = RegMI.first;
1757 if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
1759 << "NAPhysCopy: invalidating because of " << *MI);
1760 NAPhysToVirtMIs.erase(Def);
1761 }
1762 }
1763 }
1764 }
1765 }
1766
1767 if (MI->isImplicitDef() || MI->isKill())
1768 continue;
1769
1770 if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
1771 // Blow away all non-allocatable physical registers knowledge since we
1772 // don't know what's correct anymore.
1773 //
1774 // FIXME: handle explicit asm clobbers.
1775 LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
1776 << *MI);
1777 NAPhysToVirtMIs.clear();
1778 }
1779
1780 if ((isUncoalescableCopy(*MI) &&
1781 optimizeUncoalescableCopy(*MI, LocalMIs)) ||
1782 (MI->isCompare() && optimizeCmpInstr(*MI)) ||
1783 (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
1784 // MI is deleted.
1785 LocalMIs.erase(MI);
1786 Changed = true;
1787 continue;
1788 }
1789
1790 if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
1791 Changed = true;
1792 continue;
1793 }
1794
1795 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
1796 // MI is just rewritten.
1797 Changed = true;
1798 continue;
1799 }
1800
1801 if (MI->isCopy() && (foldRedundantCopy(*MI) ||
1802 foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
1803 LocalMIs.erase(MI);
1804 LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
1805 MI->eraseFromParent();
1806 Changed = true;
1807 continue;
1808 }
1809
1810 if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
1811 SeenMoveImm = true;
1812 } else {
1813 Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
1814 // optimizeExtInstr might have created new instructions after MI
1815 // and before the already incremented MII. Adjust MII so that the
1816 // next iteration sees the new instructions.
1817 MII = MI;
1818 ++MII;
1819 if (SeenMoveImm) {
1820 bool Deleted;
1821 Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted);
1822 if (Deleted) {
1823 LocalMIs.erase(MI);
1824 continue;
1825 }
1826 }
1827 }
1828
1829 // Check whether MI is a load candidate for folding into a later
1830 // instruction. If MI is not a candidate, check whether we can fold an
1831 // earlier load into MI.
1832 if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
1833 !FoldAsLoadDefCandidates.empty()) {
1834
1835 // We visit each operand even after successfully folding a previous
1836 // one. This allows us to fold multiple loads into a single
1837 // instruction. We do assume that optimizeLoadInstr doesn't insert
1838 // foldable uses earlier in the argument list. Since we don't restart
1839 // iteration, we'd miss such cases.
1840 const MCInstrDesc &MIDesc = MI->getDesc();
1841 for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands(); ++i) {
1842 const MachineOperand &MOp = MI->getOperand(i);
1843 if (!MOp.isReg())
1844 continue;
1845 Register FoldAsLoadDefReg = MOp.getReg();
1846 if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
1847 // We need to fold load after optimizeCmpInstr, since
1848 // optimizeCmpInstr can enable folding by converting SUB to CMP.
1849 // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
1850 // we need it for markUsesInDebugValueAsUndef().
1851 Register FoldedReg = FoldAsLoadDefReg;
1852 MachineInstr *DefMI = nullptr;
1853 if (MachineInstr *FoldMI =
1854 TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
1855 // Update LocalMIs since we replaced MI with FoldMI and deleted
1856 // DefMI.
1857 LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
1858 LLVM_DEBUG(dbgs() << " With: " << *FoldMI);
1859 LocalMIs.erase(MI);
1860 LocalMIs.erase(DefMI);
1861 LocalMIs.insert(FoldMI);
1862 // Update the call info.
1863 if (MI->shouldUpdateAdditionalCallInfo())
1864 MI->getMF()->moveAdditionalCallInfo(MI, FoldMI);
1865 MI->eraseFromParent();
1867 MRI->markUsesInDebugValueAsUndef(FoldedReg);
1868 FoldAsLoadDefCandidates.erase(FoldedReg);
1869 ++NumLoadFold;
1870
1871 // MI is replaced with FoldMI so we can continue trying to fold
1872 Changed = true;
1873 MI = FoldMI;
1874 }
1875 }
1876 }
1877 }
1878
1879 // If we run into an instruction we can't fold across, discard
1880 // the load candidates. Note: We might be able to fold *into* this
1881 // instruction, so this needs to be after the folding logic.
1882 if (MI->isLoadFoldBarrier()) {
1883 LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
1884 FoldAsLoadDefCandidates.clear();
1885 }
1886 }
1887 }
1888
1889 MF.resetDelegate(this);
1890 return Changed;
1891}
1892
1893ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
1894 assert(Def->isCopy() && "Invalid definition");
1895 // Copy instruction are supposed to be: Def = Src.
1896 // If someone breaks this assumption, bad things will happen everywhere.
1897 // There may be implicit uses preventing the copy to be moved across
1898 // some target specific register definitions
1899 assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
1900 "Invalid number of operands");
1901 assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
1902
1903 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
1904 // If we look for a different subreg, it means we want a subreg of src.
1905 // Bails as we do not support composing subregs yet.
1906 return ValueTrackerResult();
1907 // Otherwise, we want the whole source.
1908 const MachineOperand &Src = Def->getOperand(1);
1909 if (Src.isUndef())
1910 return ValueTrackerResult();
1911 return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1912}
1913
1914ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
1915 assert(Def->isBitcast() && "Invalid definition");
1916
1917 // Bail if there are effects that a plain copy will not expose.
1918 if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
1919 return ValueTrackerResult();
1920
1921 // Bitcasts with more than one def are not supported.
1922 if (Def->getDesc().getNumDefs() != 1)
1923 return ValueTrackerResult();
1924 const MachineOperand DefOp = Def->getOperand(DefIdx);
1925 if (DefOp.getSubReg() != DefSubReg)
1926 // If we look for a different subreg, it means we want a subreg of the src.
1927 // Bails as we do not support composing subregs yet.
1928 return ValueTrackerResult();
1929
1930 unsigned SrcIdx = Def->getNumOperands();
1931 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
1932 ++OpIdx) {
1933 const MachineOperand &MO = Def->getOperand(OpIdx);
1934 if (!MO.isReg() || !MO.getReg())
1935 continue;
1936 // Ignore dead implicit defs.
1937 if (MO.isImplicit() && MO.isDead())
1938 continue;
1939 assert(!MO.isDef() && "We should have skipped all the definitions by now");
1940 if (SrcIdx != EndOpIdx)
1941 // Multiple sources?
1942 return ValueTrackerResult();
1943 SrcIdx = OpIdx;
1944 }
1945
1946 // In some rare case, Def has no input, SrcIdx is out of bound,
1947 // getOperand(SrcIdx) will fail below.
1948 if (SrcIdx >= Def->getNumOperands())
1949 return ValueTrackerResult();
1950
1951 // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
1952 // will break the assumed guarantees for the upper bits.
1953 for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
1954 if (UseMI.isSubregToReg())
1955 return ValueTrackerResult();
1956 }
1957
1958 const MachineOperand &Src = Def->getOperand(SrcIdx);
1959 if (Src.isUndef())
1960 return ValueTrackerResult();
1961 return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1962}
1963
1964ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
1965 assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
1966 "Invalid definition");
1967
1968 if (Def->getOperand(DefIdx).getSubReg())
1969 // If we are composing subregs, bail out.
1970 // The case we are checking is Def.<subreg> = REG_SEQUENCE.
1971 // This should almost never happen as the SSA property is tracked at
1972 // the register level (as opposed to the subreg level).
1973 // I.e.,
1974 // Def.sub0 =
1975 // Def.sub1 =
1976 // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
1977 // Def. Thus, it must not be generated.
1978 // However, some code could theoretically generates a single
1979 // Def.sub0 (i.e, not defining the other subregs) and we would
1980 // have this case.
1981 // If we can ascertain (or force) that this never happens, we could
1982 // turn that into an assertion.
1983 return ValueTrackerResult();
1984
1986 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
1987 return ValueTrackerResult();
1988
1989 // We are looking at:
1990 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
1991 // Check if one of the operand defines the subreg we are interested in.
1992 for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
1993 if (RegSeqInput.SubIdx == DefSubReg)
1994 return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
1995 }
1996
1997 // If the subreg we are tracking is super-defined by another subreg,
1998 // we could follow this value. However, this would require to compose
1999 // the subreg and we do not do that for now.
2000 return ValueTrackerResult();
2001}
2002
2003ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
2004 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
2005 "Invalid definition");
2006
2007 if (Def->getOperand(DefIdx).getSubReg())
2008 // If we are composing subreg, bail out.
2009 // Same remark as getNextSourceFromRegSequence.
2010 // I.e., this may be turned into an assert.
2011 return ValueTrackerResult();
2012
2013 RegSubRegPair BaseReg;
2014 RegSubRegPairAndIdx InsertedReg;
2015 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
2016 return ValueTrackerResult();
2017
2018 // We are looking at:
2019 // Def = INSERT_SUBREG v0, v1, sub1
2020 // There are two cases:
2021 // 1. DefSubReg == sub1, get v1.
2022 // 2. DefSubReg != sub1, the value may be available through v0.
2023
2024 // #1 Check if the inserted register matches the required sub index.
2025 if (InsertedReg.SubIdx == DefSubReg) {
2026 return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
2027 }
2028 // #2 Otherwise, if the sub register we are looking for is not partial
2029 // defined by the inserted element, we can look through the main
2030 // register (v0).
2031 const MachineOperand &MODef = Def->getOperand(DefIdx);
2032 // If the result register (Def) and the base register (v0) do not
2033 // have the same register class or if we have to compose
2034 // subregisters, bail out.
2035 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
2036 BaseReg.SubReg)
2037 return ValueTrackerResult();
2038
2039 // Get the TRI and check if the inserted sub-register overlaps with the
2040 // sub-register we are tracking.
2041 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
2042 if ((TRI->getSubRegIndexLaneMask(DefSubReg) &
2043 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx))
2044 .any())
2045 return ValueTrackerResult();
2046 // At this point, the value is available in v0 via the same subreg
2047 // we used for Def.
2048 return ValueTrackerResult(BaseReg.Reg, DefSubReg);
2049}
2050
2051ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
2052 assert((Def->isExtractSubreg() || Def->isExtractSubregLike()) &&
2053 "Invalid definition");
2054 // We are looking at:
2055 // Def = EXTRACT_SUBREG v0, sub0
2056
2057 // Bail if we have to compose sub registers.
2058 // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
2059 if (DefSubReg)
2060 return ValueTrackerResult();
2061
2062 RegSubRegPairAndIdx ExtractSubregInputReg;
2063 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
2064 return ValueTrackerResult();
2065
2066 // Bail if we have to compose sub registers.
2067 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
2068 if (ExtractSubregInputReg.SubReg)
2069 return ValueTrackerResult();
2070 // Otherwise, the value is available in the v0.sub0.
2071 return ValueTrackerResult(ExtractSubregInputReg.Reg,
2072 ExtractSubregInputReg.SubIdx);
2073}
2074
2075ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
2076 assert(Def->isSubregToReg() && "Invalid definition");
2077 // We are looking at:
2078 // Def = SUBREG_TO_REG Imm, v0, sub0
2079
2080 // Bail if we have to compose sub registers.
2081 // If DefSubReg != sub0, we would have to check that all the bits
2082 // we track are included in sub0 and if yes, we would have to
2083 // determine the right subreg in v0.
2084 if (DefSubReg != Def->getOperand(3).getImm())
2085 return ValueTrackerResult();
2086 // Bail if we have to compose sub registers.
2087 // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
2088 if (Def->getOperand(2).getSubReg())
2089 return ValueTrackerResult();
2090
2091 return ValueTrackerResult(Def->getOperand(2).getReg(),
2092 Def->getOperand(3).getImm());
2093}
2094
2095/// Explore each PHI incoming operand and return its sources.
2096ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
2097 assert(Def->isPHI() && "Invalid definition");
2098 ValueTrackerResult Res;
2099
2100 // If we look for a different subreg, bail as we do not support composing
2101 // subregs yet.
2102 if (Def->getOperand(0).getSubReg() != DefSubReg)
2103 return ValueTrackerResult();
2104
2105 // Return all register sources for PHI instructions.
2106 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
2107 const MachineOperand &MO = Def->getOperand(i);
2108 assert(MO.isReg() && "Invalid PHI instruction");
2109 // We have no code to deal with undef operands. They shouldn't happen in
2110 // normal programs anyway.
2111 if (MO.isUndef())
2112 return ValueTrackerResult();
2113 Res.addSource(MO.getReg(), MO.getSubReg());
2114 }
2115
2116 return Res;
2117}
2118
2119ValueTrackerResult ValueTracker::getNextSourceImpl() {
2120 assert(Def && "This method needs a valid definition");
2121
2122 assert(((Def->getOperand(DefIdx).isDef() &&
2123 (DefIdx < Def->getDesc().getNumDefs() ||
2124 Def->getDesc().isVariadic())) ||
2125 Def->getOperand(DefIdx).isImplicit()) &&
2126 "Invalid DefIdx");
2127 if (Def->isCopy())
2128 return getNextSourceFromCopy();
2129 if (Def->isBitcast())
2130 return getNextSourceFromBitcast();
2131 // All the remaining cases involve "complex" instructions.
2132 // Bail if we did not ask for the advanced tracking.
2134 return ValueTrackerResult();
2135 if (Def->isRegSequence() || Def->isRegSequenceLike())
2136 return getNextSourceFromRegSequence();
2137 if (Def->isInsertSubreg() || Def->isInsertSubregLike())
2138 return getNextSourceFromInsertSubreg();
2139 if (Def->isExtractSubreg() || Def->isExtractSubregLike())
2140 return getNextSourceFromExtractSubreg();
2141 if (Def->isSubregToReg())
2142 return getNextSourceFromSubregToReg();
2143 if (Def->isPHI())
2144 return getNextSourceFromPHI();
2145 return ValueTrackerResult();
2146}
2147
2148ValueTrackerResult ValueTracker::getNextSource() {
2149 // If we reach a point where we cannot move up in the use-def chain,
2150 // there is nothing we can get.
2151 if (!Def)
2152 return ValueTrackerResult();
2153
2154 ValueTrackerResult Res = getNextSourceImpl();
2155 if (Res.isValid()) {
2156 // Update definition, definition index, and subregister for the
2157 // next call of getNextSource.
2158 // Update the current register.
2159 bool OneRegSrc = Res.getNumSources() == 1;
2160 if (OneRegSrc)
2161 Reg = Res.getSrcReg(0);
2162 // Update the result before moving up in the use-def chain
2163 // with the instruction containing the last found sources.
2164 Res.setInst(Def);
2165
2166 // If we can still move up in the use-def chain, move to the next
2167 // definition.
2168 if (!Reg.isPhysical() && OneRegSrc) {
2169 MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
2170 if (DI != MRI.def_end()) {
2171 Def = DI->getParent();
2172 DefIdx = DI.getOperandNo();
2173 DefSubReg = Res.getSrcSubReg(0);
2174 } else {
2175 Def = nullptr;
2176 }
2177 return Res;
2178 }
2179 }
2180 // If we end up here, this means we will not be able to find another source
2181 // for the next iteration. Make sure any new call to getNextSource bails out
2182 // early by cutting the use-def chain.
2183 Def = nullptr;
2184 return Res;
2185}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Rewrite undef for PHI
MachineBasicBlock & MBB
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
TargetInstrInfo::RegSubRegPair RegSubRegPair
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
TargetInstrInfo::RegSubRegPair RegSubRegPair
static cl::opt< unsigned > RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10), cl::desc("Limit the length of PHI chains to lookup"))
static cl::opt< bool > DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer"))
static cl::opt< unsigned > MaxRecurrenceChain("recurrence-chain-limit", cl::Hidden, cl::init(3), cl::desc("Maximum length of recurrence chain when evaluating the benefit " "of commuting operands"))
static cl::opt< bool > DisableNAPhysCopyOpt("disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable non-allocatable physical register copy optimization"))
static bool isVirtualRegisterOperand(MachineOperand &MO)
\bried Returns true if MO is a virtual register operand.
static MachineInstr & insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const SmallVectorImpl< RegSubRegPair > &SrcRegs, MachineInstr &OrigPHI)
Insert a PHI instruction with incoming edges SrcRegs that are guaranteed to have the same register cl...
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
static cl::opt< bool > DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization"))
Specifiy whether or not the value tracking looks through complex instructions.
Peephole Optimizations
#define DEBUG_TYPE
static RegSubRegPair getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, RegSubRegPair Def, const PeepholeOptimizer::RewriteMapTy &RewriteMap, bool HandleMultipleSources=true)
Given a Def.Reg and Def.SubReg pair, use RewriteMap to find the new source to use for rewrite.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Virtual Register Rewriter
Definition: VirtRegMap.cpp:261
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
bool isLoopHeader(const BlockT *BB) const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
An RAII based helper class to modify MachineFunctionProperties when running pass.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
virtual void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID)
Callback before changing MCInstrDesc.
virtual void MF_HandleRemoval(MachineInstr &MI)=0
Callback before a removal. This should not modify the MI directly.
virtual void MF_HandleInsertion(MachineInstr &MI)=0
Callback after an insertion. This should not modify the MI directly.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void setDelegate(Delegate *delegate)
Set the delegate.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:71
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:577
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:349
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:501
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
bool empty() const
Definition: SmallSet.h:168
bool erase(const T &V)
Definition: SmallSet.h:193
void clear()
Definition: SmallSet.h:204
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
TargetInstrInfo - Interface to description of machine instruction set.
static const unsigned CommuteAnyOperandIndex
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
MCInstrDesc const & getDesc(MCInstrInfo const &MCII, MCInst const &MCI)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::pair< unsigned, unsigned > IndexPair
The pair of an instruction index and a operand index.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
char & PeepholeOptimizerLegacyID
PeepholeOptimizer - This pass performs peephole optimizations - like extension and comparison elimina...
PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializePeepholeOptimizerLegacyPass(PassRegistry &)
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
A pair composed of a pair of a register and a sub-register index, and another sub-register index.
A pair composed of a register and a sub-register index.