// Doxygen page banner (extraction artifact, not part of the original source):
// LLVM 23.0.0git - PeepholeOptimizer.cpp - "Go to the documentation of this file."
//===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Perform peephole optimizations on the machine code:
//
// - Optimize Extensions
//
//     Optimization of sign / zero extension instructions. It may be extended to
//     handle other instructions with similar properties.
//
//     On some targets, some instructions, e.g. X86 sign / zero extension, may
//     leave the source value in the lower part of the result. This optimization
//     will replace some uses of the pre-extension value with uses of the
//     sub-register of the results.
//
// - Optimize Comparisons
//
//     Optimization of comparison instructions. For instance, in this code:
//
//       sub r1, 1
//       cmp r1, 0
//       bz  L1
//
//     If the "sub" instruction already sets (or could be modified to set) the
//     same flag that the "cmp" instruction sets and that "bz" uses, then we can
//     eliminate the "cmp" instruction.
//
//     Another instance, in this code:
//
//       sub r1, r3 | sub r1, imm
//       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
//       bge L1
//
//     If the branch instruction can use flag from "sub", then we can replace
//     "sub" with "subs" and eliminate the "cmp" instruction.
//
// - Optimize Loads:
//
//     Loads that can be folded into a later instruction. A load is foldable
//     if it loads to virtual registers and the virtual register defined has
//     a single use.
//
// - Optimize Copies and Bitcast (more generally, target specific copies):
//
//     Rewrite copies and bitcasts to avoid cross register bank copies
//     when possible.
//     E.g., Consider the following example, where capital and lower
//     letters denote different register file:
//     b = copy A <-- cross-bank copy
//     C = copy b <-- cross-bank copy
//   =>
//     b = copy A <-- cross-bank copy
//     C = copy A <-- same-bank copy
//
//     E.g., for bitcast:
//     b = bitcast A <-- cross-bank copy
//     C = bitcast b <-- cross-bank copy
//   =>
//     b = bitcast A <-- cross-bank copy
//     C = copy A    <-- same-bank copy
//===----------------------------------------------------------------------===//
67
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include <cassert>
#include <cstdint>
#include <utility>
98
99using namespace llvm;
102
103#define DEBUG_TYPE "peephole-opt"
104
105// Optimize Extensions
106static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
107 cl::desc("Aggressive extension optimization"));
108
109static cl::opt<bool>
110 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
111 cl::desc("Disable the peephole optimizer"));
112
113/// Specifiy whether or not the value tracking looks through
114/// complex instructions. When this is true, the value tracker
115/// bails on everything that is not a copy or a bitcast.
116static cl::opt<bool>
117 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
118 cl::desc("Disable advanced copy optimization"));
119
121 "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
122 cl::desc("Disable non-allocatable physical register copy optimization"));
123
124// Limit the number of PHI instructions to process
125// in PeepholeOptimizer::getNextSource.
127 RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10),
128 cl::desc("Limit the length of PHI chains to lookup"));
129
130// Limit the length of recurrence chain when evaluating the benefit of
131// commuting operands.
133 "recurrence-chain-limit", cl::Hidden, cl::init(3),
134 cl::desc("Maximum length of recurrence chain when evaluating the benefit "
135 "of commuting operands"));
136
137STATISTIC(NumReuse, "Number of extension results reused");
138STATISTIC(NumCmps, "Number of compares eliminated");
139STATISTIC(NumImmFold, "Number of move immediate folded");
140STATISTIC(NumLoadFold, "Number of loads folded");
141STATISTIC(NumSelects, "Number of selects optimized");
142STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
143STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
144STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
145
146namespace {
147
148class ValueTrackerResult;
149class RecurrenceInstr;
150
/// Interface to query instructions amenable to copy rewriting.
class Rewriter {
protected:
  MachineInstr &CopyLike; ///< The copy-like instruction being rewritten.
  int CurrentSrcIdx = 0;  ///< The index of the source being rewritten.
public:
  Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
  virtual ~Rewriter() = default;

  /// Get the next rewritable source (SrcReg, SrcSubReg) and
  /// the related value that it affects (DstReg, DstSubReg).
  /// A source is considered rewritable if its register class and the
  /// register class of the related DstReg may not be register
  /// coalescer friendly. In other words, given a copy-like instruction,
  /// not all of its arguments may be returned as rewritable sources,
  /// since some of them are already register coalescer friendly.
  ///
  /// Each call of this method moves the current source to the next
  /// rewritable source.
  /// For instance, let CopyLike be the instruction to rewrite.
  /// CopyLike has one definition and one source:
  /// dst.dstSubIdx = CopyLike src.srcSubIdx.
  ///
  /// The first call will give the first rewritable source, i.e.,
  /// the only source this instruction has:
  /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
  /// This source defines the whole definition, i.e.,
  /// (DstReg, DstSubReg) = (dst, dstSubIdx).
  ///
  /// The second and subsequent calls will return false, as there is only one
  /// rewritable source.
  ///
  /// \return True if a rewritable source has been found, false otherwise.
  /// The output arguments are valid if and only if true is returned.
  virtual bool getNextRewritableSource(RegSubRegPair &Src,
                                       RegSubRegPair &Dst) = 0;

  /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
  /// \return True if the rewriting was possible, false otherwise.
  virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
};
192
193/// Rewriter for COPY instructions.
194class CopyRewriter : public Rewriter {
195public:
196 CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
197 assert(MI.isCopy() && "Expected copy instruction");
198 }
199 ~CopyRewriter() override = default;
200
201 bool getNextRewritableSource(RegSubRegPair &Src,
202 RegSubRegPair &Dst) override {
203 if (++CurrentSrcIdx > 1)
204 return false;
205
206 // The rewritable source is the argument.
207 const MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
208 Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
209 // What we track are the alternative sources of the definition.
210 const MachineOperand &MODef = CopyLike.getOperand(0);
211 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
212 return true;
213 }
214
215 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
216 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
217 MOSrc.setReg(NewReg);
218 MOSrc.setSubReg(NewSubReg);
219 return true;
220 }
221};
222
223/// Helper class to rewrite uncoalescable copy like instructions
224/// into new COPY (coalescable friendly) instructions.
225class UncoalescableRewriter : public Rewriter {
226 int NumDefs; ///< Number of defs in the bitcast.
227
228public:
229 UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
230 NumDefs = MI.getDesc().getNumDefs();
231 }
232
233 /// \see See Rewriter::getNextRewritableSource()
234 /// All such sources need to be considered rewritable in order to
235 /// rewrite a uncoalescable copy-like instruction. This method return
236 /// each definition that must be checked if rewritable.
237 bool getNextRewritableSource(RegSubRegPair &Src,
238 RegSubRegPair &Dst) override {
239 // Find the next non-dead definition and continue from there.
240 if (CurrentSrcIdx == NumDefs)
241 return false;
242
243 while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
244 ++CurrentSrcIdx;
245 if (CurrentSrcIdx == NumDefs)
246 return false;
247 }
248
249 // What we track are the alternative sources of the definition.
250 Src = RegSubRegPair(0, 0);
251 const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
252 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
253
254 CurrentSrcIdx++;
255 return true;
256 }
257
258 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
259 return false;
260 }
261};
262
263/// Specialized rewriter for INSERT_SUBREG instruction.
264class InsertSubregRewriter : public Rewriter {
265public:
266 InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
267 assert(MI.isInsertSubreg() && "Invalid instruction");
268 }
269
270 /// \see See Rewriter::getNextRewritableSource()
271 /// Here CopyLike has the following form:
272 /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
273 /// Src1 has the same register class has dst, hence, there is
274 /// nothing to rewrite.
275 /// Src2.src2SubIdx, may not be register coalescer friendly.
276 /// Therefore, the first call to this method returns:
277 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
278 /// (DstReg, DstSubReg) = (dst, subIdx).
279 ///
280 /// Subsequence calls will return false.
281 bool getNextRewritableSource(RegSubRegPair &Src,
282 RegSubRegPair &Dst) override {
283 // If we already get the only source we can rewrite, return false.
284 if (CurrentSrcIdx == 2)
285 return false;
286 // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
287 CurrentSrcIdx = 2;
288 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
289 Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
290 const MachineOperand &MODef = CopyLike.getOperand(0);
291
292 // We want to track something that is compatible with the
293 // partial definition.
294 if (MODef.getSubReg())
295 // Bail if we have to compose sub-register indices.
296 return false;
297 Dst = RegSubRegPair(MODef.getReg(),
298 (unsigned)CopyLike.getOperand(3).getImm());
299 return true;
300 }
301
302 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
303 if (CurrentSrcIdx != 2)
304 return false;
305 // We are rewriting the inserted reg.
306 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
307 MO.setReg(NewReg);
308 MO.setSubReg(NewSubReg);
309 return true;
310 }
311};
312
313/// Specialized rewriter for EXTRACT_SUBREG instruction.
314class ExtractSubregRewriter : public Rewriter {
315 const TargetInstrInfo &TII;
316
317public:
318 ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
319 : Rewriter(MI), TII(TII) {
320 assert(MI.isExtractSubreg() && "Invalid instruction");
321 }
322
323 /// \see Rewriter::getNextRewritableSource()
324 /// Here CopyLike has the following form:
325 /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
326 /// There is only one rewritable source: Src.subIdx,
327 /// which defines dst.dstSubIdx.
328 bool getNextRewritableSource(RegSubRegPair &Src,
329 RegSubRegPair &Dst) override {
330 // If we already get the only source we can rewrite, return false.
331 if (CurrentSrcIdx == 1)
332 return false;
333 // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
334 CurrentSrcIdx = 1;
335 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
336 // If we have to compose sub-register indices, bail out.
337 if (MOExtractedReg.getSubReg())
338 return false;
339
340 Src =
341 RegSubRegPair(MOExtractedReg.getReg(), CopyLike.getOperand(2).getImm());
342
343 // We want to track something that is compatible with the definition.
344 const MachineOperand &MODef = CopyLike.getOperand(0);
345 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
346 return true;
347 }
348
349 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
350 // The only source we can rewrite is the input register.
351 if (CurrentSrcIdx != 1)
352 return false;
353
354 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
355
356 // If we find a source that does not require to extract something,
357 // rewrite the operation with a copy.
358 if (!NewSubReg) {
359 // Move the current index to an invalid position.
360 // We do not want another call to this method to be able
361 // to do any change.
362 CurrentSrcIdx = -1;
363 // Rewrite the operation as a COPY.
364 // Get rid of the sub-register index.
365 CopyLike.removeOperand(2);
366 // Morph the operation into a COPY.
367 CopyLike.setDesc(TII.get(TargetOpcode::COPY));
368 return true;
369 }
370 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
371 return true;
372 }
373};
374
375/// Specialized rewriter for REG_SEQUENCE instruction.
376class RegSequenceRewriter : public Rewriter {
377public:
378 RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
379 assert(MI.isRegSequence() && "Invalid instruction");
380 CurrentSrcIdx = -1;
381 }
382
383 /// \see Rewriter::getNextRewritableSource()
384 /// Here CopyLike has the following form:
385 /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
386 /// Each call will return a different source, walking all the available
387 /// source.
388 ///
389 /// The first call returns:
390 /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
391 /// (DstReg, DstSubReg) = (dst, subIdx1).
392 ///
393 /// The second call returns:
394 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
395 /// (DstReg, DstSubReg) = (dst, subIdx2).
396 ///
397 /// And so on, until all the sources have been traversed, then
398 /// it returns false.
399 bool getNextRewritableSource(RegSubRegPair &Src,
400 RegSubRegPair &Dst) override {
401 // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
402 CurrentSrcIdx += 2;
403 if (static_cast<unsigned>(CurrentSrcIdx) >= CopyLike.getNumOperands())
404 return false;
405
406 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
407 Src.Reg = MOInsertedReg.getReg();
408 Src.SubReg = MOInsertedReg.getSubReg();
409
410 // We want to track something that is compatible with the related
411 // partial definition.
412 Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
413
414 const MachineOperand &MODef = CopyLike.getOperand(0);
415 Dst.Reg = MODef.getReg();
416 assert(MODef.getSubReg() == 0 && "cannot have subregister def in SSA");
417 return true;
418 }
419
420 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
421 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
422 MO.setReg(NewReg);
423 MO.setSubReg(NewSubReg);
424 return true;
425 }
426};
427
/// Implementation of the peephole optimizer, shared by the legacy and new
/// pass-manager drivers. Inherits MachineFunction::Delegate privately so it
/// is notified of instruction removal / descriptor changes and can keep the
/// CopySrcMIs cache coherent.
class PeepholeOptimizer : private MachineFunction::Delegate {
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  MachineDominatorTree *DT = nullptr; // Machine dominator tree
  MachineLoopInfo *MLI = nullptr;

public:
  PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
      : DT(DT), MLI(MLI) {}

  /// Run the optimizer over \p MF. \return true if the function changed.
  bool run(MachineFunction &MF);
  /// Track Def -> Use info used for rewriting copies.
  using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;

  /// Sequence of instructions that formulate recurrence cycle.
  using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;

private:
  // Per-instruction rewrites; each returns true if it changed the function.
  bool optimizeCmpInstr(MachineInstr &MI);
  bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
                        SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  bool optimizeSelect(MachineInstr &MI,
                      SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  bool optimizeCondBranch(MachineInstr &MI);

  bool optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter);
  bool optimizeCoalescableCopy(MachineInstr &MI);
  bool optimizeUncoalescableCopy(MachineInstr &MI,
                                 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  bool optimizeRecurrence(MachineInstr &PHI);
  bool findNextSource(const TargetRegisterClass *DefRC, unsigned DefSubReg,
                      RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
  bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
                       DenseMap<Register, MachineInstr *> &ImmDefMIs);
  bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
                     DenseMap<Register, MachineInstr *> &ImmDefMIs,
                     bool &Deleted);

  /// Finds recurrence cycles, but only ones that formulated around
  /// a def operand and a use operand that are tied. If there is a use
  /// operand commutable with the tied use operand, find recurrence cycle
  /// along that operand as well.
  bool findTargetRecurrence(Register Reg,
                            const SmallSet<Register, 2> &TargetReg,
                            RecurrenceCycle &RC);

  /// If copy instruction \p MI is a virtual register copy or a copy of a
  /// constant physical register to a virtual register, track it in the
  /// set CopySrcMIs. If this virtual register was previously seen as a
  /// copy, replace the uses of this copy with the previously seen copy's
  /// destination register.
  bool foldRedundantCopy(MachineInstr &MI);

  /// Is the register \p Reg a non-allocatable physical register?
  bool isNAPhysCopy(Register Reg);

  /// If copy instruction \p MI is a non-allocatable virtual<->physical
  /// register copy, track it in the \p NAPhysToVirtMIs map. If this
  /// non-allocatable physical register was previously copied to a virtual
  /// registered and hasn't been clobbered, the virt->phys copy can be
  /// deleted.
  bool
  foldRedundantNAPhysCopy(MachineInstr &MI,
                          DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);

  bool isLoadFoldable(MachineInstr &MI,
                      SmallSet<Register, 16> &FoldAsLoadDefCandidates);

  /// Check whether \p MI is understood by the register coalescer
  /// but may require some rewriting.
  static bool isCoalescableCopy(const MachineInstr &MI) {
    // SubregToRegs are not interesting, because they are already register
    // coalescer friendly.
    return MI.isCopy() ||
           (!DisableAdvCopyOpt && (MI.isRegSequence() || MI.isInsertSubreg() ||
                                   MI.isExtractSubreg()));
  }

  /// Check whether \p MI is a copy like instruction that is
  /// not recognized by the register coalescer.
  static bool isUncoalescableCopy(const MachineInstr &MI) {
    return MI.isBitcast() || (!DisableAdvCopyOpt && (MI.isRegSequenceLike() ||
                                                     MI.isInsertSubregLike() ||
                                                     MI.isExtractSubregLike()));
  }

  MachineInstr &rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def,
                              RewriteMapTy &RewriteMap);

  // Set of copies to virtual registers keyed by source register. Never
  // holds any physreg which requires def tracking.
  DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;

  // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs.
  void MF_HandleInsertion(MachineInstr &MI) override {}

  /// If \p MI is a COPY whose source is a virtual register or a constant
  /// physical register, return its (reg, subreg) source in \p SrcPair.
  bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) {
    if (!MI.isCopy())
      return false;

    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSubReg = MI.getOperand(1).getSubReg();
    if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
      return false;

    SrcPair = RegSubRegPair(SrcReg, SrcSubReg);
    return true;
  }

  // If a COPY instruction is to be deleted or changed, we should also remove
  // it from CopySrcMIs.
  void deleteChangedCopy(MachineInstr &MI) {
    RegSubRegPair SrcPair;
    if (!getCopySrc(MI, SrcPair))
      return;

    auto It = CopySrcMIs.find(SrcPair);
    // Only erase when the cached entry is this very instruction; a later
    // copy with the same source may have replaced it in the map.
    if (It != CopySrcMIs.end() && It->second == &MI)
      CopySrcMIs.erase(It);
  }

  void MF_HandleRemoval(MachineInstr &MI) override { deleteChangedCopy(MI); }

  void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override {
    deleteChangedCopy(MI);
  }
};
556
557class PeepholeOptimizerLegacy : public MachineFunctionPass {
558public:
559 static char ID; // Pass identification
560
561 PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {}
562
563 bool runOnMachineFunction(MachineFunction &MF) override;
564
565 void getAnalysisUsage(AnalysisUsage &AU) const override {
566 AU.setPreservesCFG();
568 AU.addRequired<MachineLoopInfoWrapperPass>();
569 AU.addPreserved<MachineLoopInfoWrapperPass>();
570 if (Aggressive) {
571 AU.addRequired<MachineDominatorTreeWrapperPass>();
572 AU.addPreserved<MachineDominatorTreeWrapperPass>();
573 }
574 }
575
576 MachineFunctionProperties getRequiredProperties() const override {
577 return MachineFunctionProperties().setIsSSA();
578 }
579};
580
581/// Helper class to hold instructions that are inside recurrence cycles.
582/// The recurrence cycle is formulated around 1) a def operand and its
583/// tied use operand, or 2) a def operand and a use operand that is commutable
584/// with another use operand which is tied to the def operand. In the latter
585/// case, index of the tied use operand and the commutable use operand are
586/// maintained with CommutePair.
587class RecurrenceInstr {
588public:
589 using IndexPair = std::pair<unsigned, unsigned>;
590
591 RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
592 RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
593 : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
594
595 MachineInstr *getMI() const { return MI; }
596 std::optional<IndexPair> getCommutePair() const { return CommutePair; }
597
598private:
599 MachineInstr *MI;
600 std::optional<IndexPair> CommutePair;
601};
602
603/// Helper class to hold a reply for ValueTracker queries.
604/// Contains the returned sources for a given search and the instructions
605/// where the sources were tracked from.
606class ValueTrackerResult {
607private:
608 /// Track all sources found by one ValueTracker query.
610
611 /// Instruction using the sources in 'RegSrcs'.
612 const MachineInstr *Inst = nullptr;
613
614public:
615 ValueTrackerResult() = default;
616
617 ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); }
618
619 bool isValid() const { return getNumSources() > 0; }
620
621 void setInst(const MachineInstr *I) { Inst = I; }
622 const MachineInstr *getInst() const { return Inst; }
623
624 void clear() {
625 RegSrcs.clear();
626 Inst = nullptr;
627 }
628
629 void addSource(Register SrcReg, unsigned SrcSubReg) {
630 RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
631 }
632
633 void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
634 assert(Idx < getNumSources() && "Reg pair source out of index");
635 RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
636 }
637
638 int getNumSources() const { return RegSrcs.size(); }
639
640 RegSubRegPair getSrc(int Idx) const { return RegSrcs[Idx]; }
641
642 Register getSrcReg(int Idx) const {
643 assert(Idx < getNumSources() && "Reg source out of index");
644 return RegSrcs[Idx].Reg;
645 }
646
647 unsigned getSrcSubReg(int Idx) const {
648 assert(Idx < getNumSources() && "SubReg source out of index");
649 return RegSrcs[Idx].SubReg;
650 }
651
652 bool operator==(const ValueTrackerResult &Other) const {
653 if (Other.getInst() != getInst())
654 return false;
655
656 if (Other.getNumSources() != getNumSources())
657 return false;
658
659 for (int i = 0, e = Other.getNumSources(); i != e; ++i)
660 if (Other.getSrcReg(i) != getSrcReg(i) ||
661 Other.getSrcSubReg(i) != getSrcSubReg(i))
662 return false;
663 return true;
664 }
665};
666
/// Helper class to track the possible sources of a value defined by
/// a (chain of) copy related instructions.
/// Given a definition (instruction and definition index), this class
/// follows the use-def chain to find successive suitable sources.
/// The given source can be used to rewrite the definition into
/// def = COPY src.
///
/// For instance, let us consider the following snippet:
/// v0 =
/// v2 = INSERT_SUBREG v1, v0, sub0
/// def = COPY v2.sub0
///
/// Using a ValueTracker for def = COPY v2.sub0 will give the following
/// suitable sources:
/// v2.sub0 and v0.
/// Then, def can be rewritten into def = COPY v0.
class ValueTracker {
private:
  /// The current point into the use-def chain.
  const MachineInstr *Def = nullptr;

  /// The index of the definition in Def.
  unsigned DefIdx = 0;

  /// The sub register index of the definition.
  unsigned DefSubReg;

  /// The register where the value can be found.
  Register Reg;

  /// MachineRegisterInfo used to perform tracking.
  const MachineRegisterInfo &MRI;

  /// Optional TargetInstrInfo used to perform some complex tracking.
  const TargetInstrInfo *TII;

  /// Dispatcher to the right underlying implementation of getNextSource.
  ValueTrackerResult getNextSourceImpl();

  /// Specialized version of getNextSource for Copy instructions.
  ValueTrackerResult getNextSourceFromCopy();

  /// Specialized version of getNextSource for Bitcast instructions.
  ValueTrackerResult getNextSourceFromBitcast();

  /// Specialized version of getNextSource for RegSequence instructions.
  ValueTrackerResult getNextSourceFromRegSequence();

  /// Specialized version of getNextSource for InsertSubreg instructions.
  ValueTrackerResult getNextSourceFromInsertSubreg();

  /// Specialized version of getNextSource for ExtractSubreg instructions.
  ValueTrackerResult getNextSourceFromExtractSubreg();

  /// Specialized version of getNextSource for SubregToReg instructions.
  ValueTrackerResult getNextSourceFromSubregToReg();

  /// Specialized version of getNextSource for PHI instructions.
  ValueTrackerResult getNextSourceFromPHI();

public:
  /// Create a ValueTracker instance for the value defined by \p Reg.
  /// \p DefSubReg represents the sub register index the value tracker will
  /// track. It does not need to match the sub register index used in the
  /// definition of \p Reg.
  /// If \p Reg is a physical register, a value tracker constructed with
  /// this constructor will not find any alternative source.
  /// Indeed, when \p Reg is a physical register that constructor does not
  /// know which definition of \p Reg it should track.
  /// Use the next constructor to track a physical register.
  ValueTracker(Register Reg, unsigned DefSubReg, const MachineRegisterInfo &MRI,
               const TargetInstrInfo *TII = nullptr)
      : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
    // Only virtual registers have a unique SSA definition to start from;
    // for a physical register, Def stays null and tracking finds nothing.
    if (!Reg.isPhysical()) {
      Def = MRI.getVRegDef(Reg);
      DefIdx = MRI.def_begin(Reg).getOperandNo();
    }
  }

  /// Following the use-def chain, get the next available source
  /// for the tracked value.
  /// \return A ValueTrackerResult containing a set of registers
  /// and sub registers with tracked values. A ValueTrackerResult with
  /// an empty set of registers means no source was found.
  ValueTrackerResult getNextSource();
};
753
754} // end anonymous namespace
755
756char PeepholeOptimizerLegacy::ID = 0;
757
758char &llvm::PeepholeOptimizerLegacyID = PeepholeOptimizerLegacy::ID;
759
760INITIALIZE_PASS_BEGIN(PeepholeOptimizerLegacy, DEBUG_TYPE,
761 "Peephole Optimizations", false, false)
764INITIALIZE_PASS_END(PeepholeOptimizerLegacy, DEBUG_TYPE,
765 "Peephole Optimizations", false, false)
766
767/// If instruction is a copy-like instruction, i.e. it reads a single register
768/// and writes a single register and it does not modify the source, and if the
769/// source value is preserved as a sub-register of the result, then replace all
770/// reachable uses of the source with the subreg of the result.
771///
772/// Do not generate an EXTRACT that is used only in a debug use, as this changes
773/// the code. Since this code does not currently share EXTRACTs, just ignore all
774/// debug uses.
775bool PeepholeOptimizer::optimizeExtInstr(
777 SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
778 Register SrcReg, DstReg;
779 unsigned SubIdx;
780 if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
781 return false;
782
783 if (DstReg.isPhysical() || SrcReg.isPhysical())
784 return false;
785
786 if (MRI->hasOneNonDBGUse(SrcReg))
787 // No other uses.
788 return false;
789
790 // Ensure DstReg can get a register class that actually supports
791 // sub-registers. Don't change the class until we commit.
792 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
793 DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
794 if (!DstRC)
795 return false;
796
797 // The ext instr may be operating on a sub-register of SrcReg as well.
798 // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
799 // register.
800 // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
801 // SrcReg:SubIdx should be replaced.
802 bool UseSrcSubIdx =
803 TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
804
805 // The source has other uses. See if we can replace the other uses with use of
806 // the result of the extension.
808 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
809 ReachedBBs.insert(UI.getParent());
810
811 // Uses that are in the same BB of uses of the result of the instruction.
813
814 // Uses that the result of the instruction can reach.
816
817 bool ExtendLife = true;
818 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
819 MachineInstr *UseMI = UseMO.getParent();
820 if (UseMI == &MI)
821 continue;
822
823 if (UseMI->isPHI()) {
824 ExtendLife = false;
825 continue;
826 }
827
828 // Only accept uses of SrcReg:SubIdx.
829 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
830 continue;
831
832 // It's an error to translate this:
833 //
834 // %reg1025 = <sext> %reg1024
835 // ...
836 // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
837 //
838 // into this:
839 //
840 // %reg1025 = <sext> %reg1024
841 // ...
842 // %reg1027 = COPY %reg1025:4
843 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
844 //
845 // The problem here is that SUBREG_TO_REG is there to assert that an
846 // implicit zext occurs. It doesn't insert a zext instruction. If we allow
847 // the COPY here, it will give us the value after the <sext>, not the
848 // original value of %reg1024 before <sext>.
849 if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
850 continue;
851
852 MachineBasicBlock *UseMBB = UseMI->getParent();
853 if (UseMBB == &MBB) {
854 // Local uses that come after the extension.
855 if (!LocalMIs.count(UseMI))
856 Uses.push_back(&UseMO);
857 } else if (ReachedBBs.count(UseMBB)) {
858 // Non-local uses where the result of the extension is used. Always
859 // replace these unless it's a PHI.
860 Uses.push_back(&UseMO);
861 } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
862 // We may want to extend the live range of the extension result in order
863 // to replace these uses.
864 ExtendedUses.push_back(&UseMO);
865 } else {
866 // Both will be live out of the def MBB anyway. Don't extend live range of
867 // the extension result.
868 ExtendLife = false;
869 break;
870 }
871 }
872
873 if (ExtendLife && !ExtendedUses.empty())
874 // Extend the liveness of the extension result.
875 Uses.append(ExtendedUses.begin(), ExtendedUses.end());
876
877 // Now replace all uses.
878 bool Changed = false;
879 if (!Uses.empty()) {
880 SmallPtrSet<MachineBasicBlock *, 4> PHIBBs;
881
882 // Look for PHI uses of the extended result, we don't want to extend the
883 // liveness of a PHI input. It breaks all kinds of assumptions down
884 // stream. A PHI use is expected to be the kill of its source values.
885 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
886 if (UI.isPHI())
887 PHIBBs.insert(UI.getParent());
888
889 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
890 for (MachineOperand *UseMO : Uses) {
891 MachineInstr *UseMI = UseMO->getParent();
892 MachineBasicBlock *UseMBB = UseMI->getParent();
893 if (PHIBBs.count(UseMBB))
894 continue;
895
896 // About to add uses of DstReg, clear DstReg's kill flags.
897 if (!Changed) {
898 MRI->clearKillFlags(DstReg);
899 MRI->constrainRegClass(DstReg, DstRC);
900 }
901
902 // SubReg defs are illegal in machine SSA phase,
903 // we should not generate SubReg defs.
904 //
905 // For example, for the instructions:
906 //
907 // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
908 // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc
909 //
910 // We should generate:
911 //
912 // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
913 // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0
914 // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0
915 //
916 if (UseSrcSubIdx)
917 RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
918
919 Register NewVR = MRI->createVirtualRegister(RC);
920 BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
921 TII->get(TargetOpcode::COPY), NewVR)
922 .addReg(DstReg, 0, SubIdx);
923 if (UseSrcSubIdx)
924 UseMO->setSubReg(0);
925
926 UseMO->setReg(NewVR);
927 ++NumReuse;
928 Changed = true;
929 }
930 }
931
932 return Changed;
933}
934
935/// If the instruction is a compare and the previous instruction it's comparing
936/// against already sets (or could be modified to set) the same flag as the
937/// compare, then we can remove the comparison and use the flag from the
938/// previous instruction.
939bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
940 // If this instruction is a comparison against zero and isn't comparing a
941 // physical register, we can try to optimize it.
942 Register SrcReg, SrcReg2;
943 int64_t CmpMask, CmpValue;
944 if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
945 SrcReg.isPhysical() || SrcReg2.isPhysical())
946 return false;
947
948 // Attempt to optimize the comparison instruction.
949 LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI);
950 if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
951 LLVM_DEBUG(dbgs() << " -> Successfully optimized compare!\n");
952 ++NumCmps;
953 return true;
954 }
955
956 return false;
957}
958
959/// Optimize a select instruction.
960bool PeepholeOptimizer::optimizeSelect(
961 MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
962 unsigned TrueOp = 0;
963 unsigned FalseOp = 0;
964 bool Optimizable = false;
966 if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
967 return false;
968 if (!Optimizable)
969 return false;
970 if (!TII->optimizeSelect(MI, LocalMIs))
971 return false;
972 LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
973 MI.eraseFromParent();
974 ++NumSelects;
975 return true;
976}
977
978/// Check if a simpler conditional branch can be generated.
979bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
980 return TII->optimizeCondBranch(MI);
981}
982
983/// Try to find a better source value that shares the same register file to
984/// replace \p RegSubReg in an instruction like
985/// `DefRC.DefSubReg = COPY RegSubReg`
986///
987/// When true is returned, the \p RewriteMap can be used by the client to
988/// retrieve all Def -> Use along the way up to the next source. Any found
989/// Use that is not itself a key for another entry, is the next source to
990/// use. During the search for the next source, multiple sources can be found
991/// given multiple incoming sources of a PHI instruction. In this case, we
992/// look in each PHI source for the next source; all found next sources must
993/// share the same register file as \p Reg and \p SubReg. The client should
994/// then be capable to rewrite all intermediate PHIs to get the next source.
995/// \return False if no alternative sources are available. True otherwise.
996bool PeepholeOptimizer::findNextSource(const TargetRegisterClass *DefRC,
997 unsigned DefSubReg,
998 RegSubRegPair RegSubReg,
999 RewriteMapTy &RewriteMap) {
1000 // Do not try to find a new source for a physical register.
1001 // So far we do not have any motivating example for doing that.
1002 // Thus, instead of maintaining untested code, we will revisit that if
1003 // that changes at some point.
1004 Register Reg = RegSubReg.Reg;
1005 RegSubRegPair CurSrcPair = RegSubReg;
1006 SmallVector<RegSubRegPair, 4> SrcToLook = {CurSrcPair};
1007
1008 unsigned PHICount = 0;
1009 do {
1010 CurSrcPair = SrcToLook.pop_back_val();
1011 // As explained above, do not handle physical registers
1012 if (CurSrcPair.Reg.isPhysical())
1013 return false;
1014
1015 ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
1016
1017 // Follow the chain of copies until we find a more suitable source, a phi
1018 // or have to abort.
1019 while (true) {
1020 ValueTrackerResult Res = ValTracker.getNextSource();
1021 // Abort at the end of a chain (without finding a suitable source).
1022 if (!Res.isValid())
1023 return false;
1024
1025 // Insert the Def -> Use entry for the recently found source.
1026 auto [InsertPt, WasInserted] = RewriteMap.try_emplace(CurSrcPair, Res);
1027
1028 if (!WasInserted) {
1029 const ValueTrackerResult &CurSrcRes = InsertPt->second;
1030
1031 assert(CurSrcRes == Res && "ValueTrackerResult found must match");
1032 // An existent entry with multiple sources is a PHI cycle we must avoid.
1033 // Otherwise it's an entry with a valid next source we already found.
1034 if (CurSrcRes.getNumSources() > 1) {
1036 << "findNextSource: found PHI cycle, aborting...\n");
1037 return false;
1038 }
1039 break;
1040 }
1041
1042 // ValueTrackerResult usually have one source unless it's the result from
1043 // a PHI instruction. Add the found PHI edges to be looked up further.
1044 unsigned NumSrcs = Res.getNumSources();
1045 if (NumSrcs > 1) {
1046 PHICount++;
1047 if (PHICount >= RewritePHILimit) {
1048 LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
1049 return false;
1050 }
1051
1052 for (unsigned i = 0; i < NumSrcs; ++i)
1053 SrcToLook.push_back(Res.getSrc(i));
1054 break;
1055 }
1056
1057 CurSrcPair = Res.getSrc(0);
1058 // Do not extend the live-ranges of physical registers as they add
1059 // constraints to the register allocator. Moreover, if we want to extend
1060 // the live-range of a physical register, unlike SSA virtual register,
1061 // we will have to check that they aren't redefine before the related use.
1062 if (CurSrcPair.Reg.isPhysical())
1063 return false;
1064
1065 // Keep following the chain if the value isn't any better yet.
1066 const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
1067 if (!TRI->shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC,
1068 CurSrcPair.SubReg))
1069 continue;
1070
1071 // We currently cannot deal with subreg operands on PHI instructions
1072 // (see insertPHI()).
1073 if (PHICount > 0 && CurSrcPair.SubReg != 0)
1074 continue;
1075
1076 // We found a suitable source, and are done with this chain.
1077 break;
1078 }
1079 } while (!SrcToLook.empty());
1080
1081 // If we did not find a more suitable source, there is nothing to optimize.
1082 return CurSrcPair.Reg != Reg;
1083}
1084
1085/// Insert a PHI instruction with incoming edges \p SrcRegs that are
1086/// guaranteed to have the same register class. This is necessary whenever we
1087/// successfully traverse a PHI instruction and find suitable sources coming
1088/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
1089/// suitable to be used in a new COPY instruction.
1091 const TargetInstrInfo &TII,
1092 const SmallVectorImpl<RegSubRegPair> &SrcRegs,
1093 MachineInstr &OrigPHI) {
1094 assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
1095
1096 const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
1097 // NewRC is only correct if no subregisters are involved. findNextSource()
1098 // should have rejected those cases already.
1099 assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
1100 Register NewVR = MRI.createVirtualRegister(NewRC);
1101 MachineBasicBlock *MBB = OrigPHI.getParent();
1102 MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
1103 TII.get(TargetOpcode::PHI), NewVR);
1104
1105 unsigned MBBOpIdx = 2;
1106 for (const RegSubRegPair &RegPair : SrcRegs) {
1107 MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
1108 MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
1109 // Since we're extended the lifetime of RegPair.Reg, clear the
1110 // kill flags to account for that and make RegPair.Reg reaches
1111 // the new PHI.
1112 MRI.clearKillFlags(RegPair.Reg);
1113 MBBOpIdx += 2;
1114 }
1115
1116 return *MIB;
1117}
1118
1119/// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
1120/// the new source to use for rewrite. If \p HandleMultipleSources is true and
1121/// multiple sources for a given \p Def are found along the way, we found a
1122/// PHI instructions that needs to be rewritten.
1123/// TODO: HandleMultipleSources should be removed once we test PHI handling
1124/// with coalescable copies.
1125static RegSubRegPair
1127 RegSubRegPair Def,
1128 const PeepholeOptimizer::RewriteMapTy &RewriteMap,
1129 bool HandleMultipleSources = true) {
1130 RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
1131 while (true) {
1132 ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
1133 // If there are no entries on the map, LookupSrc is the new source.
1134 if (!Res.isValid())
1135 return LookupSrc;
1136
1137 // There's only one source for this definition, keep searching...
1138 unsigned NumSrcs = Res.getNumSources();
1139 if (NumSrcs == 1) {
1140 LookupSrc.Reg = Res.getSrcReg(0);
1141 LookupSrc.SubReg = Res.getSrcSubReg(0);
1142 continue;
1143 }
1144
1145 // TODO: Remove once multiple srcs w/ coalescable copies are supported.
1146 if (!HandleMultipleSources)
1147 break;
1148
1149 // Multiple sources, recurse into each source to find a new source
1150 // for it. Then, rewrite the PHI accordingly to its new edges.
1152 for (unsigned i = 0; i < NumSrcs; ++i) {
1153 RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
1154 NewPHISrcs.push_back(
1155 getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
1156 }
1157
1158 // Build the new PHI node and return its def register as the new source.
1159 MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
1160 MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
1161 LLVM_DEBUG(dbgs() << "-- getNewSource\n");
1162 LLVM_DEBUG(dbgs() << " Replacing: " << OrigPHI);
1163 LLVM_DEBUG(dbgs() << " With: " << NewPHI);
1164 const MachineOperand &MODef = NewPHI.getOperand(0);
1165 return RegSubRegPair(MODef.getReg(), MODef.getSubReg());
1166 }
1167
1168 return RegSubRegPair(0, 0);
1169}
1170
/// Shared implementation for rewriting the sources of a coalescable
/// copy-like instruction. For every rewritable (source, destination) pair
/// reported by \p CpyRewriter, try to find a better source through
/// findNextSource() and rewrite the use operand in place.
/// \return True when at least one source operand has been rewritten.
bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) {
  bool Changed = false;
  // Get the right rewriter for the current copy.
  // Rewrite each rewritable source.
  RegSubRegPair Dst;
  RegSubRegPair TrackPair;
  while (CpyRewriter.getNextRewritableSource(TrackPair, Dst)) {
    if (Dst.Reg.isPhysical()) {
      // Do not try to find a new source for a physical register.
      // So far we do not have any motivating example for doing that.
      // Thus, instead of maintaining untested code, we will revisit that if
      // that changes at some point.
      continue;
    }

    const TargetRegisterClass *DefRC = MRI->getRegClass(Dst.Reg);

    // Keep track of PHI nodes and its incoming edges when looking for sources.
    RewriteMapTy RewriteMap;
    // Try to find a more suitable source. If we failed to do so, or get the
    // actual source, move to the next source.
    if (!findNextSource(DefRC, Dst.SubReg, TrackPair, RewriteMap))
      continue;

    // Get the new source to rewrite. TODO: Only enable handling of multiple
    // sources (PHIs) once we have a motivating example and testcases for it.
    RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
                                        /*HandleMultipleSources=*/false);
    assert(TrackPair.Reg != NewSrc.Reg &&
           "should not rewrite source to original value");
    if (!NewSrc.Reg)
      continue;

    if (NewSrc.SubReg) {
      // Verify the register class supports the subregister index. ARM's
      // copy-like queries return register:subreg pairs where the register's
      // current class does not directly support the subregister index.
      const TargetRegisterClass *RC = MRI->getRegClass(NewSrc.Reg);
      const TargetRegisterClass *WithSubRC =
          TRI->getSubClassWithSubReg(RC, NewSrc.SubReg);
      // If the register cannot be constrained to a class supporting the
      // subregister index, leave this source alone.
      if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC))
        continue;
      Changed = true;
    }

    // Rewrite source.
    if (CpyRewriter.RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
      // We may have extended the live-range of NewSrc, account for that.
      MRI->clearKillFlags(NewSrc.Reg);
      Changed = true;
    }
  }

  // TODO: We could have a clean-up method to tidy the instruction.
  // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
  // => v0 = COPY v1
  // Currently we haven't seen motivating example for that and we
  // want to avoid untested code.
  NumRewrittenCopies += Changed;
  return Changed;
}
1232
1233/// Optimize generic copy instructions to avoid cross register bank copy.
1234/// The optimization looks through a chain of copies and tries to find a source
1235/// that has a compatible register class.
1236/// Two register classes are considered to be compatible if they share the same
1237/// register bank.
1238/// New copies issued by this optimization are register allocator
1239/// friendly. This optimization does not remove any copy as it may
1240/// overconstrain the register allocator, but replaces some operands
1241/// when possible.
1242/// \pre isCoalescableCopy(*MI) is true.
1243/// \return True, when \p MI has been rewritten. False otherwise.
1244bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
1245 assert(isCoalescableCopy(MI) && "Invalid argument");
1246 assert(MI.getDesc().getNumDefs() == 1 &&
1247 "Coalescer can understand multiple defs?!");
1248 const MachineOperand &MODef = MI.getOperand(0);
1249 // Do not rewrite physical definitions.
1250 if (MODef.getReg().isPhysical())
1251 return false;
1252
1253 switch (MI.getOpcode()) {
1254 case TargetOpcode::COPY:
1255 return optimizeCoalescableCopyImpl(CopyRewriter(MI));
1256 case TargetOpcode::INSERT_SUBREG:
1257 return optimizeCoalescableCopyImpl(InsertSubregRewriter(MI));
1258 case TargetOpcode::EXTRACT_SUBREG:
1259 return optimizeCoalescableCopyImpl(ExtractSubregRewriter(MI, *TII));
1260 case TargetOpcode::REG_SEQUENCE:
1261 return optimizeCoalescableCopyImpl(RegSequenceRewriter(MI));
1262 default:
1263 // Handle uncoalescable copy-like instructions.
1264 if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
1265 MI.isExtractSubregLike())
1266 return optimizeCoalescableCopyImpl(UncoalescableRewriter(MI));
1267 return false;
1268 }
1269}
1270
/// Rewrite the source found through \p Def, by using the \p RewriteMap
/// and create a new COPY instruction. More info about RewriteMap in
/// PeepholeOptimizer::findNextSource. Right now this is only used to handle
/// Uncoalescable copies, since they are copy like instructions that aren't
/// recognized by the register allocator.
/// \param CopyLike The copy-like instruction being rewritten; the new COPY is
/// inserted right before it.
/// \param Def The (register, subregister) definition to replace.
/// \param RewriteMap Def -> Use chains previously filled by findNextSource().
/// \return The newly created COPY instruction.
MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
                                               RegSubRegPair Def,
                                               RewriteMapTy &RewriteMap) {
  assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers");

  // Find the new source to use in the COPY rewrite.
  RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);

  // Insert the COPY.
  const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
  Register NewVReg = MRI->createVirtualRegister(DefRC);

  if (NewSrc.SubReg) {
    const TargetRegisterClass *NewSrcRC = MRI->getRegClass(NewSrc.Reg);
    const TargetRegisterClass *WithSubRC =
        TRI->getSubClassWithSubReg(NewSrcRC, NewSrc.SubReg);

    // The new source may not directly support the subregister, but we should be
    // able to assume it is constrainable to support the subregister (otherwise
    // ValueTracker was lying and reported a useless value).
    if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC))
      llvm_unreachable("replacement register cannot support subregister");
  }

  MachineInstr *NewCopy =
      BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
              TII->get(TargetOpcode::COPY), NewVReg)
          .addReg(NewSrc.Reg, 0, NewSrc.SubReg);

  if (Def.SubReg) {
    // Only the Def.SubReg lanes of NewVReg are defined; mark the rest undef.
    NewCopy->getOperand(0).setSubReg(Def.SubReg);
    NewCopy->getOperand(0).setIsUndef();
  }

  LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
  LLVM_DEBUG(dbgs() << "   Replacing: " << CopyLike);
  LLVM_DEBUG(dbgs() << "        With: " << *NewCopy);
  MRI->replaceRegWith(Def.Reg, NewVReg);
  MRI->clearKillFlags(NewVReg);

  // We extended the lifetime of NewSrc.Reg, clear the kill flags to
  // account for that.
  MRI->clearKillFlags(NewSrc.Reg);

  return *NewCopy;
}
1322
1323/// Optimize copy-like instructions to create
1324/// register coalescer friendly instruction.
1325/// The optimization tries to kill-off the \p MI by looking
1326/// through a chain of copies to find a source that has a compatible
1327/// register class.
1328/// If such a source is found, it replace \p MI by a generic COPY
1329/// operation.
1330/// \pre isUncoalescableCopy(*MI) is true.
1331/// \return True, when \p MI has been optimized. In that case, \p MI has
1332/// been removed from its parent.
1333/// All COPY instructions created, are inserted in \p LocalMIs.
1334bool PeepholeOptimizer::optimizeUncoalescableCopy(
1335 MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
1336 assert(isUncoalescableCopy(MI) && "Invalid argument");
1337 UncoalescableRewriter CpyRewriter(MI);
1338
1339 // Rewrite each rewritable source by generating new COPYs. This works
1340 // differently from optimizeCoalescableCopy since it first makes sure that all
1341 // definitions can be rewritten.
1342 RewriteMapTy RewriteMap;
1343 RegSubRegPair Src;
1345 SmallVector<RegSubRegPair, 4> RewritePairs;
1346 while (CpyRewriter.getNextRewritableSource(Src, Def)) {
1347 // If a physical register is here, this is probably for a good reason.
1348 // Do not rewrite that.
1349 if (Def.Reg.isPhysical())
1350 return false;
1351
1352 // FIXME: Uncoalescable copies are treated differently by
1353 // UncoalescableRewriter, and this probably should not share
1354 // API. getNextRewritableSource really finds rewritable defs.
1355 const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
1356
1357 // If we do not know how to rewrite this definition, there is no point
1358 // in trying to kill this instruction.
1359 if (!findNextSource(DefRC, Def.SubReg, Def, RewriteMap))
1360 return false;
1361
1362 RewritePairs.push_back(Def);
1363 }
1364
1365 // The change is possible for all defs, do it.
1366 for (const RegSubRegPair &Def : RewritePairs) {
1367 // Rewrite the "copy" in a way the register coalescer understands.
1368 MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
1369 LocalMIs.insert(&NewCopy);
1370 }
1371
1372 // MI is now dead.
1373 LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI);
1374 MI.eraseFromParent();
1375 ++NumUncoalescableCopies;
1376 return true;
1377}
1378
1379/// Check whether MI is a candidate for folding into a later instruction.
1380/// We only fold loads to virtual registers and the virtual register defined
1381/// has a single user.
1382bool PeepholeOptimizer::isLoadFoldable(
1383 MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
1384 if (!MI.canFoldAsLoad() || !MI.mayLoad())
1385 return false;
1386 const MCInstrDesc &MCID = MI.getDesc();
1387 if (MCID.getNumDefs() != 1)
1388 return false;
1389
1390 Register Reg = MI.getOperand(0).getReg();
1391 // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
1392 // loads. It should be checked when processing uses of the load, since
1393 // uses can be removed during peephole.
1394 if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
1395 MRI->hasOneNonDBGUser(Reg)) {
1396 FoldAsLoadDefCandidates.insert(Reg);
1397 return true;
1398 }
1399 return false;
1400}
1401
1402bool PeepholeOptimizer::isMoveImmediate(
1403 MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
1404 DenseMap<Register, MachineInstr *> &ImmDefMIs) {
1405 const MCInstrDesc &MCID = MI.getDesc();
1406 if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg())
1407 return false;
1408 Register Reg = MI.getOperand(0).getReg();
1409 if (!Reg.isVirtual())
1410 return false;
1411
1412 int64_t ImmVal;
1413 if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal))
1414 return false;
1415
1416 ImmDefMIs.insert(std::make_pair(Reg, &MI));
1417 ImmDefRegs.insert(Reg);
1418 return true;
1419}
1420
/// Try folding register operands that are defined by move immediate
/// instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
/// \param MI The instruction whose register uses may be folded.
/// \param ImmDefRegs Registers known to be defined by a move-immediate.
/// \param ImmDefMIs Map from each such register to its defining instruction.
/// \param Deleted Set to true when \p MI itself has been erased.
/// \return True when a fold was performed.
bool PeepholeOptimizer::foldImmediate(
    MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
    DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) {
  Deleted = false;
  // Only walk the operands described by the MCInstrDesc.
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    // Only virtual-register uses can be folded.
    if (!MO.isReg() || MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;
    if (ImmDefRegs.count(Reg) == 0)
      continue;
    DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg);
    assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
    if (TII->foldImmediate(MI, *II->second, Reg, MRI)) {
      ++NumImmFold;
      // foldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI
      // is not deleted, and we happened to get a same MI, we can delete MI and
      // replace its users.
      if (MRI->getVRegDef(Reg) &&
          MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) {
        Register DstReg = MI.getOperand(0).getReg();
        // Replacement is only safe when both defs live in the same class.
        if (DstReg.isVirtual() &&
            MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) {
          MRI->replaceRegWith(DstReg, Reg);
          MI.eraseFromParent();
          Deleted = true;
        }
      }
      return true;
    }
  }
  return false;
}
1459
1460// FIXME: This is very simple and misses some cases which should be handled when
1461// motivating examples are found.
1462//
1463// The copy rewriting logic should look at uses as well as defs and be able to
1464// eliminate copies across blocks.
1465//
1466// Later copies that are subregister extracts will also not be eliminated since
1467// only the first copy is considered.
1468//
1469// e.g.
1470// %1 = COPY %0
1471// %2 = COPY %0:sub1
1472//
1473// Should replace %2 uses with %1:sub1
1474bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI) {
1475 assert(MI.isCopy() && "expected a COPY machine instruction");
1476
1477 RegSubRegPair SrcPair;
1478 if (!getCopySrc(MI, SrcPair))
1479 return false;
1480
1481 Register DstReg = MI.getOperand(0).getReg();
1482 if (!DstReg.isVirtual())
1483 return false;
1484
1485 if (CopySrcMIs.insert(std::make_pair(SrcPair, &MI)).second) {
1486 // First copy of this reg seen.
1487 return false;
1488 }
1489
1490 MachineInstr *PrevCopy = CopySrcMIs.find(SrcPair)->second;
1491
1492 assert(SrcPair.SubReg == PrevCopy->getOperand(1).getSubReg() &&
1493 "Unexpected mismatching subreg!");
1494
1495 Register PrevDstReg = PrevCopy->getOperand(0).getReg();
1496
1497 // Only replace if the copy register class is the same.
1498 //
1499 // TODO: If we have multiple copies to different register classes, we may want
1500 // to track multiple copies of the same source register.
1501 if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
1502 return false;
1503
1504 MRI->replaceRegWith(DstReg, PrevDstReg);
1505
1506 // Lifetime of the previous copy has been extended.
1507 MRI->clearKillFlags(PrevDstReg);
1508 return true;
1509}
1510
1511bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
1512 return Reg.isPhysical() && !MRI->isAllocatable(Reg);
1513}
1514
1515bool PeepholeOptimizer::foldRedundantNAPhysCopy(
1516 MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
1517 assert(MI.isCopy() && "expected a COPY machine instruction");
1518
1520 return false;
1521
1522 Register DstReg = MI.getOperand(0).getReg();
1523 Register SrcReg = MI.getOperand(1).getReg();
1524 if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) {
1525 // %vreg = COPY $physreg
1526 // Avoid using a datastructure which can track multiple live non-allocatable
1527 // phys->virt copies since LLVM doesn't seem to do this.
1528 NAPhysToVirtMIs.insert({SrcReg, &MI});
1529 return false;
1530 }
1531
1532 if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
1533 return false;
1534
1535 // $physreg = COPY %vreg
1536 auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
1537 if (PrevCopy == NAPhysToVirtMIs.end()) {
1538 // We can't remove the copy: there was an intervening clobber of the
1539 // non-allocatable physical register after the copy to virtual.
1540 LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
1541 << MI);
1542 return false;
1543 }
1544
1545 Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
1546 if (PrevDstReg == SrcReg) {
1547 // Remove the virt->phys copy: we saw the virtual register definition, and
1548 // the non-allocatable physical register's state hasn't changed since then.
1549 LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
1550 ++NumNAPhysCopies;
1551 return true;
1552 }
1553
1554 // Potential missed optimization opportunity: we saw a different virtual
1555 // register get a copy of the non-allocatable physical register, and we only
1556 // track one such copy. Avoid getting confused by this new non-allocatable
1557 // physical register definition, and remove it from the tracked copies.
1558 LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
1559 NAPhysToVirtMIs.erase(PrevCopy);
1560 return false;
1561}
1562
1563/// \bried Returns true if \p MO is a virtual register operand.
1565 return MO.isReg() && MO.getReg().isVirtual();
1566}
1567
/// Starting from \p Reg, follow the chain of single non-debug uses, looking
/// for a recurrence that leads back to one of \p TargetRegs. Instructions on
/// the path are collected in \p RC, together with the operand indices to
/// commute when the tied use is only reachable through commutation.
/// \return True when a recurrence cycle ending in \p TargetRegs was found.
bool PeepholeOptimizer::findTargetRecurrence(
    Register Reg, const SmallSet<Register, 2> &TargetRegs,
    RecurrenceCycle &RC) {
  // Recurrence found if Reg is in TargetRegs.
  if (TargetRegs.count(Reg))
    return true;

  // TODO: Currently, we only allow the last instruction of the recurrence
  // cycle (the instruction that feeds the PHI instruction) to have more than
  // one use to guarantee that commuting operands does not tie registers
  // with overlapping live range. Once we have actual live range info of
  // each register, this constraint can be relaxed.
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  // Give up if the recurrence chain length is longer than the limit.
  if (RC.size() >= MaxRecurrenceChain)
    return false;

  MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
  unsigned Idx = MI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr);

  // Only interested in recurrences whose instructions have only one def, which
  // is a virtual register.
  if (MI.getDesc().getNumDefs() != 1)
    return false;

  MachineOperand &DefOp = MI.getOperand(0);
  if (!isVirtualRegisterOperand(DefOp))
    return false;

  // Check if def operand of MI is tied to any use operand. We are only
  // interested in the case that all the instructions in the recurrence chain
  // have their def operand tied with one of the use operands.
  unsigned TiedUseIdx;
  if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
    return false;

  if (Idx == TiedUseIdx) {
    // The use of Reg is already the tied operand; keep walking the chain.
    RC.push_back(RecurrenceInstr(&MI));
    return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
  } else {
    // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
    unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
    if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
      RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
      return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
    }
  }

  return false;
}
1620
1621/// Phi instructions will eventually be lowered to copy instructions.
1622/// If phi is in a loop header, a recurrence may formulated around the source
1623/// and destination of the phi. For such case commuting operands of the
1624/// instructions in the recurrence may enable coalescing of the copy instruction
1625/// generated from the phi. For example, if there is a recurrence of
1626///
1627/// LoopHeader:
1628/// %1 = phi(%0, %100)
1629/// LoopLatch:
1630/// %0<def, tied1> = ADD %2<def, tied0>, %1
1631///
1632/// , the fact that %0 and %2 are in the same tied operands set makes
1633/// the coalescing of copy instruction generated from the phi in
1634/// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and
1635/// %2 have overlapping live range. This introduces additional move
1636/// instruction to the final assembly. However, if we commute %2 and
1637/// %1 of ADD instruction, the redundant move instruction can be
1638/// avoided.
1639bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
1640 SmallSet<Register, 2> TargetRegs;
1641 for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
1642 MachineOperand &MO = PHI.getOperand(Idx);
1643 assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
1644 TargetRegs.insert(MO.getReg());
1645 }
1646
1647 bool Changed = false;
1648 RecurrenceCycle RC;
1649 if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
1650 // Commutes operands of instructions in RC if necessary so that the copy to
1651 // be generated from PHI can be coalesced.
1652 LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
1653 for (auto &RI : RC) {
1654 LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
1655 auto CP = RI.getCommutePair();
1656 if (CP) {
1657 Changed = true;
1658 TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
1659 (*CP).second);
1660 LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
1661 }
1662 }
1663 }
1664
1665 return Changed;
1666}
1667
1668PreservedAnalyses
1671 MFPropsModifier _(*this, MF);
1672 auto *DT =
1673 Aggressive ? &MFAM.getResult<MachineDominatorTreeAnalysis>(MF) : nullptr;
1674 auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
1675 PeepholeOptimizer Impl(DT, MLI);
1676 bool Changed = Impl.run(MF);
1677 if (!Changed)
1678 return PreservedAnalyses::all();
1679
1681 PA.preserve<MachineDominatorTreeAnalysis>();
1682 PA.preserve<MachineLoopAnalysis>();
1683 PA.preserveSet<CFGAnalyses>();
1684 return PA;
1685}
1686
1687bool PeepholeOptimizerLegacy::runOnMachineFunction(MachineFunction &MF) {
1688 if (skipFunction(MF.getFunction()))
1689 return false;
1690 auto *DT = Aggressive
1691 ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
1692 : nullptr;
1693 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1694 PeepholeOptimizer Impl(DT, MLI);
1695 return Impl.run(MF);
1696}
1697
1698bool PeepholeOptimizer::run(MachineFunction &MF) {
1699
1700 LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
1701 LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
1702
1703 if (DisablePeephole)
1704 return false;
1705
1706 TII = MF.getSubtarget().getInstrInfo();
1708 MRI = &MF.getRegInfo();
1709 MF.setDelegate(this);
1710
1711 bool Changed = false;
1712
1713 for (MachineBasicBlock &MBB : MF) {
1714 bool SeenMoveImm = false;
1715
1716 // During this forward scan, at some point it needs to answer the question
1717 // "given a pointer to an MI in the current BB, is it located before or
1718 // after the current instruction".
1719 // To perform this, the following set keeps track of the MIs already seen
1720 // during the scan, if a MI is not in the set, it is assumed to be located
1721 // after. Newly created MIs have to be inserted in the set as well.
1723 SmallSet<Register, 4> ImmDefRegs;
1725 SmallSet<Register, 16> FoldAsLoadDefCandidates;
1726
1727 // Track when a non-allocatable physical register is copied to a virtual
1728 // register so that useless moves can be removed.
1729 //
1730 // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg`
1731 // without any intervening re-definition of $physreg.
1732 DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
1733
1734 CopySrcMIs.clear();
1735
1736 bool IsLoopHeader = MLI->isLoopHeader(&MBB);
1737
1738 for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
1739 MII != MIE;) {
1740 MachineInstr *MI = &*MII;
1741 // We may be erasing MI below, increment MII now.
1742 ++MII;
1743 LocalMIs.insert(MI);
1744
1745 // Skip debug instructions. They should not affect this peephole
1746 // optimization.
1747 if (MI->isDebugInstr())
1748 continue;
1749
1750 if (MI->isPosition())
1751 continue;
1752
1753 if (IsLoopHeader && MI->isPHI()) {
1754 if (optimizeRecurrence(*MI)) {
1755 Changed = true;
1756 continue;
1757 }
1758 }
1759
1760 if (!MI->isCopy()) {
1761 for (const MachineOperand &MO : MI->operands()) {
1762 // Visit all operands: definitions can be implicit or explicit.
1763 if (MO.isReg()) {
1764 Register Reg = MO.getReg();
1765 if (MO.isDef() && isNAPhysCopy(Reg)) {
1766 const auto &Def = NAPhysToVirtMIs.find(Reg);
1767 if (Def != NAPhysToVirtMIs.end()) {
1768 // A new definition of the non-allocatable physical register
1769 // invalidates previous copies.
1771 << "NAPhysCopy: invalidating because of " << *MI);
1772 NAPhysToVirtMIs.erase(Def);
1773 }
1774 }
1775 } else if (MO.isRegMask()) {
1776 const uint32_t *RegMask = MO.getRegMask();
1777 for (auto &RegMI : NAPhysToVirtMIs) {
1778 Register Def = RegMI.first;
1779 if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
1781 << "NAPhysCopy: invalidating because of " << *MI);
1782 NAPhysToVirtMIs.erase(Def);
1783 }
1784 }
1785 }
1786 }
1787 }
1788
1789 if (MI->isImplicitDef() || MI->isKill())
1790 continue;
1791
1792 if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
1793 // Blow away all non-allocatable physical registers knowledge since we
1794 // don't know what's correct anymore.
1795 //
1796 // FIXME: handle explicit asm clobbers.
1797 LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
1798 << *MI);
1799 NAPhysToVirtMIs.clear();
1800 }
1801
1802 if ((isUncoalescableCopy(*MI) &&
1803 optimizeUncoalescableCopy(*MI, LocalMIs)) ||
1804 (MI->isCompare() && optimizeCmpInstr(*MI)) ||
1805 (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
1806 // MI is deleted.
1807 LocalMIs.erase(MI);
1808 Changed = true;
1809 continue;
1810 }
1811
1812 if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
1813 Changed = true;
1814 continue;
1815 }
1816
1817 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
1818 // MI is just rewritten.
1819 Changed = true;
1820 continue;
1821 }
1822
1823 if (MI->isCopy() && (foldRedundantCopy(*MI) ||
1824 foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
1825 LocalMIs.erase(MI);
1826 LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
1827 MI->eraseFromParent();
1828 Changed = true;
1829 continue;
1830 }
1831
1832 if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
1833 SeenMoveImm = true;
1834 } else {
1835 Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
1836 // optimizeExtInstr might have created new instructions after MI
1837 // and before the already incremented MII. Adjust MII so that the
1838 // next iteration sees the new instructions.
1839 MII = MI;
1840 ++MII;
1841 if (SeenMoveImm) {
1842 bool Deleted;
1843 Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted);
1844 if (Deleted) {
1845 LocalMIs.erase(MI);
1846 continue;
1847 }
1848 }
1849 }
1850
1851 // Check whether MI is a load candidate for folding into a later
1852 // instruction. If MI is not a candidate, check whether we can fold an
1853 // earlier load into MI.
1854 if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
1855 !FoldAsLoadDefCandidates.empty()) {
1856
1857 // We visit each operand even after successfully folding a previous
1858 // one. This allows us to fold multiple loads into a single
1859 // instruction. We do assume that optimizeLoadInstr doesn't insert
1860 // foldable uses earlier in the argument list. Since we don't restart
1861 // iteration, we'd miss such cases.
1862 const MCInstrDesc &MIDesc = MI->getDesc();
1863 for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands(); ++i) {
1864 const MachineOperand &MOp = MI->getOperand(i);
1865 if (!MOp.isReg())
1866 continue;
1867 Register FoldAsLoadDefReg = MOp.getReg();
1868 if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
1869 // We need to fold load after optimizeCmpInstr, since
1870 // optimizeCmpInstr can enable folding by converting SUB to CMP.
1871 // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
1872 // we need it for markUsesInDebugValueAsUndef().
1873 Register FoldedReg = FoldAsLoadDefReg;
1874 MachineInstr *DefMI = nullptr;
1875 if (MachineInstr *FoldMI =
1876 TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
1877 // Update LocalMIs since we replaced MI with FoldMI and deleted
1878 // DefMI.
1879 LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
1880 LLVM_DEBUG(dbgs() << " With: " << *FoldMI);
1881 LocalMIs.erase(MI);
1882 LocalMIs.erase(DefMI);
1883 LocalMIs.insert(FoldMI);
1884 // Update the call info.
1885 if (MI->shouldUpdateAdditionalCallInfo())
1886 MI->getMF()->moveAdditionalCallInfo(MI, FoldMI);
1887 MI->eraseFromParent();
1889 MRI->markUsesInDebugValueAsUndef(FoldedReg);
1890 FoldAsLoadDefCandidates.erase(FoldedReg);
1891 ++NumLoadFold;
1892
1893 // MI is replaced with FoldMI so we can continue trying to fold
1894 Changed = true;
1895 MI = FoldMI;
1896 }
1897 }
1898 }
1899 }
1900
1901 // If we run into an instruction we can't fold across, discard
1902 // the load candidates. Note: We might be able to fold *into* this
1903 // instruction, so this needs to be after the folding logic.
1904 if (MI->isLoadFoldBarrier()) {
1905 LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
1906 FoldAsLoadDefCandidates.clear();
1907 }
1908 }
1909 }
1910
1911 MF.resetDelegate(this);
1912 return Changed;
1913}
1914
1915ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
1916 assert(Def->isCopy() && "Invalid definition");
1917 // Copy instruction are supposed to be: Def = Src.
1918 // If someone breaks this assumption, bad things will happen everywhere.
1919 // There may be implicit uses preventing the copy to be moved across
1920 // some target specific register definitions
1921 assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
1922 "Invalid number of operands");
1923 assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
1924 assert(!Def->getOperand(DefIdx).getSubReg() && "no subregister defs in SSA");
1925
1926 // Otherwise, we want the whole source.
1927 const MachineOperand &Src = Def->getOperand(1);
1928 if (Src.isUndef())
1929 return ValueTrackerResult();
1930
1931 Register SrcReg = Src.getReg();
1932 unsigned SubReg = Src.getSubReg();
1933 if (DefSubReg) {
1934 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
1935 SubReg = TRI->composeSubRegIndices(SubReg, DefSubReg);
1936
1937 if (SrcReg.isVirtual()) {
1938 // TODO: Try constraining on rewrite if we can
1939 const TargetRegisterClass *RegRC = MRI.getRegClass(SrcReg);
1940 const TargetRegisterClass *SrcWithSubRC =
1941 TRI->getSubClassWithSubReg(RegRC, SubReg);
1942 if (RegRC != SrcWithSubRC)
1943 return ValueTrackerResult();
1944 } else {
1945 if (!TRI->getSubReg(SrcReg, SubReg))
1946 return ValueTrackerResult();
1947 }
1948 }
1949
1950 return ValueTrackerResult(SrcReg, SubReg);
1951}
1952
1953ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
1954 assert(Def->isBitcast() && "Invalid definition");
1955
1956 // Bail if there are effects that a plain copy will not expose.
1957 if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
1958 return ValueTrackerResult();
1959
1960 // Bitcasts with more than one def are not supported.
1961 if (Def->getDesc().getNumDefs() != 1)
1962 return ValueTrackerResult();
1963
1964 assert(!Def->getOperand(DefIdx).getSubReg() && "no subregister defs in SSA");
1965
1966 unsigned SrcIdx = Def->getNumOperands();
1967 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
1968 ++OpIdx) {
1969 const MachineOperand &MO = Def->getOperand(OpIdx);
1970 if (!MO.isReg() || !MO.getReg())
1971 continue;
1972 // Ignore dead implicit defs.
1973 if (MO.isImplicit() && MO.isDead())
1974 continue;
1975 assert(!MO.isDef() && "We should have skipped all the definitions by now");
1976 if (SrcIdx != EndOpIdx)
1977 // Multiple sources?
1978 return ValueTrackerResult();
1979 SrcIdx = OpIdx;
1980 }
1981
1982 // In some rare case, Def has no input, SrcIdx is out of bound,
1983 // getOperand(SrcIdx) will fail below.
1984 if (SrcIdx >= Def->getNumOperands())
1985 return ValueTrackerResult();
1986
1987 const MachineOperand &DefOp = Def->getOperand(DefIdx);
1988
1989 // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
1990 // will break the assumed guarantees for the upper bits.
1991 for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
1992 if (UseMI.isSubregToReg())
1993 return ValueTrackerResult();
1994 }
1995
1996 const MachineOperand &Src = Def->getOperand(SrcIdx);
1997 if (Src.isUndef())
1998 return ValueTrackerResult();
1999 return ValueTrackerResult(Src.getReg(), Src.getSubReg());
2000}
2001
2002ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
2003 assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
2004 "Invalid definition");
2005
2006 assert(!Def->getOperand(DefIdx).getSubReg() && "illegal subregister def");
2007
2009 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
2010 return ValueTrackerResult();
2011
2012 // We are looking at:
2013 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
2014 //
2015 // Check if one of the operands exactly defines the subreg we are interested
2016 // in.
2017 for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
2018 if (RegSeqInput.SubIdx == DefSubReg)
2019 return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
2020 }
2021
2022 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
2023
2024 // If we did not find an exact match, see if we can do a composition to
2025 // extract a sub-subregister.
2026 for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
2027 LaneBitmask DefMask = TRI->getSubRegIndexLaneMask(DefSubReg);
2028 LaneBitmask ThisOpRegMask = TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);
2029
2030 // Check that this extract reads a subset of this single reg_sequence input.
2031 //
2032 // FIXME: We should be able to filter this in terms of the indexes directly
2033 // without checking the lanemasks.
2034 if ((DefMask & ThisOpRegMask) != DefMask)
2035 continue;
2036
2037 unsigned ReverseDefCompose =
2038 TRI->reverseComposeSubRegIndices(RegSeqInput.SubIdx, DefSubReg);
2039 if (!ReverseDefCompose)
2040 continue;
2041
2042 unsigned ComposedDefInSrcReg1 =
2043 TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);
2044
2045 // TODO: We should be able to defer checking if the result register class
2046 // supports the index to continue looking for a rewritable source.
2047 //
2048 // TODO: Should we modify the register class to support the index?
2049 const TargetRegisterClass *SrcRC = MRI.getRegClass(RegSeqInput.Reg);
2050 const TargetRegisterClass *SrcWithSubRC =
2051 TRI->getSubClassWithSubReg(SrcRC, ComposedDefInSrcReg1);
2052 if (SrcRC != SrcWithSubRC)
2053 return ValueTrackerResult();
2054
2055 return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
2056 }
2057
2058 // If the subreg we are tracking is super-defined by another subreg,
2059 // we could follow this value. However, this would require to compose
2060 // the subreg and we do not do that for now.
2061 return ValueTrackerResult();
2062}
2063
2064ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
2065 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
2066 "Invalid definition");
2067 assert(!Def->getOperand(DefIdx).getSubReg() && "no subreg defs in SSA");
2068
2070 RegSubRegPairAndIdx InsertedReg;
2071 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
2072 return ValueTrackerResult();
2073
2074 // We are looking at:
2075 // Def = INSERT_SUBREG v0, v1, sub1
2076 // There are two cases:
2077 // 1. DefSubReg == sub1, get v1.
2078 // 2. DefSubReg != sub1, the value may be available through v0.
2079
2080 // #1 Check if the inserted register matches the required sub index.
2081 if (InsertedReg.SubIdx == DefSubReg) {
2082 return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
2083 }
2084 // #2 Otherwise, if the sub register we are looking for is not partial
2085 // defined by the inserted element, we can look through the main
2086 // register (v0).
2087 const MachineOperand &MODef = Def->getOperand(DefIdx);
2088 // If the result register (Def) and the base register (v0) do not
2089 // have the same register class or if we have to compose
2090 // subregisters, bail out.
2091 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
2092 BaseReg.SubReg)
2093 return ValueTrackerResult();
2094
2095 // Get the TRI and check if the inserted sub-register overlaps with the
2096 // sub-register we are tracking.
2097 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
2098 if ((TRI->getSubRegIndexLaneMask(DefSubReg) &
2099 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx))
2100 .any())
2101 return ValueTrackerResult();
2102 // At this point, the value is available in v0 via the same subreg
2103 // we used for Def.
2104 return ValueTrackerResult(BaseReg.Reg, DefSubReg);
2105}
2106
2107ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
2108 assert((Def->isExtractSubreg() || Def->isExtractSubregLike()) &&
2109 "Invalid definition");
2110 // We are looking at:
2111 // Def = EXTRACT_SUBREG v0, sub0
2112
2113 // Bail if we have to compose sub registers.
2114 // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
2115 if (DefSubReg)
2116 return ValueTrackerResult();
2117
2118 RegSubRegPairAndIdx ExtractSubregInputReg;
2119 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
2120 return ValueTrackerResult();
2121
2122 // Bail if we have to compose sub registers.
2123 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
2124 if (ExtractSubregInputReg.SubReg)
2125 return ValueTrackerResult();
2126 // Otherwise, the value is available in the v0.sub0.
2127 return ValueTrackerResult(ExtractSubregInputReg.Reg,
2128 ExtractSubregInputReg.SubIdx);
2129}
2130
2131ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
2132 assert(Def->isSubregToReg() && "Invalid definition");
2133 // We are looking at:
2134 // Def = SUBREG_TO_REG Imm, v0, sub0
2135
2136 // Bail if we have to compose sub registers.
2137 // If DefSubReg != sub0, we would have to check that all the bits
2138 // we track are included in sub0 and if yes, we would have to
2139 // determine the right subreg in v0.
2140 if (DefSubReg != Def->getOperand(3).getImm())
2141 return ValueTrackerResult();
2142 // Bail if we have to compose sub registers.
2143 // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
2144 if (Def->getOperand(2).getSubReg())
2145 return ValueTrackerResult();
2146
2147 return ValueTrackerResult(Def->getOperand(2).getReg(),
2148 Def->getOperand(3).getImm());
2149}
2150
2151/// Explore each PHI incoming operand and return its sources.
2152ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
2153 assert(Def->isPHI() && "Invalid definition");
2154 ValueTrackerResult Res;
2155
2156 // Return all register sources for PHI instructions.
2157 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
2158 const MachineOperand &MO = Def->getOperand(i);
2159 assert(MO.isReg() && "Invalid PHI instruction");
2160 // We have no code to deal with undef operands. They shouldn't happen in
2161 // normal programs anyway.
2162 if (MO.isUndef())
2163 return ValueTrackerResult();
2164 Res.addSource(MO.getReg(), MO.getSubReg());
2165 }
2166
2167 return Res;
2168}
2169
2170ValueTrackerResult ValueTracker::getNextSourceImpl() {
2171 assert(Def && "This method needs a valid definition");
2172
2173 assert(((Def->getOperand(DefIdx).isDef() &&
2174 (DefIdx < Def->getDesc().getNumDefs() ||
2175 Def->getDesc().isVariadic())) ||
2176 Def->getOperand(DefIdx).isImplicit()) &&
2177 "Invalid DefIdx");
2178 if (Def->isCopy())
2179 return getNextSourceFromCopy();
2180 if (Def->isBitcast())
2181 return getNextSourceFromBitcast();
2182 // All the remaining cases involve "complex" instructions.
2183 // Bail if we did not ask for the advanced tracking.
2185 return ValueTrackerResult();
2186 if (Def->isRegSequence() || Def->isRegSequenceLike())
2187 return getNextSourceFromRegSequence();
2188 if (Def->isInsertSubreg() || Def->isInsertSubregLike())
2189 return getNextSourceFromInsertSubreg();
2190 if (Def->isExtractSubreg() || Def->isExtractSubregLike())
2191 return getNextSourceFromExtractSubreg();
2192 if (Def->isSubregToReg())
2193 return getNextSourceFromSubregToReg();
2194 if (Def->isPHI())
2195 return getNextSourceFromPHI();
2196 return ValueTrackerResult();
2197}
2198
2199ValueTrackerResult ValueTracker::getNextSource() {
2200 // If we reach a point where we cannot move up in the use-def chain,
2201 // there is nothing we can get.
2202 if (!Def)
2203 return ValueTrackerResult();
2204
2205 ValueTrackerResult Res = getNextSourceImpl();
2206 if (Res.isValid()) {
2207 // Update definition, definition index, and subregister for the
2208 // next call of getNextSource.
2209 // Update the current register.
2210 bool OneRegSrc = Res.getNumSources() == 1;
2211 if (OneRegSrc)
2212 Reg = Res.getSrcReg(0);
2213 // Update the result before moving up in the use-def chain
2214 // with the instruction containing the last found sources.
2215 Res.setInst(Def);
2216
2217 // If we can still move up in the use-def chain, move to the next
2218 // definition.
2219 if (!Reg.isPhysical() && OneRegSrc) {
2221 if (DI != MRI.def_end()) {
2222 Def = DI->getParent();
2223 DefIdx = DI.getOperandNo();
2224 DefSubReg = Res.getSrcSubReg(0);
2225 } else {
2226 Def = nullptr;
2227 }
2228 return Res;
2229 }
2230 }
2231 // If we end up here, this means we will not be able to find another source
2232 // for the next iteration. Make sure any new call to getNextSource bails out
2233 // early by cutting the use-def chain.
2234 Def = nullptr;
2235 return Res;
2236}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock & MBB
This file defines the DenseMap class.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition MD5.cpp:57
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static cl::opt< unsigned > RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10), cl::desc("Limit the length of PHI chains to lookup"))
static cl::opt< bool > DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer"))
static cl::opt< unsigned > MaxRecurrenceChain("recurrence-chain-limit", cl::Hidden, cl::init(3), cl::desc("Maximum length of recurrence chain when evaluating the benefit " "of commuting operands"))
static cl::opt< bool > DisableNAPhysCopyOpt("disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable non-allocatable physical register copy optimization"))
static bool isVirtualRegisterOperand(MachineOperand &MO)
\bried Returns true if MO is a virtual register operand.
static MachineInstr & insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const SmallVectorImpl< RegSubRegPair > &SrcRegs, MachineInstr &OrigPHI)
Insert a PHI instruction with incoming edges SrcRegs that are guaranteed to have the same register cl...
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
static cl::opt< bool > DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization"))
Specifiy whether or not the value tracking looks through complex instructions.
TargetInstrInfo::RegSubRegPairAndIdx RegSubRegPairAndIdx
static RegSubRegPair getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, RegSubRegPair Def, const PeepholeOptimizer::RewriteMapTy &RewriteMap, bool HandleMultipleSources=true)
Given a Def.Reg and Def.SubReg pair, use RewriteMap to find the new source to use for rewrite.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
Virtual Register Rewriter
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:330
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
bool isLoopHeader(const BlockT *BB) const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
An RAII based helper class to modify MachineFunctionProperties when running pass.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void setDelegate(Delegate *delegate)
Set the delegate.
const MachineInstrBuilder & addReg(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_iterator< false, true, false, true, false > def_iterator
def_iterator/def_begin/def_end - Walk all defs of the specified register.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:199
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static const unsigned CommuteAnyOperandIndex
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MCInstrDesc const & getDesc(MCInstrInfo const &MCII, MCInst const &MCI)
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI char & PeepholeOptimizerLegacyID
PeepholeOptimizer - This pass performs peephole optimizations - like extension and comparison elimina...
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Other
Any other memory.
Definition ModRef.h:68
A pair composed of a pair of a register and a sub-register index, and another sub-register index.
A pair composed of a register and a sub-register index.