LLVM  6.0.0svn
HexagonBitSimplify.cpp
Go to the documentation of this file.
1 //===- HexagonBitSimplify.cpp ---------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "BitTracker.h"
11 #include "HexagonBitTracker.h"
12 #include "HexagonInstrInfo.h"
13 #include "HexagonRegisterInfo.h"
14 #include "HexagonSubtarget.h"
15 #include "llvm/ADT/BitVector.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/GraphTraits.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
30 #include "llvm/IR/DebugLoc.h"
31 #include "llvm/MC/MCInstrDesc.h"
32 #include "llvm/Pass.h"
34 #include "llvm/Support/Compiler.h"
35 #include "llvm/Support/Debug.h"
39 #include <algorithm>
40 #include <cassert>
41 #include <cstdint>
42 #include <iterator>
43 #include <limits>
44 #include <utility>
45 #include <vector>
46 
47 #define DEBUG_TYPE "hexbit"
48 
49 using namespace llvm;
50 
51 static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
52  cl::init(true), cl::desc("Preserve subregisters in tied operands"));
53 static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
54  cl::init(true), cl::desc("Generate extract instructions"));
55 static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,
56  cl::init(true), cl::desc("Generate bitsplit instructions"));
57 
58 static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
60 static unsigned CountExtract = 0;
61 static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
63 static unsigned CountBitSplit = 0;
64 
65 namespace llvm {
66 
69 
70 } // end namespace llvm
71 
72 namespace {
73 
74  // Set of virtual registers, based on BitVector.
75  struct RegisterSet : private BitVector {
76  RegisterSet() = default;
77  explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
78  RegisterSet(const RegisterSet &RS) = default;
79 
80  using BitVector::clear;
81  using BitVector::count;
82 
83  unsigned find_first() const {
84  int First = BitVector::find_first();
85  if (First < 0)
86  return 0;
87  return x2v(First);
88  }
89 
90  unsigned find_next(unsigned Prev) const {
91  int Next = BitVector::find_next(v2x(Prev));
92  if (Next < 0)
93  return 0;
94  return x2v(Next);
95  }
96 
97  RegisterSet &insert(unsigned R) {
98  unsigned Idx = v2x(R);
99  ensure(Idx);
100  return static_cast<RegisterSet&>(BitVector::set(Idx));
101  }
102  RegisterSet &remove(unsigned R) {
103  unsigned Idx = v2x(R);
104  if (Idx >= size())
105  return *this;
106  return static_cast<RegisterSet&>(BitVector::reset(Idx));
107  }
108 
109  RegisterSet &insert(const RegisterSet &Rs) {
110  return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
111  }
112  RegisterSet &remove(const RegisterSet &Rs) {
113  return static_cast<RegisterSet&>(BitVector::reset(Rs));
114  }
115 
116  reference operator[](unsigned R) {
117  unsigned Idx = v2x(R);
118  ensure(Idx);
119  return BitVector::operator[](Idx);
120  }
121  bool operator[](unsigned R) const {
122  unsigned Idx = v2x(R);
123  assert(Idx < size());
124  return BitVector::operator[](Idx);
125  }
126  bool has(unsigned R) const {
127  unsigned Idx = v2x(R);
128  if (Idx >= size())
129  return false;
130  return BitVector::test(Idx);
131  }
132 
133  bool empty() const {
134  return !BitVector::any();
135  }
136  bool includes(const RegisterSet &Rs) const {
137  // A.BitVector::test(B) <=> A-B != {}
138  return !Rs.BitVector::test(*this);
139  }
140  bool intersects(const RegisterSet &Rs) const {
141  return BitVector::anyCommon(Rs);
142  }
143 
144  private:
145  void ensure(unsigned Idx) {
146  if (size() <= Idx)
147  resize(std::max(Idx+1, 32U));
148  }
149 
150  static inline unsigned v2x(unsigned v) {
152  }
153 
154  static inline unsigned x2v(unsigned x) {
156  }
157  };
158 
159  struct PrintRegSet {
160  PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
161  : RS(S), TRI(RI) {}
162 
163  friend raw_ostream &operator<< (raw_ostream &OS,
164  const PrintRegSet &P);
165 
166  private:
167  const RegisterSet &RS;
168  const TargetRegisterInfo *TRI;
169  };
170 
171  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
173  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
174  OS << '{';
175  for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
176  OS << ' ' << PrintReg(R, P.TRI);
177  OS << " }";
178  return OS;
179  }
180 
181  class Transformation;
182 
183  class HexagonBitSimplify : public MachineFunctionPass {
184  public:
185  static char ID;
186 
187  HexagonBitSimplify() : MachineFunctionPass(ID) {
189  }
190 
191  StringRef getPassName() const override {
192  return "Hexagon bit simplification";
193  }
194 
195  void getAnalysisUsage(AnalysisUsage &AU) const override {
199  }
200 
201  bool runOnMachineFunction(MachineFunction &MF) override;
202 
203  static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
204  static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
205  static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
206  const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
207  static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
208  uint16_t W);
209  static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
210  uint16_t W, uint64_t &U);
211  static bool replaceReg(unsigned OldR, unsigned NewR,
213  static bool getSubregMask(const BitTracker::RegisterRef &RR,
214  unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
215  static bool replaceRegWithSub(unsigned OldR, unsigned NewR,
216  unsigned NewSR, MachineRegisterInfo &MRI);
217  static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
218  unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
219  static bool parseRegSequence(const MachineInstr &I,
221  const MachineRegisterInfo &MRI);
222 
223  static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
224  uint16_t Begin);
225  static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
226  uint16_t Begin, const HexagonInstrInfo &HII);
227 
228  static const TargetRegisterClass *getFinalVRegClass(
230  static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
232 
233  private:
234  MachineDominatorTree *MDT = nullptr;
235 
236  bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
237  static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
238  unsigned NewSub = Hexagon::NoSubRegister);
239  };
240 
241  using HBS = HexagonBitSimplify;
242 
243  // The purpose of this class is to provide a common facility to traverse
244  // the function top-down or bottom-up via the dominator tree, and keep
245  // track of the available registers.
246  class Transformation {
247  public:
248  bool TopDown;
249 
250  Transformation(bool TD) : TopDown(TD) {}
251  virtual ~Transformation() = default;
252 
253  virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
254  };
255 
256 } // end anonymous namespace
257 
258 char HexagonBitSimplify::ID = 0;
259 
260 INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit",
261  "Hexagon bit simplification", false, false)
263 INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
264  "Hexagon bit simplification", false, false)
265 
266 bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
267  RegisterSet &AVs) {
268  bool Changed = false;
269 
270  if (T.TopDown)
271  Changed = T.processBlock(B, AVs);
272 
273  RegisterSet Defs;
274  for (auto &I : B)
275  getInstrDefs(I, Defs);
276  RegisterSet NewAVs = AVs;
277  NewAVs.insert(Defs);
278 
279  for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B)))
280  Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs);
281 
282  if (!T.TopDown)
283  Changed |= T.processBlock(B, AVs);
284 
285  return Changed;
286 }
287 
288 //
289 // Utility functions:
290 //
291 void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
292  RegisterSet &Defs) {
293  for (auto &Op : MI.operands()) {
294  if (!Op.isReg() || !Op.isDef())
295  continue;
296  unsigned R = Op.getReg();
298  continue;
299  Defs.insert(R);
300  }
301 }
302 
303 void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
304  RegisterSet &Uses) {
305  for (auto &Op : MI.operands()) {
306  if (!Op.isReg() || !Op.isUse())
307  continue;
308  unsigned R = Op.getReg();
310  continue;
311  Uses.insert(R);
312  }
313 }
314 
315 // Check if all the bits in range [B, E) in both cells are equal.
317  uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
318  uint16_t W) {
319  for (uint16_t i = 0; i < W; ++i) {
320  // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
321  if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
322  return false;
323  // Same for RC2[i].
324  if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
325  return false;
326  if (RC1[B1+i] != RC2[B2+i])
327  return false;
328  }
329  return true;
330 }
331 
333  uint16_t B, uint16_t W) {
334  assert(B < RC.width() && B+W <= RC.width());
335  for (uint16_t i = B; i < B+W; ++i)
336  if (!RC[i].is(0))
337  return false;
338  return true;
339 }
340 
341 bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
342  uint16_t B, uint16_t W, uint64_t &U) {
343  assert(B < RC.width() && B+W <= RC.width());
344  int64_t T = 0;
345  for (uint16_t i = B+W; i > B; --i) {
346  const BitTracker::BitValue &BV = RC[i-1];
347  T <<= 1;
348  if (BV.is(1))
349  T |= 1;
350  else if (!BV.is(0))
351  return false;
352  }
353  U = T;
354  return true;
355 }
356 
357 bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
361  return false;
362  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
363  decltype(End) NextI;
364  for (auto I = Begin; I != End; I = NextI) {
365  NextI = std::next(I);
366  I->setReg(NewR);
367  }
368  return Begin != End;
369 }
370 
371 bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
372  unsigned NewSR, MachineRegisterInfo &MRI) {
375  return false;
376  if (hasTiedUse(OldR, MRI, NewSR))
377  return false;
378  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
379  decltype(End) NextI;
380  for (auto I = Begin; I != End; I = NextI) {
381  NextI = std::next(I);
382  I->setReg(NewR);
383  I->setSubReg(NewSR);
384  }
385  return Begin != End;
386 }
387 
388 bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
389  unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
392  return false;
393  if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))
394  return false;
395  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
396  decltype(End) NextI;
397  for (auto I = Begin; I != End; I = NextI) {
398  NextI = std::next(I);
399  if (I->getSubReg() != OldSR)
400  continue;
401  I->setReg(NewR);
402  I->setSubReg(NewSR);
403  }
404  return Begin != End;
405 }
406 
407 // For a register ref (pair Reg:Sub), set Begin to the position of the LSB
408 // of Sub in Reg, and set Width to the size of Sub in bits. Return true,
409 // if this succeeded, otherwise return false.
410 bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
411  unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
412  const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
413  if (RR.Sub == 0) {
414  Begin = 0;
415  Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC);
416  return true;
417  }
418 
419  Begin = 0;
420 
421  switch (RC->getID()) {
422  case Hexagon::DoubleRegsRegClassID:
423  case Hexagon::HvxWRRegClassID:
424  Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2;
425  if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi)
426  Begin = Width;
427  break;
428  default:
429  return false;
430  }
431  return true;
432 }
433 
434 
435 // For a REG_SEQUENCE, set SL to the low subregister and SH to the high
436 // subregister.
437 bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
439  const MachineRegisterInfo &MRI) {
440  assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
441  unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
442  auto &DstRC = *MRI.getRegClass(I.getOperand(0).getReg());
443  auto &HRI = static_cast<const HexagonRegisterInfo&>(
444  *MRI.getTargetRegisterInfo());
445  unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo);
446  unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi);
447  assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo));
448  if (Sub1 == SubLo && Sub2 == SubHi) {
449  SL = I.getOperand(1);
450  SH = I.getOperand(3);
451  return true;
452  }
453  if (Sub1 == SubHi && Sub2 == SubLo) {
454  SH = I.getOperand(1);
455  SL = I.getOperand(3);
456  return true;
457  }
458  return false;
459 }
460 
461 // All stores (except 64-bit stores) take a 32-bit register as the source
462 // of the value to be stored. If the instruction stores into a location
463 // that is shorter than 32 bits, some bits of the source register are not
464 // used. For each store instruction, calculate the set of used bits in
465 // the source register, and set appropriate bits in Bits. Return true if
466 // the bits are calculated, false otherwise.
467 bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
468  uint16_t Begin) {
469  using namespace Hexagon;
470 
471  switch (Opc) {
472  // Store byte
473  case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32
474  case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new
475  case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32
476  case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32
477  case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
478  case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
479  case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
480  case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
481  case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
482  case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
483  case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32
484  case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new
485  case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32
486  case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32
487  case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
488  case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
489  case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
490  case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
491  case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
492  case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
493  case S4_storerb_ap: // memb(Re32=#U6)=Rt32
494  case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new
495  case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32
496  case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new
497  case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32
498  case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new
499  case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32
500  case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new
501  case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32
502  case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
503  case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32
504  case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new
505  case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32
506  case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new
507  case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
508  case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
509  case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
510  case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
511  case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
512  case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
513  case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
514  case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
515  case S2_storerbgp: // memb(gp+#u16:0)=Rt32
516  case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new
517  case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32
518  case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32
519  case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32
520  case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32
521  case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new
522  case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new
523  case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new
524  case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new
525  Bits.set(Begin, Begin+8);
526  return true;
527 
528  // Store low half
529  case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32
530  case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new
531  case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32
532  case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32
533  case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
534  case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
535  case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
536  case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
537  case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
538  case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
539  case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32
540  case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new
541  case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32
542  case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32
543  case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
544  case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
545  case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
546  case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
547  case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
548  case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
549  case S4_storerh_ap: // memh(Re32=#U6)=Rt32
550  case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new
551  case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32
552  case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new
553  case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32
554  case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new
555  case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32
556  case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new
557  case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32
558  case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
559  case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32
560  case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new
561  case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32
562  case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
563  case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
564  case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
565  case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
566  case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new
567  case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
568  case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
569  case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
570  case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
571  case S2_storerhgp: // memh(gp+#u16:1)=Rt32
572  case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new
573  case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32
574  case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32
575  case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32
576  case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32
577  case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new
578  case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new
579  case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new
580  case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new
581  Bits.set(Begin, Begin+16);
582  return true;
583 
584  // Store high half
585  case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32
586  case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
587  case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
588  case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
589  case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
590  case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32
591  case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
592  case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
593  case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
594  case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
595  case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32
596  case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32
597  case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32
598  case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32
599  case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
600  case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32
601  case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32
602  case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
603  case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
604  case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
605  case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
606  case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32
607  case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32
608  case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32
609  case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32
610  case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32
611  Bits.set(Begin+16, Begin+32);
612  return true;
613  }
614 
615  return false;
616 }
617 
618 // For an instruction with opcode Opc, calculate the set of bits that it
619 // uses in a register in operand OpN. This only calculates the set of used
620 // bits for cases where it does not depend on any operands (as is the case
621 // in shifts, for example). For concrete instructions from a program, the
622 // operand may be a subregister of a larger register, while Bits would
623 // correspond to the larger register in its entirety. Because of that,
624 // the parameter Begin can be used to indicate which bit of Bits should be
625 // considered the LSB of of the operand.
626 bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
627  BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
628  using namespace Hexagon;
629 
630  const MCInstrDesc &D = HII.get(Opc);
631  if (D.mayStore()) {
632  if (OpN == D.getNumOperands()-1)
633  return getUsedBitsInStore(Opc, Bits, Begin);
634  return false;
635  }
636 
637  switch (Opc) {
638  // One register source. Used bits: R1[0-7].
639  case A2_sxtb:
640  case A2_zxtb:
641  case A4_cmpbeqi:
642  case A4_cmpbgti:
643  case A4_cmpbgtui:
644  if (OpN == 1) {
645  Bits.set(Begin, Begin+8);
646  return true;
647  }
648  break;
649 
650  // One register source. Used bits: R1[0-15].
651  case A2_aslh:
652  case A2_sxth:
653  case A2_zxth:
654  case A4_cmpheqi:
655  case A4_cmphgti:
656  case A4_cmphgtui:
657  if (OpN == 1) {
658  Bits.set(Begin, Begin+16);
659  return true;
660  }
661  break;
662 
663  // One register source. Used bits: R1[16-31].
664  case A2_asrh:
665  if (OpN == 1) {
666  Bits.set(Begin+16, Begin+32);
667  return true;
668  }
669  break;
670 
671  // Two register sources. Used bits: R1[0-7], R2[0-7].
672  case A4_cmpbeq:
673  case A4_cmpbgt:
674  case A4_cmpbgtu:
675  if (OpN == 1) {
676  Bits.set(Begin, Begin+8);
677  return true;
678  }
679  break;
680 
681  // Two register sources. Used bits: R1[0-15], R2[0-15].
682  case A4_cmpheq:
683  case A4_cmphgt:
684  case A4_cmphgtu:
685  case A2_addh_h16_ll:
686  case A2_addh_h16_sat_ll:
687  case A2_addh_l16_ll:
688  case A2_addh_l16_sat_ll:
689  case A2_combine_ll:
690  case A2_subh_h16_ll:
691  case A2_subh_h16_sat_ll:
692  case A2_subh_l16_ll:
693  case A2_subh_l16_sat_ll:
694  case M2_mpy_acc_ll_s0:
695  case M2_mpy_acc_ll_s1:
696  case M2_mpy_acc_sat_ll_s0:
697  case M2_mpy_acc_sat_ll_s1:
698  case M2_mpy_ll_s0:
699  case M2_mpy_ll_s1:
700  case M2_mpy_nac_ll_s0:
701  case M2_mpy_nac_ll_s1:
702  case M2_mpy_nac_sat_ll_s0:
703  case M2_mpy_nac_sat_ll_s1:
704  case M2_mpy_rnd_ll_s0:
705  case M2_mpy_rnd_ll_s1:
706  case M2_mpy_sat_ll_s0:
707  case M2_mpy_sat_ll_s1:
708  case M2_mpy_sat_rnd_ll_s0:
709  case M2_mpy_sat_rnd_ll_s1:
710  case M2_mpyd_acc_ll_s0:
711  case M2_mpyd_acc_ll_s1:
712  case M2_mpyd_ll_s0:
713  case M2_mpyd_ll_s1:
714  case M2_mpyd_nac_ll_s0:
715  case M2_mpyd_nac_ll_s1:
716  case M2_mpyd_rnd_ll_s0:
717  case M2_mpyd_rnd_ll_s1:
718  case M2_mpyu_acc_ll_s0:
719  case M2_mpyu_acc_ll_s1:
720  case M2_mpyu_ll_s0:
721  case M2_mpyu_ll_s1:
722  case M2_mpyu_nac_ll_s0:
723  case M2_mpyu_nac_ll_s1:
724  case M2_mpyud_acc_ll_s0:
725  case M2_mpyud_acc_ll_s1:
726  case M2_mpyud_ll_s0:
727  case M2_mpyud_ll_s1:
728  case M2_mpyud_nac_ll_s0:
729  case M2_mpyud_nac_ll_s1:
730  if (OpN == 1 || OpN == 2) {
731  Bits.set(Begin, Begin+16);
732  return true;
733  }
734  break;
735 
736  // Two register sources. Used bits: R1[0-15], R2[16-31].
737  case A2_addh_h16_lh:
738  case A2_addh_h16_sat_lh:
739  case A2_combine_lh:
740  case A2_subh_h16_lh:
741  case A2_subh_h16_sat_lh:
742  case M2_mpy_acc_lh_s0:
743  case M2_mpy_acc_lh_s1:
744  case M2_mpy_acc_sat_lh_s0:
745  case M2_mpy_acc_sat_lh_s1:
746  case M2_mpy_lh_s0:
747  case M2_mpy_lh_s1:
748  case M2_mpy_nac_lh_s0:
749  case M2_mpy_nac_lh_s1:
750  case M2_mpy_nac_sat_lh_s0:
751  case M2_mpy_nac_sat_lh_s1:
752  case M2_mpy_rnd_lh_s0:
753  case M2_mpy_rnd_lh_s1:
754  case M2_mpy_sat_lh_s0:
755  case M2_mpy_sat_lh_s1:
756  case M2_mpy_sat_rnd_lh_s0:
757  case M2_mpy_sat_rnd_lh_s1:
758  case M2_mpyd_acc_lh_s0:
759  case M2_mpyd_acc_lh_s1:
760  case M2_mpyd_lh_s0:
761  case M2_mpyd_lh_s1:
762  case M2_mpyd_nac_lh_s0:
763  case M2_mpyd_nac_lh_s1:
764  case M2_mpyd_rnd_lh_s0:
765  case M2_mpyd_rnd_lh_s1:
766  case M2_mpyu_acc_lh_s0:
767  case M2_mpyu_acc_lh_s1:
768  case M2_mpyu_lh_s0:
769  case M2_mpyu_lh_s1:
770  case M2_mpyu_nac_lh_s0:
771  case M2_mpyu_nac_lh_s1:
772  case M2_mpyud_acc_lh_s0:
773  case M2_mpyud_acc_lh_s1:
774  case M2_mpyud_lh_s0:
775  case M2_mpyud_lh_s1:
776  case M2_mpyud_nac_lh_s0:
777  case M2_mpyud_nac_lh_s1:
778  // These four are actually LH.
779  case A2_addh_l16_hl:
780  case A2_addh_l16_sat_hl:
781  case A2_subh_l16_hl:
782  case A2_subh_l16_sat_hl:
783  if (OpN == 1) {
784  Bits.set(Begin, Begin+16);
785  return true;
786  }
787  if (OpN == 2) {
788  Bits.set(Begin+16, Begin+32);
789  return true;
790  }
791  break;
792 
793  // Two register sources, used bits: R1[16-31], R2[0-15].
794  case A2_addh_h16_hl:
795  case A2_addh_h16_sat_hl:
796  case A2_combine_hl:
797  case A2_subh_h16_hl:
798  case A2_subh_h16_sat_hl:
799  case M2_mpy_acc_hl_s0:
800  case M2_mpy_acc_hl_s1:
801  case M2_mpy_acc_sat_hl_s0:
802  case M2_mpy_acc_sat_hl_s1:
803  case M2_mpy_hl_s0:
804  case M2_mpy_hl_s1:
805  case M2_mpy_nac_hl_s0:
806  case M2_mpy_nac_hl_s1:
807  case M2_mpy_nac_sat_hl_s0:
808  case M2_mpy_nac_sat_hl_s1:
809  case M2_mpy_rnd_hl_s0:
810  case M2_mpy_rnd_hl_s1:
811  case M2_mpy_sat_hl_s0:
812  case M2_mpy_sat_hl_s1:
813  case M2_mpy_sat_rnd_hl_s0:
814  case M2_mpy_sat_rnd_hl_s1:
815  case M2_mpyd_acc_hl_s0:
816  case M2_mpyd_acc_hl_s1:
817  case M2_mpyd_hl_s0:
818  case M2_mpyd_hl_s1:
819  case M2_mpyd_nac_hl_s0:
820  case M2_mpyd_nac_hl_s1:
821  case M2_mpyd_rnd_hl_s0:
822  case M2_mpyd_rnd_hl_s1:
823  case M2_mpyu_acc_hl_s0:
824  case M2_mpyu_acc_hl_s1:
825  case M2_mpyu_hl_s0:
826  case M2_mpyu_hl_s1:
827  case M2_mpyu_nac_hl_s0:
828  case M2_mpyu_nac_hl_s1:
829  case M2_mpyud_acc_hl_s0:
830  case M2_mpyud_acc_hl_s1:
831  case M2_mpyud_hl_s0:
832  case M2_mpyud_hl_s1:
833  case M2_mpyud_nac_hl_s0:
834  case M2_mpyud_nac_hl_s1:
835  if (OpN == 1) {
836  Bits.set(Begin+16, Begin+32);
837  return true;
838  }
839  if (OpN == 2) {
840  Bits.set(Begin, Begin+16);
841  return true;
842  }
843  break;
844 
845  // Two register sources, used bits: R1[16-31], R2[16-31].
846  case A2_addh_h16_hh:
847  case A2_addh_h16_sat_hh:
848  case A2_combine_hh:
849  case A2_subh_h16_hh:
850  case A2_subh_h16_sat_hh:
851  case M2_mpy_acc_hh_s0:
852  case M2_mpy_acc_hh_s1:
853  case M2_mpy_acc_sat_hh_s0:
854  case M2_mpy_acc_sat_hh_s1:
855  case M2_mpy_hh_s0:
856  case M2_mpy_hh_s1:
857  case M2_mpy_nac_hh_s0:
858  case M2_mpy_nac_hh_s1:
859  case M2_mpy_nac_sat_hh_s0:
860  case M2_mpy_nac_sat_hh_s1:
861  case M2_mpy_rnd_hh_s0:
862  case M2_mpy_rnd_hh_s1:
863  case M2_mpy_sat_hh_s0:
864  case M2_mpy_sat_hh_s1:
865  case M2_mpy_sat_rnd_hh_s0:
866  case M2_mpy_sat_rnd_hh_s1:
867  case M2_mpyd_acc_hh_s0:
868  case M2_mpyd_acc_hh_s1:
869  case M2_mpyd_hh_s0:
870  case M2_mpyd_hh_s1:
871  case M2_mpyd_nac_hh_s0:
872  case M2_mpyd_nac_hh_s1:
873  case M2_mpyd_rnd_hh_s0:
874  case M2_mpyd_rnd_hh_s1:
875  case M2_mpyu_acc_hh_s0:
876  case M2_mpyu_acc_hh_s1:
877  case M2_mpyu_hh_s0:
878  case M2_mpyu_hh_s1:
879  case M2_mpyu_nac_hh_s0:
880  case M2_mpyu_nac_hh_s1:
881  case M2_mpyud_acc_hh_s0:
882  case M2_mpyud_acc_hh_s1:
883  case M2_mpyud_hh_s0:
884  case M2_mpyud_hh_s1:
885  case M2_mpyud_nac_hh_s0:
886  case M2_mpyud_nac_hh_s1:
887  if (OpN == 1 || OpN == 2) {
888  Bits.set(Begin+16, Begin+32);
889  return true;
890  }
891  break;
892  }
893 
894  return false;
895 }
896 
897 // Calculate the register class that matches Reg:Sub. For example, if
898 // vreg1 is a double register, then vreg1:isub_hi would match the "int"
899 // register class.
900 const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
903  return nullptr;
904  auto *RC = MRI.getRegClass(RR.Reg);
905  if (RR.Sub == 0)
906  return RC;
907  auto &HRI = static_cast<const HexagonRegisterInfo&>(
908  *MRI.getTargetRegisterInfo());
909 
910  auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void {
911  (void)HRI;
912  assert(Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_lo) ||
913  Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_hi));
914  };
915 
916  switch (RC->getID()) {
917  case Hexagon::DoubleRegsRegClassID:
918  VerifySR(RC, RR.Sub);
919  return &Hexagon::IntRegsRegClass;
920  case Hexagon::HvxWRRegClassID:
921  VerifySR(RC, RR.Sub);
922  return &Hexagon::HvxVRRegClass;
923  }
924  return nullptr;
925 }
926 
927 // Check if RD could be replaced with RS at any possible use of RD.
928 // For example a predicate register cannot be replaced with a integer
929 // register, but a 64-bit register with a subregister can be replaced
930 // with a 32-bit register.
931 bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
935  return false;
936  // Return false if one (or both) classes are nullptr.
937  auto *DRC = getFinalVRegClass(RD, MRI);
938  if (!DRC)
939  return false;
940 
941  return DRC == getFinalVRegClass(RS, MRI);
942 }
943 
944 bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
945  unsigned NewSub) {
946  if (!PreserveTiedOps)
947  return false;
948  return llvm::any_of(MRI.use_operands(Reg),
949  [NewSub] (const MachineOperand &Op) -> bool {
950  return Op.getSubReg() != NewSub && Op.isTied();
951  });
952 }
953 
954 namespace {
955 
956  class DeadCodeElimination {
957  public:
958  DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
959  : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
960  MDT(mdt), MRI(mf.getRegInfo()) {}
961 
962  bool run() {
963  return runOnNode(MDT.getRootNode());
964  }
965 
966  private:
967  bool isDead(unsigned R) const;
968  bool runOnNode(MachineDomTreeNode *N);
969 
970  MachineFunction &MF;
971  const HexagonInstrInfo &HII;
974  };
975 
976 } // end anonymous namespace
977 
978 bool DeadCodeElimination::isDead(unsigned R) const {
979  for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
980  MachineInstr *UseI = I->getParent();
981  if (UseI->isDebugValue())
982  continue;
983  if (UseI->isPHI()) {
984  assert(!UseI->getOperand(0).getSubReg());
985  unsigned DR = UseI->getOperand(0).getReg();
986  if (DR == R)
987  continue;
988  }
989  return false;
990  }
991  return true;
992 }
993 
994 bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
995  bool Changed = false;
996 
997  for (auto *DTN : children<MachineDomTreeNode*>(N))
998  Changed |= runOnNode(DTN);
999 
1000  MachineBasicBlock *B = N->getBlock();
1001  std::vector<MachineInstr*> Instrs;
1002  for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
1003  Instrs.push_back(&*I);
1004 
1005  for (auto MI : Instrs) {
1006  unsigned Opc = MI->getOpcode();
1007  // Do not touch lifetime markers. This is why the target-independent DCE
1008  // cannot be used.
1009  if (Opc == TargetOpcode::LIFETIME_START ||
1011  continue;
1012  bool Store = false;
1013  if (MI->isInlineAsm())
1014  continue;
1015  // Delete PHIs if possible.
1016  if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
1017  continue;
1018 
1019  bool AllDead = true;
1021  for (auto &Op : MI->operands()) {
1022  if (!Op.isReg() || !Op.isDef())
1023  continue;
1024  unsigned R = Op.getReg();
1025  if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) {
1026  AllDead = false;
1027  break;
1028  }
1029  Regs.push_back(R);
1030  }
1031  if (!AllDead)
1032  continue;
1033 
1034  B->erase(MI);
1035  for (unsigned i = 0, n = Regs.size(); i != n; ++i)
1036  MRI.markUsesInDebugValueAsUndef(Regs[i]);
1037  Changed = true;
1038  }
1039 
1040  return Changed;
1041 }
1042 
1043 namespace {
1044 
1045 // Eliminate redundant instructions
1046 //
1047 // This transformation will identify instructions where the output register
1048 // is the same as one of its input registers. This only works on instructions
1049 // that define a single register (unlike post-increment loads, for example).
1050 // The equality check is actually more detailed: the code calculates which
1051 // bits of the output are used, and only compares these bits with the input
1052 // registers.
1053 // If the output matches an input, the instruction is replaced with COPY.
1054 // The copies will be removed by another transformation.
1055  class RedundantInstrElimination : public Transformation {
1056  public:
1057  RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
1058  const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1059  : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}
1060 
1061  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1062 
1063  private:
1064  bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
1065  unsigned &LostB, unsigned &LostE);
1066  bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
1067  unsigned &LostB, unsigned &LostE);
1068  bool computeUsedBits(unsigned Reg, BitVector &Bits);
1069  bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
1070  uint16_t Begin);
1071  bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);
1072 
1073  const HexagonInstrInfo &HII;
1074  const HexagonRegisterInfo &HRI;
1076  BitTracker &BT;
1077  };
1078 
1079 } // end anonymous namespace
1080 
1081 // Check if the instruction is a lossy shift left, where the input being
1082 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1083 // of bit indices that are lost.
1084 bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
1085  unsigned OpN, unsigned &LostB, unsigned &LostE) {
1086  using namespace Hexagon;
1087 
1088  unsigned Opc = MI.getOpcode();
1089  unsigned ImN, RegN, Width;
1090  switch (Opc) {
1091  case S2_asl_i_p:
1092  ImN = 2;
1093  RegN = 1;
1094  Width = 64;
1095  break;
1096  case S2_asl_i_p_acc:
1097  case S2_asl_i_p_and:
1098  case S2_asl_i_p_nac:
1099  case S2_asl_i_p_or:
1100  case S2_asl_i_p_xacc:
1101  ImN = 3;
1102  RegN = 2;
1103  Width = 64;
1104  break;
1105  case S2_asl_i_r:
1106  ImN = 2;
1107  RegN = 1;
1108  Width = 32;
1109  break;
1110  case S2_addasl_rrri:
1111  case S4_andi_asl_ri:
1112  case S4_ori_asl_ri:
1113  case S4_addi_asl_ri:
1114  case S4_subi_asl_ri:
1115  case S2_asl_i_r_acc:
1116  case S2_asl_i_r_and:
1117  case S2_asl_i_r_nac:
1118  case S2_asl_i_r_or:
1119  case S2_asl_i_r_sat:
1120  case S2_asl_i_r_xacc:
1121  ImN = 3;
1122  RegN = 2;
1123  Width = 32;
1124  break;
1125  default:
1126  return false;
1127  }
1128 
1129  if (RegN != OpN)
1130  return false;
1131 
1132  assert(MI.getOperand(ImN).isImm());
1133  unsigned S = MI.getOperand(ImN).getImm();
1134  if (S == 0)
1135  return false;
1136  LostB = Width-S;
1137  LostE = Width;
1138  return true;
1139 }
1140 
1141 // Check if the instruction is a lossy shift right, where the input being
1142 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1143 // of bit indices that are lost.
1144 bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
1145  unsigned OpN, unsigned &LostB, unsigned &LostE) {
1146  using namespace Hexagon;
1147 
1148  unsigned Opc = MI.getOpcode();
1149  unsigned ImN, RegN;
1150  switch (Opc) {
1151  case S2_asr_i_p:
1152  case S2_lsr_i_p:
1153  ImN = 2;
1154  RegN = 1;
1155  break;
1156  case S2_asr_i_p_acc:
1157  case S2_asr_i_p_and:
1158  case S2_asr_i_p_nac:
1159  case S2_asr_i_p_or:
1160  case S2_lsr_i_p_acc:
1161  case S2_lsr_i_p_and:
1162  case S2_lsr_i_p_nac:
1163  case S2_lsr_i_p_or:
1164  case S2_lsr_i_p_xacc:
1165  ImN = 3;
1166  RegN = 2;
1167  break;
1168  case S2_asr_i_r:
1169  case S2_lsr_i_r:
1170  ImN = 2;
1171  RegN = 1;
1172  break;
1173  case S4_andi_lsr_ri:
1174  case S4_ori_lsr_ri:
1175  case S4_addi_lsr_ri:
1176  case S4_subi_lsr_ri:
1177  case S2_asr_i_r_acc:
1178  case S2_asr_i_r_and:
1179  case S2_asr_i_r_nac:
1180  case S2_asr_i_r_or:
1181  case S2_lsr_i_r_acc:
1182  case S2_lsr_i_r_and:
1183  case S2_lsr_i_r_nac:
1184  case S2_lsr_i_r_or:
1185  case S2_lsr_i_r_xacc:
1186  ImN = 3;
1187  RegN = 2;
1188  break;
1189 
1190  default:
1191  return false;
1192  }
1193 
1194  if (RegN != OpN)
1195  return false;
1196 
1197  assert(MI.getOperand(ImN).isImm());
1198  unsigned S = MI.getOperand(ImN).getImm();
1199  LostB = 0;
1200  LostE = S;
1201  return true;
1202 }
1203 
1204 // Calculate the bit vector that corresponds to the used bits of register Reg.
1205 // The vector Bits has the same size, as the size of Reg in bits. If the cal-
1206 // culation fails (i.e. the used bits are unknown), it returns false. Other-
1207 // wise, it returns true and sets the corresponding bits in Bits.
1208 bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
1209  BitVector Used(Bits.size());
1210  RegisterSet Visited;
1211  std::vector<unsigned> Pending;
1212  Pending.push_back(Reg);
1213 
1214  for (unsigned i = 0; i < Pending.size(); ++i) {
1215  unsigned R = Pending[i];
1216  if (Visited.has(R))
1217  continue;
1218  Visited.insert(R);
1219  for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
1220  BitTracker::RegisterRef UR = *I;
1221  unsigned B, W;
1222  if (!HBS::getSubregMask(UR, B, W, MRI))
1223  return false;
1224  MachineInstr &UseI = *I->getParent();
1225  if (UseI.isPHI() || UseI.isCopy()) {
1226  unsigned DefR = UseI.getOperand(0).getReg();
1228  return false;
1229  Pending.push_back(DefR);
1230  } else {
1231  if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
1232  return false;
1233  }
1234  }
1235  }
1236  Bits |= Used;
1237  return true;
1238 }
1239 
1240 // Calculate the bits used by instruction MI in a register in operand OpN.
1241 // Return true/false if the calculation succeeds/fails. If is succeeds, set
1242 // used bits in Bits. This function does not reset any bits in Bits, so
1243 // subsequent calls over different instructions will result in the union
1244 // of the used bits in all these instructions.
1245 // The register in question may be used with a sub-register, whereas Bits
1246 // holds the bits for the entire register. To keep track of that, the
1247 // argument Begin indicates where in Bits is the lowest-significant bit
1248 // of the register used in operand OpN. For example, in instruction:
1249 // vreg1 = S2_lsr_i_r vreg2:isub_hi, 10
1250 // the operand 1 is a 32-bit register, which happens to be a subregister
1251 // of the 64-bit register vreg2, and that subregister starts at position 32.
1252 // In this case Begin=32, since Bits[32] would be the lowest-significant bit
1253 // of vreg2:isub_hi.
1254 bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
1255  unsigned OpN, BitVector &Bits, uint16_t Begin) {
1256  unsigned Opc = MI.getOpcode();
1257  BitVector T(Bits.size());
1258  bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
1259  // Even if we don't have bits yet, we could still provide some information
1260  // if the instruction is a lossy shift: the lost bits will be marked as
1261  // not used.
1262  unsigned LB, LE;
1263  if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
1264  assert(MI.getOperand(OpN).isReg());
1265  BitTracker::RegisterRef RR = MI.getOperand(OpN);
1266  const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
1267  uint16_t Width = HRI.getRegSizeInBits(*RC);
1268 
1269  if (!GotBits)
1270  T.set(Begin, Begin+Width);
1271  assert(LB <= LE && LB < Width && LE <= Width);
1272  T.reset(Begin+LB, Begin+LE);
1273  GotBits = true;
1274  }
1275  if (GotBits)
1276  Bits |= T;
1277  return GotBits;
1278 }
1279 
1280 // Calculates the used bits in RD ("defined register"), and checks if these
1281 // bits in RS ("used register") and RD are identical.
1282 bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
1284  const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
1285  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
1286 
1287  unsigned DB, DW;
1288  if (!HBS::getSubregMask(RD, DB, DW, MRI))
1289  return false;
1290  unsigned SB, SW;
1291  if (!HBS::getSubregMask(RS, SB, SW, MRI))
1292  return false;
1293  if (SW != DW)
1294  return false;
1295 
1296  BitVector Used(DC.width());
1297  if (!computeUsedBits(RD.Reg, Used))
1298  return false;
1299 
1300  for (unsigned i = 0; i != DW; ++i)
1301  if (Used[i+DB] && DC[DB+i] != SC[SB+i])
1302  return false;
1303  return true;
1304 }
1305 
1306 bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
1307  const RegisterSet&) {
1308  if (!BT.reached(&B))
1309  return false;
1310  bool Changed = false;
1311 
1312  for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
1313  NextI = std::next(I);
1314  MachineInstr *MI = &*I;
1315 
1316  if (MI->getOpcode() == TargetOpcode::COPY)
1317  continue;
1318  if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
1319  continue;
1320  unsigned NumD = MI->getDesc().getNumDefs();
1321  if (NumD != 1)
1322  continue;
1323 
1324  BitTracker::RegisterRef RD = MI->getOperand(0);
1325  if (!BT.has(RD.Reg))
1326  continue;
1327  const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
1328  auto At = MachineBasicBlock::iterator(MI);
1329 
1330  // Find a source operand that is equal to the result.
1331  for (auto &Op : MI->uses()) {
1332  if (!Op.isReg())
1333  continue;
1335  if (!BT.has(RS.Reg))
1336  continue;
1337  if (!HBS::isTransparentCopy(RD, RS, MRI))
1338  continue;
1339 
1340  unsigned BN, BW;
1341  if (!HBS::getSubregMask(RS, BN, BW, MRI))
1342  continue;
1343 
1344  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
1345  if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
1346  continue;
1347 
1348  // If found, replace the instruction with a COPY.
1349  const DebugLoc &DL = MI->getDebugLoc();
1350  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
1351  unsigned NewR = MRI.createVirtualRegister(FRC);
1352  MachineInstr *CopyI =
1353  BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1354  .addReg(RS.Reg, 0, RS.Sub);
1355  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
1356  // This pass can create copies between registers that don't have the
1357  // exact same values. Updating the tracker has to involve updating
1358  // all dependent cells. Example:
1359  // vreg1 = inst vreg2 ; vreg1 != vreg2, but used bits are equal
1360  //
1361  // vreg3 = copy vreg2 ; <- inserted
1362  // ... = vreg3 ; <- replaced from vreg2
1363  // Indirectly, we can create a "copy" between vreg1 and vreg2 even
1364  // though their exact values do not match.
1365  BT.visit(*CopyI);
1366  Changed = true;
1367  break;
1368  }
1369  }
1370 
1371  return Changed;
1372 }
1373 
1374 namespace {
1375 
1376 // Recognize instructions that produce constant values known at compile-time.
1377 // Replace them with register definitions that load these constants directly.
1378  class ConstGeneration : public Transformation {
1379  public:
1380  ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
1381  MachineRegisterInfo &mri)
1382  : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
1383 
1384  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1385  static bool isTfrConst(const MachineInstr &MI);
1386 
1387  private:
1388  unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
1390 
1391  const HexagonInstrInfo &HII;
1393  BitTracker &BT;
1394  };
1395 
1396 } // end anonymous namespace
1397 
1398 bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
1399  unsigned Opc = MI.getOpcode();
1400  switch (Opc) {
1401  case Hexagon::A2_combineii:
1402  case Hexagon::A4_combineii:
1403  case Hexagon::A2_tfrsi:
1404  case Hexagon::A2_tfrpi:
1405  case Hexagon::PS_true:
1406  case Hexagon::PS_false:
1407  case Hexagon::CONST32:
1408  case Hexagon::CONST64:
1409  return true;
1410  }
1411  return false;
1412 }
1413 
1414 // Generate a transfer-immediate instruction that is appropriate for the
1415 // register class and the actual value being transferred.
1416 unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
1418  unsigned Reg = MRI.createVirtualRegister(RC);
1419  if (RC == &Hexagon::IntRegsRegClass) {
1420  BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
1421  .addImm(int32_t(C));
1422  return Reg;
1423  }
1424 
1425  if (RC == &Hexagon::DoubleRegsRegClass) {
1426  if (isInt<8>(C)) {
1427  BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)
1428  .addImm(C);
1429  return Reg;
1430  }
1431 
1432  unsigned Lo = Lo_32(C), Hi = Hi_32(C);
1433  if (isInt<8>(Lo) || isInt<8>(Hi)) {
1434  unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii
1435  : Hexagon::A4_combineii;
1436  BuildMI(B, At, DL, HII.get(Opc), Reg)
1437  .addImm(int32_t(Hi))
1438  .addImm(int32_t(Lo));
1439  return Reg;
1440  }
1441 
1442  BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg)
1443  .addImm(C);
1444  return Reg;
1445  }
1446 
1447  if (RC == &Hexagon::PredRegsRegClass) {
1448  unsigned Opc;
1449  if (C == 0)
1450  Opc = Hexagon::PS_false;
1451  else if ((C & 0xFF) == 0xFF)
1452  Opc = Hexagon::PS_true;
1453  else
1454  return 0;
1455  BuildMI(B, At, DL, HII.get(Opc), Reg);
1456  return Reg;
1457  }
1458 
1459  return 0;
1460 }
1461 
1462 bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
1463  if (!BT.reached(&B))
1464  return false;
1465  bool Changed = false;
1466  RegisterSet Defs;
1467 
1468  for (auto I = B.begin(), E = B.end(); I != E; ++I) {
1469  if (isTfrConst(*I))
1470  continue;
1471  Defs.clear();
1472  HBS::getInstrDefs(*I, Defs);
1473  if (Defs.count() != 1)
1474  continue;
1475  unsigned DR = Defs.find_first();
1477  continue;
1478  uint64_t U;
1479  const BitTracker::RegisterCell &DRC = BT.lookup(DR);
1480  if (HBS::getConst(DRC, 0, DRC.width(), U)) {
1481  int64_t C = U;
1482  DebugLoc DL = I->getDebugLoc();
1483  auto At = I->isPHI() ? B.getFirstNonPHI() : I;
1484  unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
1485  if (ImmReg) {
1486  HBS::replaceReg(DR, ImmReg, MRI);
1487  BT.put(ImmReg, DRC);
1488  Changed = true;
1489  }
1490  }
1491  }
1492  return Changed;
1493 }
1494 
1495 namespace {
1496 
1497 // Identify pairs of available registers which hold identical values.
1498 // In such cases, only one of them needs to be calculated, the other one
1499 // will be defined as a copy of the first.
1500  class CopyGeneration : public Transformation {
1501  public:
1502  CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
1503  const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1504  : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}
1505 
1506  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1507 
1508  private:
1509  bool findMatch(const BitTracker::RegisterRef &Inp,
1510  BitTracker::RegisterRef &Out, const RegisterSet &AVs);
1511 
1512  const HexagonInstrInfo &HII;
1513  const HexagonRegisterInfo &HRI;
1515  BitTracker &BT;
1516  RegisterSet Forbidden;
1517  };
1518 
1519 // Eliminate register copies RD = RS, by replacing the uses of RD with
1520 // with uses of RS.
1521  class CopyPropagation : public Transformation {
1522  public:
1523  CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1524  : Transformation(false), HRI(hri), MRI(mri) {}
1525 
1526  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1527 
1528  static bool isCopyReg(unsigned Opc, bool NoConv);
1529 
1530  private:
1531  bool propagateRegCopy(MachineInstr &MI);
1532 
1533  const HexagonRegisterInfo &HRI;
1535  };
1536 
1537 } // end anonymous namespace
1538 
1539 /// Check if there is a register in AVs that is identical to Inp. If so,
1540 /// set Out to the found register. The output may be a pair Reg:Sub.
1541 bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
1542  BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
1543  if (!BT.has(Inp.Reg))
1544  return false;
1545  const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
1546  auto *FRC = HBS::getFinalVRegClass(Inp, MRI);
1547  unsigned B, W;
1548  if (!HBS::getSubregMask(Inp, B, W, MRI))
1549  return false;
1550 
1551  for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
1552  if (!BT.has(R) || Forbidden[R])
1553  continue;
1554  const BitTracker::RegisterCell &RC = BT.lookup(R);
1555  unsigned RW = RC.width();
1556  if (W == RW) {
1557  if (FRC != MRI.getRegClass(R))
1558  continue;
1559  if (!HBS::isTransparentCopy(R, Inp, MRI))
1560  continue;
1561  if (!HBS::isEqual(InpRC, B, RC, 0, W))
1562  continue;
1563  Out.Reg = R;
1564  Out.Sub = 0;
1565  return true;
1566  }
1567  // Check if there is a super-register, whose part (with a subregister)
1568  // is equal to the input.
1569  // Only do double registers for now.
1570  if (W*2 != RW)
1571  continue;
1572  if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
1573  continue;
1574 
1575  if (HBS::isEqual(InpRC, B, RC, 0, W))
1576  Out.Sub = Hexagon::isub_lo;
1577  else if (HBS::isEqual(InpRC, B, RC, W, W))
1578  Out.Sub = Hexagon::isub_hi;
1579  else
1580  continue;
1581  Out.Reg = R;
1582  if (HBS::isTransparentCopy(Out, Inp, MRI))
1583  return true;
1584  }
1585  return false;
1586 }
1587 
1588 bool CopyGeneration::processBlock(MachineBasicBlock &B,
1589  const RegisterSet &AVs) {
1590  if (!BT.reached(&B))
1591  return false;
1592  RegisterSet AVB(AVs);
1593  bool Changed = false;
1594  RegisterSet Defs;
1595 
1596  for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
1597  ++I, AVB.insert(Defs)) {
1598  NextI = std::next(I);
1599  Defs.clear();
1600  HBS::getInstrDefs(*I, Defs);
1601 
1602  unsigned Opc = I->getOpcode();
1603  if (CopyPropagation::isCopyReg(Opc, false) ||
1604  ConstGeneration::isTfrConst(*I))
1605  continue;
1606 
1607  DebugLoc DL = I->getDebugLoc();
1608  auto At = I->isPHI() ? B.getFirstNonPHI() : I;
1609 
1610  for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
1612  auto *FRC = HBS::getFinalVRegClass(R, MRI);
1613 
1614  if (findMatch(R, MR, AVB)) {
1615  unsigned NewR = MRI.createVirtualRegister(FRC);
1616  BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1617  .addReg(MR.Reg, 0, MR.Sub);
1618  BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
1619  HBS::replaceReg(R, NewR, MRI);
1620  Forbidden.insert(R);
1621  continue;
1622  }
1623 
1624  if (FRC == &Hexagon::DoubleRegsRegClass ||
1625  FRC == &Hexagon::HvxWRRegClass) {
1626  // Try to generate REG_SEQUENCE.
1627  unsigned SubLo = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_lo);
1628  unsigned SubHi = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_hi);
1629  BitTracker::RegisterRef TL = { R, SubLo };
1630  BitTracker::RegisterRef TH = { R, SubHi };
1631  BitTracker::RegisterRef ML, MH;
1632  if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
1633  auto *FRC = HBS::getFinalVRegClass(R, MRI);
1634  unsigned NewR = MRI.createVirtualRegister(FRC);
1635  BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
1636  .addReg(ML.Reg, 0, ML.Sub)
1637  .addImm(SubLo)
1638  .addReg(MH.Reg, 0, MH.Sub)
1639  .addImm(SubHi);
1640  BT.put(BitTracker::RegisterRef(NewR), BT.get(R));
1641  HBS::replaceReg(R, NewR, MRI);
1642  Forbidden.insert(R);
1643  }
1644  }
1645  }
1646  }
1647 
1648  return Changed;
1649 }
1650 
1651 bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) {
1652  switch (Opc) {
1653  case TargetOpcode::COPY:
1654  case TargetOpcode::REG_SEQUENCE:
1655  case Hexagon::A4_combineir:
1656  case Hexagon::A4_combineri:
1657  return true;
1658  case Hexagon::A2_tfr:
1659  case Hexagon::A2_tfrp:
1660  case Hexagon::A2_combinew:
1661  case Hexagon::V6_vcombine:
1662  return NoConv;
1663  default:
1664  break;
1665  }
1666  return false;
1667 }
1668 
1669 bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
1670  bool Changed = false;
1671  unsigned Opc = MI.getOpcode();
1673  assert(MI.getOperand(0).getSubReg() == 0);
1674 
1675  switch (Opc) {
1676  case TargetOpcode::COPY:
1677  case Hexagon::A2_tfr:
1678  case Hexagon::A2_tfrp: {
1680  if (!HBS::isTransparentCopy(RD, RS, MRI))
1681  break;
1682  if (RS.Sub != 0)
1683  Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
1684  else
1685  Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
1686  break;
1687  }
1688  case TargetOpcode::REG_SEQUENCE: {
1689  BitTracker::RegisterRef SL, SH;
1690  if (HBS::parseRegSequence(MI, SL, SH, MRI)) {
1691  const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
1692  unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
1693  unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
1694  Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI);
1695  Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI);
1696  }
1697  break;
1698  }
1699  case Hexagon::A2_combinew:
1700  case Hexagon::V6_vcombine: {
1701  const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
1702  unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
1703  unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
1704  BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
1705  Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI);
1706  Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI);
1707  break;
1708  }
1709  case Hexagon::A4_combineir:
1710  case Hexagon::A4_combineri: {
1711  unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
1712  unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo
1713  : Hexagon::isub_hi;
1714  BitTracker::RegisterRef RS = MI.getOperand(SrcX);
1715  Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
1716  break;
1717  }
1718  }
1719  return Changed;
1720 }
1721 
1722 bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
1723  std::vector<MachineInstr*> Instrs;
1724  for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
1725  Instrs.push_back(&*I);
1726 
1727  bool Changed = false;
1728  for (auto I : Instrs) {
1729  unsigned Opc = I->getOpcode();
1730  if (!CopyPropagation::isCopyReg(Opc, true))
1731  continue;
1732  Changed |= propagateRegCopy(*I);
1733  }
1734 
1735  return Changed;
1736 }
1737 
1738 namespace {
1739 
1740 // Recognize patterns that can be simplified and replace them with the
1741 // simpler forms.
1742 // This is by no means complete
1743  class BitSimplification : public Transformation {
1744  public:
1745  BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt,
1746  const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri,
1748  : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri),
1749  MF(mf), BT(bt) {}
1750 
1751  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1752 
1753  private:
1754  struct RegHalf : public BitTracker::RegisterRef {
1755  bool Low; // Low/High halfword.
1756  };
1757 
1758  bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
1759  unsigned B, RegHalf &RH);
1760  bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum);
1761 
1762  bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
1764  unsigned getCombineOpcode(bool HLow, bool LLow);
1765 
1766  bool genStoreUpperHalf(MachineInstr *MI);
1767  bool genStoreImmediate(MachineInstr *MI);
1768  bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
1769  const BitTracker::RegisterCell &RC);
1770  bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
1771  const BitTracker::RegisterCell &RC);
1772  bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
1773  const BitTracker::RegisterCell &RC);
1774  bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
1775  const BitTracker::RegisterCell &RC);
1776  bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD,
1777  const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
1778  bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
1779  const BitTracker::RegisterCell &RC);
1780  bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
1781  const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
1782 
1783  // Cache of created instructions to avoid creating duplicates.
1784  // XXX Currently only used by genBitSplit.
1785  std::vector<MachineInstr*> NewMIs;
1786 
1787  const MachineDominatorTree &MDT;
1788  const HexagonInstrInfo &HII;
1789  const HexagonRegisterInfo &HRI;
1791  MachineFunction &MF;
1792  BitTracker &BT;
1793  };
1794 
1795 } // end anonymous namespace
1796 
1797 // Check if the bits [B..B+16) in register cell RC form a valid halfword,
1798 // i.e. [0..16), [16..32), etc. of some register. If so, return true and
1799 // set the information about the found register in RH.
1800 bool BitSimplification::matchHalf(unsigned SelfR,
1801  const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
1802  // XXX This could be searching in the set of available registers, in case
1803  // the match is not exact.
1804 
1805  // Match 16-bit chunks, where the RC[B..B+15] references exactly one
1806  // register and all the bits B..B+15 match between RC and the register.
1807  // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
1808  // and RC = { [0]:0 [1-15]:v1[1-15]... }.
1809  bool Low = false;
1810  unsigned I = B;
1811  while (I < B+16 && RC[I].num())
1812  I++;
1813  if (I == B+16)
1814  return false;
1815 
1816  unsigned Reg = RC[I].RefI.Reg;
1817  unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B.
1818  if (P < I-B)
1819  return false;
1820  unsigned Pos = P - (I-B);
1821 
1822  if (Reg == 0 || Reg == SelfR) // Don't match "self".
1823  return false;
1825  return false;
1826  if (!BT.has(Reg))
1827  return false;
1828 
1829  const BitTracker::RegisterCell &SC = BT.lookup(Reg);
1830  if (Pos+16 > SC.width())
1831  return false;
1832 
1833  for (unsigned i = 0; i < 16; ++i) {
1834  const BitTracker::BitValue &RV = RC[i+B];
1835  if (RV.Type == BitTracker::BitValue::Ref) {
1836  if (RV.RefI.Reg != Reg)
1837  return false;
1838  if (RV.RefI.Pos != i+Pos)
1839  return false;
1840  continue;
1841  }
1842  if (RC[i+B] != SC[i+Pos])
1843  return false;
1844  }
1845 
1846  unsigned Sub = 0;
1847  switch (Pos) {
1848  case 0:
1849  Sub = Hexagon::isub_lo;
1850  Low = true;
1851  break;
1852  case 16:
1853  Sub = Hexagon::isub_lo;
1854  Low = false;
1855  break;
1856  case 32:
1857  Sub = Hexagon::isub_hi;
1858  Low = true;
1859  break;
1860  case 48:
1861  Sub = Hexagon::isub_hi;
1862  Low = false;
1863  break;
1864  default:
1865  return false;
1866  }
1867 
1868  RH.Reg = Reg;
1869  RH.Sub = Sub;
1870  RH.Low = Low;
1871  // If the subregister is not valid with the register, set it to 0.
1872  if (!HBS::getFinalVRegClass(RH, MRI))
1873  RH.Sub = 0;
1874 
1875  return true;
1876 }
1877 
1878 bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
1879  unsigned OpNum) {
1880  auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
1881  auto *RRC = HBS::getFinalVRegClass(R, MRI);
1882  return OpRC->hasSubClassEq(RRC);
1883 }
1884 
1885 // Check if RC matches the pattern of a S2_packhl. If so, return true and
1886 // set the inputs Rs and Rt.
1887 bool BitSimplification::matchPackhl(unsigned SelfR,
1890  RegHalf L1, H1, L2, H2;
1891 
1892  if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1))
1893  return false;
1894  if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
1895  return false;
1896 
1897  // Rs = H1.L1, Rt = H2.L2
1898  if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
1899  return false;
1900  if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
1901  return false;
1902 
1903  Rs = H1;
1904  Rt = H2;
1905  return true;
1906 }
1907 
1908 unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
1909  return HLow ? LLow ? Hexagon::A2_combine_ll
1910  : Hexagon::A2_combine_lh
1911  : LLow ? Hexagon::A2_combine_hl
1912  : Hexagon::A2_combine_hh;
1913 }
1914 
1915 // If MI stores the upper halfword of a register (potentially obtained via
1916 // shifts or extracts), replace it with a storerf instruction. This could
1917 // cause the "extraction" code to become dead.
1918 bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
1919  unsigned Opc = MI->getOpcode();
1920  if (Opc != Hexagon::S2_storerh_io)
1921  return false;
1922 
1923  MachineOperand &ValOp = MI->getOperand(2);
1924  BitTracker::RegisterRef RS = ValOp;
1925  if (!BT.has(RS.Reg))
1926  return false;
1927  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
1928  RegHalf H;
1929  if (!matchHalf(0, RC, 0, H))
1930  return false;
1931  if (H.Low)
1932  return false;
1933  MI->setDesc(HII.get(Hexagon::S2_storerf_io));
1934  ValOp.setReg(H.Reg);
1935  ValOp.setSubReg(H.Sub);
1936  return true;
1937 }
1938 
1939 // If MI stores a value known at compile-time, and the value is within a range
1940 // that avoids using constant-extenders, replace it with a store-immediate.
1941 bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
1942  unsigned Opc = MI->getOpcode();
1943  unsigned Align = 0;
1944  switch (Opc) {
1945  case Hexagon::S2_storeri_io:
1946  Align++;
1948  case Hexagon::S2_storerh_io:
1949  Align++;
1951  case Hexagon::S2_storerb_io:
1952  break;
1953  default:
1954  return false;
1955  }
1956 
1957  // Avoid stores to frame-indices (due to an unknown offset).
1958  if (!MI->getOperand(0).isReg())
1959  return false;
1960  MachineOperand &OffOp = MI->getOperand(1);
1961  if (!OffOp.isImm())
1962  return false;
1963 
1964  int64_t Off = OffOp.getImm();
1965  // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
1966  if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
1967  return false;
1968  // Source register:
1969  BitTracker::RegisterRef RS = MI->getOperand(2);
1970  if (!BT.has(RS.Reg))
1971  return false;
1972  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
1973  uint64_t U;
1974  if (!HBS::getConst(RC, 0, RC.width(), U))
1975  return false;
1976 
1977  // Only consider 8-bit values to avoid constant-extenders.
1978  int V;
1979  switch (Opc) {
1980  case Hexagon::S2_storerb_io:
1981  V = int8_t(U);
1982  break;
1983  case Hexagon::S2_storerh_io:
1984  V = int16_t(U);
1985  break;
1986  case Hexagon::S2_storeri_io:
1987  V = int32_t(U);
1988  break;
1989  }
1990  if (!isInt<8>(V))
1991  return false;
1992 
1993  MI->RemoveOperand(2);
1994  switch (Opc) {
1995  case Hexagon::S2_storerb_io:
1996  MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
1997  break;
1998  case Hexagon::S2_storerh_io:
1999  MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
2000  break;
2001  case Hexagon::S2_storeri_io:
2002  MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
2003  break;
2004  }
2006  return true;
2007 }
2008 
2009 // If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the
2010 // last instruction in a sequence that results in something equivalent to
2011 // the pack-halfwords. The intent is to cause the entire sequence to become
2012 // dead.
2013 bool BitSimplification::genPackhl(MachineInstr *MI,
2015  unsigned Opc = MI->getOpcode();
2016  if (Opc == Hexagon::S2_packhl)
2017  return false;
2018  BitTracker::RegisterRef Rs, Rt;
2019  if (!matchPackhl(RD.Reg, RC, Rs, Rt))
2020  return false;
2021  if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
2022  !validateReg(Rt, Hexagon::S2_packhl, 2))
2023  return false;
2024 
2025  MachineBasicBlock &B = *MI->getParent();
2026  unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
2027  DebugLoc DL = MI->getDebugLoc();
2028  auto At = MI->isPHI() ? B.getFirstNonPHI()
2030  BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
2031  .addReg(Rs.Reg, 0, Rs.Sub)
2032  .addReg(Rt.Reg, 0, Rt.Sub);
2033  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2034  BT.put(BitTracker::RegisterRef(NewR), RC);
2035  return true;
2036 }
2037 
2038 // If MI produces halfword of the input in the low half of the output,
2039 // replace it with zero-extend or extractu.
2040 bool BitSimplification::genExtractHalf(MachineInstr *MI,
2042  RegHalf L;
2043  // Check for halfword in low 16 bits, zeros elsewhere.
2044  if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
2045  return false;
2046 
2047  unsigned Opc = MI->getOpcode();
2048  MachineBasicBlock &B = *MI->getParent();
2049  DebugLoc DL = MI->getDebugLoc();
2050 
2051  // Prefer zxth, since zxth can go in any slot, while extractu only in
2052  // slots 2 and 3.
2053  unsigned NewR = 0;
2054  auto At = MI->isPHI() ? B.getFirstNonPHI()
2056  if (L.Low && Opc != Hexagon::A2_zxth) {
2057  if (validateReg(L, Hexagon::A2_zxth, 1)) {
2058  NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2059  BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
2060  .addReg(L.Reg, 0, L.Sub);
2061  }
2062  } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
2063  if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
2064  NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2065  BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
2066  .addReg(L.Reg, 0, L.Sub)
2067  .addImm(16);
2068  }
2069  }
2070  if (NewR == 0)
2071  return false;
2072  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2073  BT.put(BitTracker::RegisterRef(NewR), RC);
2074  return true;
2075 }
2076 
2077 // If MI is equivalent to a combine(.L/.H, .L/.H) replace with with the
2078 // combine.
2079 bool BitSimplification::genCombineHalf(MachineInstr *MI,
2081  RegHalf L, H;
2082  // Check for combine h/l
2083  if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
2084  return false;
2085  // Do nothing if this is just a reg copy.
2086  if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
2087  return false;
2088 
2089  unsigned Opc = MI->getOpcode();
2090  unsigned COpc = getCombineOpcode(H.Low, L.Low);
2091  if (COpc == Opc)
2092  return false;
2093  if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))
2094  return false;
2095 
2096  MachineBasicBlock &B = *MI->getParent();
2097  DebugLoc DL = MI->getDebugLoc();
2098  unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2099  auto At = MI->isPHI() ? B.getFirstNonPHI()
2101  BuildMI(B, At, DL, HII.get(COpc), NewR)
2102  .addReg(H.Reg, 0, H.Sub)
2103  .addReg(L.Reg, 0, L.Sub);
2104  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2105  BT.put(BitTracker::RegisterRef(NewR), RC);
2106  return true;
2107 }
2108 
2109 // If MI resets high bits of a register and keeps the lower ones, replace it
2110 // with zero-extend byte/half, and-immediate, or extractu, as appropriate.
2111 bool BitSimplification::genExtractLow(MachineInstr *MI,
2113  unsigned Opc = MI->getOpcode();
2114  switch (Opc) {
2115  case Hexagon::A2_zxtb:
2116  case Hexagon::A2_zxth:
2117  case Hexagon::S2_extractu:
2118  return false;
2119  }
2120  if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
2121  int32_t Imm = MI->getOperand(2).getImm();
2122  if (isInt<10>(Imm))
2123  return false;
2124  }
2125 
2126  if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
2127  return false;
2128  unsigned W = RC.width();
2129  while (W > 0 && RC[W-1].is(0))
2130  W--;
2131  if (W == 0 || W == RC.width())
2132  return false;
2133  unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb
2134  : (W == 16) ? Hexagon::A2_zxth
2135  : (W < 10) ? Hexagon::A2_andir
2136  : Hexagon::S2_extractu;
2137  MachineBasicBlock &B = *MI->getParent();
2138  DebugLoc DL = MI->getDebugLoc();
2139 
2140  for (auto &Op : MI->uses()) {
2141  if (!Op.isReg())
2142  continue;
2144  if (!BT.has(RS.Reg))
2145  continue;
2146  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
2147  unsigned BN, BW;
2148  if (!HBS::getSubregMask(RS, BN, BW, MRI))
2149  continue;
2150  if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
2151  continue;
2152  if (!validateReg(RS, NewOpc, 1))
2153  continue;
2154 
2155  unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2156  auto At = MI->isPHI() ? B.getFirstNonPHI()
2158  auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
2159  .addReg(RS.Reg, 0, RS.Sub);
2160  if (NewOpc == Hexagon::A2_andir)
2161  MIB.addImm((1 << W) - 1);
2162  else if (NewOpc == Hexagon::S2_extractu)
2163  MIB.addImm(W).addImm(0);
2164  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2165  BT.put(BitTracker::RegisterRef(NewR), RC);
2166  return true;
2167  }
2168  return false;
2169 }
2170 
2171 bool BitSimplification::genBitSplit(MachineInstr *MI,
2173  const RegisterSet &AVs) {
2174  if (!GenBitSplit)
2175  return false;
2176  if (MaxBitSplit.getNumOccurrences()) {
2177  if (CountBitSplit >= MaxBitSplit)
2178  return false;
2179  }
2180 
2181  unsigned Opc = MI->getOpcode();
2182  switch (Opc) {
2183  case Hexagon::A4_bitsplit:
2184  case Hexagon::A4_bitspliti:
2185  return false;
2186  }
2187 
2188  unsigned W = RC.width();
2189  if (W != 32)
2190  return false;
2191 
2192  auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
2193  unsigned Z = C.width();
2194  while (Z > 0 && C[Z-1].is(0))
2195  --Z;
2196  return C.width() - Z;
2197  };
2198 
2199  // Count the number of leading zeros in the target RC.
2200  unsigned Z = ctlz(RC);
2201  if (Z == 0 || Z == W)
2202  return false;
2203 
2204  // A simplistic analysis: assume the source register (the one being split)
2205  // is fully unknown, and that all its bits are self-references.
2206  const BitTracker::BitValue &B0 = RC[0];
2207  if (B0.Type != BitTracker::BitValue::Ref)
2208  return false;
2209 
2210  unsigned SrcR = B0.RefI.Reg;
2211  unsigned SrcSR = 0;
2212  unsigned Pos = B0.RefI.Pos;
2213 
2214  // All the non-zero bits should be consecutive bits from the same register.
2215  for (unsigned i = 1; i < W-Z; ++i) {
2216  const BitTracker::BitValue &V = RC[i];
2218  return false;
2219  if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
2220  return false;
2221  }
2222 
2223  // Now, find the other bitfield among AVs.
2224  for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
2225  // The number of leading zeros here should be the number of trailing
2226  // non-zeros in RC.
2227  if (!BT.has(S))
2228  continue;
2229  const BitTracker::RegisterCell &SC = BT.lookup(S);
2230  if (SC.width() != W || ctlz(SC) != W-Z)
2231  continue;
2232  // The Z lower bits should now match SrcR.
2233  const BitTracker::BitValue &S0 = SC[0];
2234  if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
2235  continue;
2236  unsigned P = S0.RefI.Pos;
2237 
2238  if (Pos <= P && (Pos + W-Z) != P)
2239  continue;
2240  if (P < Pos && (P + Z) != Pos)
2241  continue;
2242  // The starting bitfield position must be at a subregister boundary.
2243  if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
2244  continue;
2245 
2246  unsigned I;
2247  for (I = 1; I < Z; ++I) {
2248  const BitTracker::BitValue &V = SC[I];
2250  break;
2251  if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
2252  break;
2253  }
2254  if (I != Z)
2255  continue;
2256 
2257  // Generate bitsplit where S is defined.
2258  if (MaxBitSplit.getNumOccurrences())
2259  CountBitSplit++;
2260  MachineInstr *DefS = MRI.getVRegDef(S);
2261  assert(DefS != nullptr);
2262  DebugLoc DL = DefS->getDebugLoc();
2263  MachineBasicBlock &B = *DefS->getParent();
2264  auto At = DefS->isPHI() ? B.getFirstNonPHI()
2266  if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
2267  SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
2268  if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
2269  continue;
2270  unsigned ImmOp = Pos <= P ? W-Z : Z;
2271 
2272  // Find an existing bitsplit instruction if one already exists.
2273  unsigned NewR = 0;
2274  for (MachineInstr *In : NewMIs) {
2275  if (In->getOpcode() != Hexagon::A4_bitspliti)
2276  continue;
2277  MachineOperand &Op1 = In->getOperand(1);
2278  if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR)
2279  continue;
2280  if (In->getOperand(2).getImm() != ImmOp)
2281  continue;
2282  // Check if the target register is available here.
2283  MachineOperand &Op0 = In->getOperand(0);
2284  MachineInstr *DefI = MRI.getVRegDef(Op0.getReg());
2285  assert(DefI != nullptr);
2286  if (!MDT.dominates(DefI, &*At))
2287  continue;
2288 
2289  // Found one that can be reused.
2290  assert(Op0.getSubReg() == 0);
2291  NewR = Op0.getReg();
2292  break;
2293  }
2294  if (!NewR) {
2295  NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
2296  auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
2297  .addReg(SrcR, 0, SrcSR)
2298  .addImm(ImmOp);
2299  NewMIs.push_back(NewBS);
2300  }
2301  if (Pos <= P) {
2302  HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
2303  HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi, MRI);
2304  } else {
2305  HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo, MRI);
2306  HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
2307  }
2308  return true;
2309  }
2310 
2311  return false;
2312 }
2313 
2314 // Check for tstbit simplification opportunity, where the bit being checked
2315 // can be tracked back to another register. For example:
2316 // vreg2 = S2_lsr_i_r vreg1, 5
2317 // vreg3 = S2_tstbit_i vreg2, 0
2318 // =>
2319 // vreg3 = S2_tstbit_i vreg1, 5
2320 bool BitSimplification::simplifyTstbit(MachineInstr *MI,
2322  unsigned Opc = MI->getOpcode();
2323  if (Opc != Hexagon::S2_tstbit_i)
2324  return false;
2325 
2326  unsigned BN = MI->getOperand(2).getImm();
2327  BitTracker::RegisterRef RS = MI->getOperand(1);
2328  unsigned F, W;
2329  DebugLoc DL = MI->getDebugLoc();
2330  if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
2331  return false;
2332  MachineBasicBlock &B = *MI->getParent();
2333  auto At = MI->isPHI() ? B.getFirstNonPHI()
2335 
2336  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
2337  const BitTracker::BitValue &V = SC[F+BN];
2338  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
2339  const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
2340  // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
2341  // a double register, need to use a subregister and adjust bit
2342  // number.
2344  BitTracker::RegisterRef RR(V.RefI.Reg, 0);
2345  if (TC == &Hexagon::DoubleRegsRegClass) {
2346  P = V.RefI.Pos;
2347  RR.Sub = Hexagon::isub_lo;
2348  if (P >= 32) {
2349  P -= 32;
2350  RR.Sub = Hexagon::isub_hi;
2351  }
2352  } else if (TC == &Hexagon::IntRegsRegClass) {
2353  P = V.RefI.Pos;
2354  }
2356  unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
2357  BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
2358  .addReg(RR.Reg, 0, RR.Sub)
2359  .addImm(P);
2360  HBS::replaceReg(RD.Reg, NewR, MRI);
2361  BT.put(NewR, RC);
2362  return true;
2363  }
2364  } else if (V.is(0) || V.is(1)) {
2365  unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
2366  unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
2367  BuildMI(B, At, DL, HII.get(NewOpc), NewR);
2368  HBS::replaceReg(RD.Reg, NewR, MRI);
2369  return true;
2370  }
2371 
2372  return false;
2373 }
2374 
2375 // Detect whether RD is a bitfield extract (sign- or zero-extended) of
2376 // some register from the AVs set. Create a new corresponding instruction
2377 // at the location of MI. The intent is to recognize situations where
2378 // a sequence of instructions performs an operation that is equivalent to
2379 // an extract operation, such as a shift left followed by a shift right.
2380 bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
2382  const RegisterSet &AVs) {
2383  if (!GenExtract)
2384  return false;
2385  if (MaxExtract.getNumOccurrences()) {
2386  if (CountExtract >= MaxExtract)
2387  return false;
2388  CountExtract++;
2389  }
2390 
2391  unsigned W = RC.width();
2392  unsigned RW = W;
2393  unsigned Len;
2394  bool Signed;
2395 
2396  // The code is mostly class-independent, except for the part that generates
2397  // the extract instruction, and establishes the source register (in case it
2398  // needs to use a subregister).
2399  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2400  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
2401  return false;
2402  assert(RD.Sub == 0);
2403 
2404  // Observation:
2405  // If the cell has a form of 00..0xx..x with k zeros and n remaining
2406  // bits, this could be an extractu of the n bits, but it could also be
2407  // an extractu of a longer field which happens to have 0s in the top
2408  // bit positions.
2409  // The same logic applies to sign-extended fields.
2410  //
2411  // Do not check for the extended extracts, since it would expand the
2412  // search space quite a bit. The search may be expensive as it is.
2413 
2414  const BitTracker::BitValue &TopV = RC[W-1];
2415 
2416  // Eliminate candidates that have self-referential bits, since they
2417  // cannot be extracts from other registers. Also, skip registers that
2418  // have compile-time constant values.
2419  bool IsConst = true;
2420  for (unsigned I = 0; I != W; ++I) {
2421  const BitTracker::BitValue &V = RC[I];
2422  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
2423  return false;
2424  IsConst = IsConst && (V.is(0) || V.is(1));
2425  }
2426  if (IsConst)
2427  return false;
2428 
2429  if (TopV.is(0) || TopV.is(1)) {
2430  bool S = TopV.is(1);
2431  for (--W; W > 0 && RC[W-1].is(S); --W)
2432  ;
2433  Len = W;
2434  Signed = S;
2435  // The sign bit must be a part of the field being extended.
2436  if (Signed)
2437  ++Len;
2438  } else {
2439  // This could still be a sign-extended extract.
2441  if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
2442  return false;
2443  for (--W; W > 0 && RC[W-1] == TopV; --W)
2444  ;
2445  // The top bits of RC are copies of TopV. One occurrence of TopV will
2446  // be a part of the field.
2447  Len = W + 1;
2448  Signed = true;
2449  }
2450 
2451  // This would be just a copy. It should be handled elsewhere.
2452  if (Len == RW)
2453  return false;
2454 
2455  DEBUG({
2456  dbgs() << __func__ << " on reg: " << PrintReg(RD.Reg, &HRI, RD.Sub)
2457  << ", MI: " << *MI;
2458  dbgs() << "Cell: " << RC << '\n';
2459  dbgs() << "Expected bitfield size: " << Len << " bits, "
2460  << (Signed ? "sign" : "zero") << "-extended\n";
2461  });
2462 
2463  bool Changed = false;
2464 
2465  for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
2466  if (!BT.has(R))
2467  continue;
2468  const BitTracker::RegisterCell &SC = BT.lookup(R);
2469  unsigned SW = SC.width();
2470 
2471  // The source can be longer than the destination, as long as its size is
2472  // a multiple of the size of the destination. Also, we would need to be
2473  // able to refer to the subregister in the source that would be of the
2474  // same size as the destination, but only check the sizes here.
2475  if (SW < RW || (SW % RW) != 0)
2476  continue;
2477 
2478  // The field can start at any offset in SC as long as it contains Len
2479  // bits and does not cross subregister boundary (if the source register
2480  // is longer than the destination).
2481  unsigned Off = 0;
2482  while (Off <= SW-Len) {
2483  unsigned OE = (Off+Len)/RW;
2484  if (OE != Off/RW) {
2485  // The assumption here is that if the source (R) is longer than the
2486  // destination, then the destination is a sequence of words of
2487  // size RW, and each such word in R can be accessed via a subregister.
2488  //
2489  // If the beginning and the end of the field cross the subregister
2490  // boundary, advance to the next subregister.
2491  Off = OE*RW;
2492  continue;
2493  }
2494  if (HBS::isEqual(RC, 0, SC, Off, Len))
2495  break;
2496  ++Off;
2497  }
2498 
2499  if (Off > SW-Len)
2500  continue;
2501 
2502  // Found match.
2503  unsigned ExtOpc = 0;
2504  if (Off == 0) {
2505  if (Len == 8)
2506  ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
2507  else if (Len == 16)
2508  ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
2509  else if (Len < 10 && !Signed)
2510  ExtOpc = Hexagon::A2_andir;
2511  }
2512  if (ExtOpc == 0) {
2513  ExtOpc =
2514  Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp)
2515  : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
2516  }
2517  unsigned SR = 0;
2518  // This only recognizes isub_lo and isub_hi.
2519  if (RW != SW && RW*2 != SW)
2520  continue;
2521  if (RW != SW)
2522  SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
2523  Off = Off % RW;
2524 
2525  if (!validateReg({R,SR}, ExtOpc, 1))
2526  continue;
2527 
2528  // Don't generate the same instruction as the one being optimized.
2529  if (MI->getOpcode() == ExtOpc) {
2530  // All possible ExtOpc's have the source in operand(1).
2531  const MachineOperand &SrcOp = MI->getOperand(1);
2532  if (SrcOp.getReg() == R)
2533  continue;
2534  }
2535 
2536  DebugLoc DL = MI->getDebugLoc();
2537  MachineBasicBlock &B = *MI->getParent();
2538  unsigned NewR = MRI.createVirtualRegister(FRC);
2539  auto At = MI->isPHI() ? B.getFirstNonPHI()
2541  auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
2542  .addReg(R, 0, SR);
2543  switch (ExtOpc) {
2544  case Hexagon::A2_sxtb:
2545  case Hexagon::A2_zxtb:
2546  case Hexagon::A2_sxth:
2547  case Hexagon::A2_zxth:
2548  break;
2549  case Hexagon::A2_andir:
2550  MIB.addImm((1u << Len) - 1);
2551  break;
2552  case Hexagon::S4_extract:
2553  case Hexagon::S2_extractu:
2554  case Hexagon::S4_extractp:
2555  case Hexagon::S2_extractup:
2556  MIB.addImm(Len)
2557  .addImm(Off);
2558  break;
2559  default:
2560  llvm_unreachable("Unexpected opcode");
2561  }
2562 
2563  HBS::replaceReg(RD.Reg, NewR, MRI);
2564  BT.put(BitTracker::RegisterRef(NewR), RC);
2565  Changed = true;
2566  break;
2567  }
2568 
2569  return Changed;
2570 }
2571 
2572 bool BitSimplification::processBlock(MachineBasicBlock &B,
2573  const RegisterSet &AVs) {
2574  if (!BT.reached(&B))
2575  return false;
2576  bool Changed = false;
2577  RegisterSet AVB = AVs;
2578  RegisterSet Defs;
2579 
2580  for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
2581  MachineInstr *MI = &*I;
2582  Defs.clear();
2583  HBS::getInstrDefs(*MI, Defs);
2584 
2585  unsigned Opc = MI->getOpcode();
2586  if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
2587  continue;
2588 
2589  if (MI->mayStore()) {
2590  bool T = genStoreUpperHalf(MI);
2591  T = T || genStoreImmediate(MI);
2592  Changed |= T;
2593  continue;
2594  }
2595 
2596  if (Defs.count() != 1)
2597  continue;
2598  const MachineOperand &Op0 = MI->getOperand(0);
2599  if (!Op0.isReg() || !Op0.isDef())
2600  continue;
2601  BitTracker::RegisterRef RD = Op0;
2602  if (!BT.has(RD.Reg))
2603  continue;
2604  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2605  const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
2606 
2607  if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
2608  bool T = genPackhl(MI, RD, RC);
2609  T = T || simplifyExtractLow(MI, RD, RC, AVB);
2610  Changed |= T;
2611  continue;
2612  }
2613 
2614  if (FRC->getID() == Hexagon::IntRegsRegClassID) {
2615  bool T = genBitSplit(MI, RD, RC, AVB);
2616  T = T || simplifyExtractLow(MI, RD, RC, AVB);
2617  T = T || genExtractHalf(MI, RD, RC);
2618  T = T || genCombineHalf(MI, RD, RC);
2619  T = T || genExtractLow(MI, RD, RC);
2620  Changed |= T;
2621  continue;
2622  }
2623 
2624  if (FRC->getID() == Hexagon::PredRegsRegClassID) {
2625  bool T = simplifyTstbit(MI, RD, RC);
2626  Changed |= T;
2627  continue;
2628  }
2629  }
2630  return Changed;
2631 }
2632 
2633 bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
2634  if (skipFunction(*MF.getFunction()))
2635  return false;
2636 
2637  auto &HST = MF.getSubtarget<HexagonSubtarget>();
2638  auto &HRI = *HST.getRegisterInfo();
2639  auto &HII = *HST.getInstrInfo();
2640 
2641  MDT = &getAnalysis<MachineDominatorTree>();
2642  MachineRegisterInfo &MRI = MF.getRegInfo();
2643  bool Changed;
2644 
2645  Changed = DeadCodeElimination(MF, *MDT).run();
2646 
2647  const HexagonEvaluator HE(HRI, MRI, HII, MF);
2648  BitTracker BT(HE, MF);
2649  DEBUG(BT.trace(true));
2650  BT.run();
2651 
2652  MachineBasicBlock &Entry = MF.front();
2653 
2654  RegisterSet AIG; // Available registers for IG.
2655  ConstGeneration ImmG(BT, HII, MRI);
2656  Changed |= visitBlock(Entry, ImmG, AIG);
2657 
2658  RegisterSet ARE; // Available registers for RIE.
2659  RedundantInstrElimination RIE(BT, HII, HRI, MRI);
2660  bool Ried = visitBlock(Entry, RIE, ARE);
2661  if (Ried) {
2662  Changed = true;
2663  BT.run();
2664  }
2665 
2666  RegisterSet ACG; // Available registers for CG.
2667  CopyGeneration CopyG(BT, HII, HRI, MRI);
2668  Changed |= visitBlock(Entry, CopyG, ACG);
2669 
2670  RegisterSet ACP; // Available registers for CP.
2671  CopyPropagation CopyP(HRI, MRI);
2672  Changed |= visitBlock(Entry, CopyP, ACP);
2673 
2674  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2675 
2676  BT.run();
2677  RegisterSet ABS; // Available registers for BS.
2678  BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);
2679  Changed |= visitBlock(Entry, BitS, ABS);
2680 
2681  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2682 
2683  if (Changed) {
2684  for (auto &B : MF)
2685  for (auto &I : B)
2686  I.clearKillInfo();
2687  DeadCodeElimination(MF, *MDT).run();
2688  }
2689  return Changed;
2690 }
2691 
2692 // Recognize loops where the code at the end of the loop matches the code
2693 // before the entry of the loop, and the matching code is such that is can
2694 // be simplified. This pass relies on the bit simplification above and only
2695 // prepares code in a way that can be handled by the bit simplifcation.
2696 //
2697 // This is the motivating testcase (and explanation):
2698 //
2699 // {
2700 // loop0(.LBB0_2, r1) // %for.body.preheader
2701 // r5:4 = memd(r0++#8)
2702 // }
2703 // {
2704 // r3 = lsr(r4, #16)
2705 // r7:6 = combine(r5, r5)
2706 // }
2707 // {
2708 // r3 = insert(r5, #16, #16)
2709 // r7:6 = vlsrw(r7:6, #16)
2710 // }
2711 // .LBB0_2:
2712 // {
2713 // memh(r2+#4) = r5
2714 // memh(r2+#6) = r6 # R6 is really R5.H
2715 // }
2716 // {
2717 // r2 = add(r2, #8)
2718 // memh(r2+#0) = r4
2719 // memh(r2+#2) = r3 # R3 is really R4.H
2720 // }
2721 // {
2722 // r5:4 = memd(r0++#8)
2723 // }
2724 // { # "Shuffling" code that sets up R3 and R6
2725 // r3 = lsr(r4, #16) # so that their halves can be stored in the
2726 // r7:6 = combine(r5, r5) # next iteration. This could be folded into
2727 // } # the stores if the code was at the beginning
2728 // { # of the loop iteration. Since the same code
2729 // r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved
2730 // r7:6 = vlsrw(r7:6, #16) # there.
2731 // }:endloop0
2732 //
2733 //
2734 // The outcome:
2735 //
2736 // {
2737 // loop0(.LBB0_2, r1)
2738 // r5:4 = memd(r0++#8)
2739 // }
2740 // .LBB0_2:
2741 // {
2742 // memh(r2+#4) = r5
2743 // memh(r2+#6) = r5.h
2744 // }
2745 // {
2746 // r2 = add(r2, #8)
2747 // memh(r2+#0) = r4
2748 // memh(r2+#2) = r4.h
2749 // }
2750 // {
2751 // r5:4 = memd(r0++#8)
2752 // }:endloop0
2753 
2754 namespace llvm {
2755 
2758 
2759 } // end namespace llvm
2760 
2761 namespace {
2762 
2763  class HexagonLoopRescheduling : public MachineFunctionPass {
2764  public:
2765  static char ID;
2766 
2767  HexagonLoopRescheduling() : MachineFunctionPass(ID) {
2769  }
2770 
2771  bool runOnMachineFunction(MachineFunction &MF) override;
2772 
2773  private:
2774  const HexagonInstrInfo *HII = nullptr;
2775  const HexagonRegisterInfo *HRI = nullptr;
2776  MachineRegisterInfo *MRI = nullptr;
2777  BitTracker *BTP = nullptr;
2778 
2779  struct LoopCand {
2780  LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
2781  MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}
2782 
2783  MachineBasicBlock *LB, *PB, *EB;
2784  };
2785  using InstrList = std::vector<MachineInstr *>;
2786  struct InstrGroup {
2787  BitTracker::RegisterRef Inp, Out;
2788  InstrList Ins;
2789  };
2790  struct PhiInfo {
2791  PhiInfo(MachineInstr &P, MachineBasicBlock &B);
2792 
2793  unsigned DefR;
2794  BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register
2795  MachineBasicBlock *LB, *PB; // Loop Block, Preheader Block
2796  };
2797 
2798  static unsigned getDefReg(const MachineInstr *MI);
2799  bool isConst(unsigned Reg) const;
2800  bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
2801  bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
2802  bool isShuffleOf(unsigned OutR, unsigned InpR) const;
2803  bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
2804  unsigned &InpR2) const;
2805  void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
2806  MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
2807  bool processLoop(LoopCand &C);
2808  };
2809 
2810 } // end anonymous namespace
2811 
2813 
2814 INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
2815  "Hexagon Loop Rescheduling", false, false)
2816 
2817 HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
2818  MachineBasicBlock &B) {
2819  DefR = HexagonLoopRescheduling::getDefReg(&P);
2820  LB = &B;
2821  PB = nullptr;
2822  for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
2823  const MachineOperand &OpB = P.getOperand(i+1);
2824  if (OpB.getMBB() == &B) {
2825  LR = P.getOperand(i);
2826  continue;
2827  }
2828  PB = OpB.getMBB();
2829  PR = P.getOperand(i);
2830  }
2831 }
2832 
2833 unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
2834  RegisterSet Defs;
2835  HBS::getInstrDefs(*MI, Defs);
2836  if (Defs.count() != 1)
2837  return 0;
2838  return Defs.find_first();
2839 }
2840 
2841 bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
2842  if (!BTP->has(Reg))
2843  return false;
2844  const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
2845  for (unsigned i = 0, w = RC.width(); i < w; ++i) {
2846  const BitTracker::BitValue &V = RC[i];
2847  if (!V.is(0) && !V.is(1))
2848  return false;
2849  }
2850  return true;
2851 }
2852 
2853 bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
2854  unsigned DefR) const {
2855  unsigned Opc = MI->getOpcode();
2856  switch (Opc) {
2857  case TargetOpcode::COPY:
2858  case Hexagon::S2_lsr_i_r:
2859  case Hexagon::S2_asr_i_r:
2860  case Hexagon::S2_asl_i_r:
2861  case Hexagon::S2_lsr_i_p:
2862  case Hexagon::S2_asr_i_p:
2863  case Hexagon::S2_asl_i_p:
2864  case Hexagon::S2_insert:
2865  case Hexagon::A2_or:
2866  case Hexagon::A2_orp:
2867  case Hexagon::A2_and:
2868  case Hexagon::A2_andp:
2869  case Hexagon::A2_combinew:
2870  case Hexagon::A4_combineri:
2871  case Hexagon::A4_combineir:
2872  case Hexagon::A2_combineii:
2873  case Hexagon::A4_combineii:
2874  case Hexagon::A2_combine_ll:
2875  case Hexagon::A2_combine_lh:
2876  case Hexagon::A2_combine_hl:
2877  case Hexagon::A2_combine_hh:
2878  return true;
2879  }
2880  return false;
2881 }
2882 
2883 bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
2884  unsigned InpR) const {
2885  for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
2886  const MachineOperand &Op = MI->getOperand(i);
2887  if (!Op.isReg())
2888  continue;
2889  if (Op.getReg() == InpR)
2890  return i == n-1;
2891  }
2892  return false;
2893 }
2894 
2895 bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
2896  if (!BTP->has(OutR) || !BTP->has(InpR))
2897  return false;
2898  const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
2899  for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
2900  const BitTracker::BitValue &V = OutC[i];
2902  continue;
2903  if (V.RefI.Reg != InpR)
2904  return false;
2905  }
2906  return true;
2907 }
2908 
2909 bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
2910  unsigned OutR2, unsigned &InpR2) const {
2911  if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
2912  return false;
2913  const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
2914  const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
2915  unsigned W = OutC1.width();
2916  unsigned MatchR = 0;
2917  if (W != OutC2.width())
2918  return false;
2919  for (unsigned i = 0; i < W; ++i) {
2920  const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
2921  if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
2922  return false;
2923  if (V1.Type != BitTracker::BitValue::Ref)
2924  continue;
2925  if (V1.RefI.Pos != V2.RefI.Pos)
2926  return false;
2927  if (V1.RefI.Reg != InpR1)
2928  return false;
2929  if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
2930  return false;
2931  if (!MatchR)
2932  MatchR = V2.RefI.Reg;
2933  else if (V2.RefI.Reg != MatchR)
2934  return false;
2935  }
2936  InpR2 = MatchR;
2937  return true;
2938 }
2939 
2940 void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
2941  MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
2942  unsigned NewPredR) {
2944 
2945  const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
2946  unsigned PhiR = MRI->createVirtualRegister(PhiRC);
2947  BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
2948  .addReg(NewPredR)
2949  .addMBB(&PB)
2950  .addReg(G.Inp.Reg)
2951  .addMBB(&LB);
2952  RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
2953 
2954  for (unsigned i = G.Ins.size(); i > 0; --i) {
2955  const MachineInstr *SI = G.Ins[i-1];
2956  unsigned DR = getDefReg(SI);
2957  const TargetRegisterClass *RC = MRI->getRegClass(DR);
2958  unsigned NewDR = MRI->createVirtualRegister(RC);
2959  DebugLoc DL = SI->getDebugLoc();
2960 
2961  auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
2962  for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
2963  const MachineOperand &Op = SI->getOperand(j);
2964  if (!Op.isReg()) {
2965  MIB.add(Op);
2966  continue;
2967  }
2968  if (!Op.isUse())
2969  continue;
2970  unsigned UseR = RegMap[Op.getReg()];
2971  MIB.addReg(UseR, 0, Op.getSubReg());
2972  }
2973  RegMap.insert(std::make_pair(DR, NewDR));
2974  }
2975 
2976  HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
2977 }
2978 
2979 bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
2980  DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n");
2981  std::vector<PhiInfo> Phis;
2982  for (auto &I : *C.LB) {
2983  if (!I.isPHI())
2984  break;
2985  unsigned PR = getDefReg(&I);
2986  if (isConst(PR))
2987  continue;
2988  bool BadUse = false, GoodUse = false;
2989  for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
2990  MachineInstr *UseI = UI->getParent();
2991  if (UseI->getParent() != C.LB) {
2992  BadUse = true;
2993  break;
2994  }
2995  if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
2996  GoodUse = true;
2997  }
2998  if (BadUse || !GoodUse)
2999  continue;
3000 
3001  Phis.push_back(PhiInfo(I, *C.LB));
3002  }
3003 
3004  DEBUG({
3005  dbgs() << "Phis: {";
3006  for (auto &I : Phis) {
3007  dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi("
3008  << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
3009  << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
3010  << I.LB->getNumber() << ')';
3011  }
3012  dbgs() << " }\n";
3013  });
3014 
3015  if (Phis.empty())
3016  return false;
3017 
3018  bool Changed = false;
3019  InstrList ShufIns;
3020 
3021  // Go backwards in the block: for each bit shuffling instruction, check
3022  // if that instruction could potentially be moved to the front of the loop:
3023  // the output of the loop cannot be used in a non-shuffling instruction
3024  // in this loop.
3025  for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) {
3026  if (I->isTerminator())
3027  continue;
3028  if (I->isPHI())
3029  break;
3030 
3031  RegisterSet Defs;
3032  HBS::getInstrDefs(*I, Defs);
3033  if (Defs.count() != 1)
3034  continue;
3035  unsigned DefR = Defs.find_first();
3037  continue;
3038  if (!isBitShuffle(&*I, DefR))
3039  continue;
3040 
3041  bool BadUse = false;
3042  for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
3043  MachineInstr *UseI = UI->getParent();
3044  if (UseI->getParent() == C.LB) {
3045  if (UseI->isPHI()) {
3046  // If the use is in a phi node in this loop, then it should be
3047  // the value corresponding to the back edge.
3048  unsigned Idx = UI.getOperandNo();
3049  if (UseI->getOperand(Idx+1).getMBB() != C.LB)
3050  BadUse = true;
3051  } else {
3052  auto F = find(ShufIns, UseI);
3053  if (F == ShufIns.end())
3054  BadUse = true;
3055  }
3056  } else {
3057  // There is a use outside of the loop, but there is no epilog block
3058  // suitable for a copy-out.
3059  if (C.EB == nullptr)
3060  BadUse = true;
3061  }
3062  if (BadUse)
3063  break;
3064  }
3065 
3066  if (BadUse)
3067  continue;
3068  ShufIns.push_back(&*I);
3069  }
3070 
3071  // Partition the list of shuffling instructions into instruction groups,
3072  // where each group has to be moved as a whole (i.e. a group is a chain of
3073  // dependent instructions). A group produces a single live output register,
3074  // which is meant to be the input of the loop phi node (although this is
3075  // not checked here yet). It also uses a single register as its input,
3076  // which is some value produced in the loop body. After moving the group
3077  // to the beginning of the loop, that input register would need to be
3078  // the loop-carried register (through a phi node) instead of the (currently
3079  // loop-carried) output register.
3080  using InstrGroupList = std::vector<InstrGroup>;
3081  InstrGroupList Groups;
3082 
3083  for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
3084  MachineInstr *SI = ShufIns[i];
3085  if (SI == nullptr)
3086  continue;
3087 
3088  InstrGroup G;
3089  G.Ins.push_back(SI);
3090  G.Out.Reg = getDefReg(SI);
3091  RegisterSet Inputs;
3092  HBS::getInstrUses(*SI, Inputs);
3093 
3094  for (unsigned j = i+1; j < n; ++j) {
3095  MachineInstr *MI = ShufIns[j];
3096  if (MI == nullptr)
3097  continue;
3098  RegisterSet Defs;
3099  HBS::getInstrDefs(*MI, Defs);
3100  // If this instruction does not define any pending inputs, skip it.
3101  if (!Defs.intersects(Inputs))
3102  continue;
3103  // Otherwise, add it to the current group and remove the inputs that
3104  // are defined by MI.
3105  G.Ins.push_back(MI);
3106  Inputs.remove(Defs);
3107  // Then add all registers used by MI.
3108  HBS::getInstrUses(*MI, Inputs);
3109  ShufIns[j] = nullptr;
3110  }
3111 
3112  // Only add a group if it requires at most one register.
3113  if (Inputs.count() > 1)
3114  continue;
3115  auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
3116  return G.Out.Reg == P.LR.Reg;
3117  };
3118  if (llvm::find_if(Phis, LoopInpEq) == Phis.end())
3119  continue;
3120 
3121  G.Inp.Reg = Inputs.find_first();
3122  Groups.push_back(G);
3123  }
3124 
3125  DEBUG({
3126  for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
3127  InstrGroup &G = Groups[i];
3128  dbgs() << "Group[" << i << "] inp: "
3129  << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub)
3130  << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
3131  for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
3132  dbgs() << " " << *G.Ins[j];
3133  }
3134  });
3135 
3136  for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
3137  InstrGroup &G = Groups[i];
3138  if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
3139  continue;
3140  auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
3141  return G.Out.Reg == P.LR.Reg;
3142  };
3143  auto F = llvm::find_if(Phis, LoopInpEq);
3144  if (F == Phis.end())
3145  continue;
3146  unsigned PrehR = 0;
3147  if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) {
3148  const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg);
3149  unsigned Opc = DefPrehR->getOpcode();
3150  if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
3151  continue;
3152  if (!DefPrehR->getOperand(1).isImm())
3153  continue;
3154  if (DefPrehR->getOperand(1).getImm() != 0)
3155  continue;
3156  const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
3157  if (RC != MRI->getRegClass(F->PR.Reg)) {
3158  PrehR = MRI->createVirtualRegister(RC);
3159  unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
3160  : Hexagon::A2_tfrpi;
3161  auto T = C.PB->getFirstTerminator();
3162  DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
3163  BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR)
3164  .addImm(0);
3165  } else {
3166  PrehR = F->PR.Reg;
3167  }
3168  }
3169  // isSameShuffle could match with PrehR being of a wider class than
3170  // G.Inp.Reg, for example if G shuffles the low 32 bits of its input,
3171  // it would match for the input being a 32-bit register, and PrehR
3172  // being a 64-bit register (where the low 32 bits match). This could
3173  // be handled, but for now skip these cases.
3174  if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg))
3175  continue;
3176  moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR);
3177  Changed = true;
3178  }
3179 
3180  return Changed;
3181 }
3182 
3183 bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
3184  if (skipFunction(*MF.getFunction()))
3185  return false;
3186 
3187  auto &HST = MF.getSubtarget<HexagonSubtarget>();
3188  HII = HST.getInstrInfo();
3189  HRI = HST.getRegisterInfo();
3190  MRI = &MF.getRegInfo();
3191  const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
3192  BitTracker BT(HE, MF);
3193  DEBUG(BT.trace(true));
3194  BT.run();
3195  BTP = &BT;
3196 
3197  std::vector<LoopCand> Cand;
3198 
3199  for (auto &B : MF) {
3200  if (B.pred_size() != 2 || B.succ_size() != 2)
3201  continue;
3202  MachineBasicBlock *PB = nullptr;
3203  bool IsLoop = false;
3204  for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
3205  if (*PI != &B)
3206  PB = *PI;
3207  else
3208  IsLoop = true;
3209  }
3210  if (!IsLoop)
3211  continue;
3212 
3213  MachineBasicBlock *EB = nullptr;
3214  for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
3215  if (*SI == &B)
3216  continue;
3217  // Set EP to the epilog block, if it has only 1 predecessor (i.e. the
3218  // edge from B to EP is non-critical.
3219  if ((*SI)->pred_size() == 1)
3220  EB = *SI;
3221  break;
3222  }
3223 
3224  Cand.push_back(LoopCand(&B, PB, EB));
3225  }
3226 
3227  bool Changed = false;
3228  for (auto &C : Cand)
3229  Changed |= processLoop(C);
3230 
3231  return Changed;
3232 }
3233 
3234 //===----------------------------------------------------------------------===//
3235 // Public Constructor Functions
3236 //===----------------------------------------------------------------------===//
3237 
3239  return new HexagonLoopRescheduling();
3240 }
3241 
3243  return new HexagonBitSimplify();
3244 }
INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit", "Hexagon bit simplification", false, false) INITIALIZE_PASS_END(HexagonBitSimplify
uint64_t CallInst * C
void push_back(const T &Elt)
Definition: SmallVector.h:212
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
BitVector & set()
Definition: BitVector.h:398
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
void trace(bool On=false)
Definition: BitTracker.h:50
static unsigned virtReg2Index(unsigned Reg)
Convert a virtual register number to a 0-based index.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:365
static unsigned index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:281
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:268
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:384
Y = RRC X, rotate right via carry.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:45
void visit(const MachineInstr &MI)
unsigned getSubReg() const
bool isInlineAsm() const
Definition: MachineInstr.h:832
bool test(unsigned Idx) const
Definition: BitVector.h:502
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:295
A debug info location.
Definition: DebugLoc.h:34
F(f)
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:332
bool isPHI() const
Definition: MachineInstr.h:826
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
void clearKillInfo()
Clears kill flags on all operands.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void clear()
clear - Removes all bits from the bitvector. Does not change capacity.
Definition: BitVector.h:367
static use_iterator use_end()
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
static ManagedStatic< DebugCounter > DC
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:293
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:332
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth...
Definition: ISDOpcodes.h:368
const HexagonRegisterInfo * getRegisterInfo() const override
std::set< RegisterRef > RegisterSet
Definition: RDFGraph.h:413
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:340
Reg
All possible values of the reg field in the ModR/M byte.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:474
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
unsigned getID() const
Return the register class ID number.
zlib-gnu style compression
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
void RemoveOperand(unsigned i)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:287
static bool isEqual(const Function &Caller, const Function &Callee)
BitVector & operator|=(const BitVector &RHS)
Definition: BitVector.h:586
#define T
const RegList & Regs
FunctionPass * createHexagonBitSimplify()
Base class for the actual dominator tree node.
bool reached(const MachineBasicBlock *B) const
Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubRegIdx=0)
Prints virtual and physical registers with or without a TRI instance.
reverse_iterator rend()
static cl::opt< unsigned > MaxExtract("hexbit-max-extract", cl::Hidden, cl::init(std::numeric_limits< unsigned >::max()))
reverse_iterator rbegin()
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:801
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
NodeT * getBlock() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:639
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
MachineInstrBundleIterator< MachineInstr > iterator
static cl::opt< bool > GenBitSplit("hexbit-bitsplit", cl::Hidden, cl::init(true), cl::desc("Generate bitsplit instructions"))
const TargetRegisterInfo * getTargetRegisterInfo() const
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned const MachineRegisterInfo * MRI
bool anyCommon(const BitVector &RHS) const
Test if any common bits are set.
Definition: BitVector.h:507
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const RegisterCell & lookup(unsigned Reg) const
Definition: BitTracker.h:316
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define H(x, y, z)
Definition: MD5.cpp:57
bool any() const
any - Returns true if any bit is set.
Definition: BitVector.h:181
Represent the analysis usage information of a pass.
BitVector & reset()
Definition: BitVector.h:439
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:774
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:144
static const unsigned End
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
FunctionPass * createHexagonLoopRescheduling()
bool isCopy() const
Definition: MachineInstr.h:857
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MachineBasicBlock & front() const
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static cl::opt< unsigned > MaxBitSplit("hexbit-max-bitsplit", cl::Hidden, cl::init(std::numeric_limits< unsigned >::max()))
static cl::opt< bool > GenExtract("hexbit-extract", cl::Hidden, cl::init(true), cl::desc("Generate extract instructions"))
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:788
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:173
void initializeHexagonBitSimplifyPass(PassRegistry &Registry)
static unsigned CountExtract
iterator_range< use_iterator > use_operands(unsigned Reg) const
bool isDebugValue() const
Definition: MachineInstr.h:816
MachineOperand class - Representation of each machine instruction operand.
reference operator[](unsigned Idx)
Definition: BitVector.h:491
Hexagon bit simplification
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
CHAIN = SC CHAIN, Imm128 - System call.
static cl::opt< bool > PreserveTiedOps("hexbit-keep-tied", cl::Hidden, cl::init(true), cl::desc("Preserve subregisters in tied operands"))
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
bool is(unsigned T) const
Definition: BitTracker.h:168
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:225
int64_t getImm() const
unsigned pred_size() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:393
unsigned succ_size() const
INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched", "Hexagon Loop Rescheduling", false, false) HexagonLoopRescheduling
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:139
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static unsigned CountBitSplit
Representation of each machine instruction.
Definition: MachineInstr.h:59
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:538
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
BitTracker BT
Definition: BitTracker.cpp:74
void setReg(unsigned Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
void setSubReg(unsigned subReg)
bool has(unsigned Reg) const
Definition: BitTracker.h:311
size_type size() const
size - Returns the number of bits in this bitvector.
Definition: BitVector.h:170
void markUsesInDebugValueAsUndef(unsigned Reg) const
markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the specified register as undefined wh...
constexpr char IsConst[]
Key for Kernel::Arg::Metadata::mIsConst.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
void initializeHexagonLoopReschedulingPass(PassRegistry &)
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2018
RegisterCell get(RegisterRef RR) const
Definition: BitTracker.cpp:966
bool isReg() const
isReg - Tests if this is a MO_Register operand.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:276
const HexagonInstrInfo * getInstrInfo() const override
void put(RegisterRef RR, const RegisterCell &RC)
Definition: BitTracker.cpp:970
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
auto find_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:795
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:385
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
unsigned getHexagonSubRegIndex(const TargetRegisterClass &RC, unsigned GenIdx) const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const
Return true if it is safe to move this instruction.