LLVM  15.0.0git
HexagonBitSimplify.cpp
Go to the documentation of this file.
1 //===- HexagonBitSimplify.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "BitTracker.h"
10 #include "HexagonBitTracker.h"
11 #include "HexagonInstrInfo.h"
12 #include "HexagonRegisterInfo.h"
13 #include "HexagonSubtarget.h"
14 #include "llvm/ADT/BitVector.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/GraphTraits.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
29 #include "llvm/IR/DebugLoc.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/MC/MCInstrDesc.h"
32 #include "llvm/Pass.h"
34 #include "llvm/Support/Compiler.h"
35 #include "llvm/Support/Debug.h"
39 #include <algorithm>
40 #include <cassert>
41 #include <cstdint>
42 #include <deque>
43 #include <iterator>
44 #include <limits>
45 #include <utility>
46 #include <vector>
47 
48 #define DEBUG_TYPE "hexbit"
49 
50 using namespace llvm;
51 
52 static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
53  cl::init(true), cl::desc("Preserve subregisters in tied operands"));
54 static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
55  cl::init(true), cl::desc("Generate extract instructions"));
56 static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,
57  cl::init(true), cl::desc("Generate bitsplit instructions"));
58 
59 static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
61 static unsigned CountExtract = 0;
62 static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
64 static unsigned CountBitSplit = 0;
65 
66 static cl::opt<unsigned> RegisterSetLimit("hexbit-registerset-limit",
67  cl::Hidden, cl::init(1000));
68 
69 namespace llvm {
70 
73 
74 } // end namespace llvm
75 
76 namespace {
77 
78  // Set of virtual registers, based on BitVector.
79  struct RegisterSet {
80  RegisterSet() = default;
81  explicit RegisterSet(unsigned s, bool t = false) : Bits(s, t) {}
82  RegisterSet(const RegisterSet &RS) = default;
83 
84  void clear() {
85  Bits.clear();
86  LRU.clear();
87  }
88 
89  unsigned count() const {
90  return Bits.count();
91  }
92 
93  unsigned find_first() const {
94  int First = Bits.find_first();
95  if (First < 0)
96  return 0;
97  return x2v(First);
98  }
99 
100  unsigned find_next(unsigned Prev) const {
101  int Next = Bits.find_next(v2x(Prev));
102  if (Next < 0)
103  return 0;
104  return x2v(Next);
105  }
106 
107  RegisterSet &insert(unsigned R) {
108  unsigned Idx = v2x(R);
109  ensure(Idx);
110  bool Exists = Bits.test(Idx);
111  Bits.set(Idx);
112  if (!Exists) {
113  LRU.push_back(Idx);
114  if (LRU.size() > RegisterSetLimit) {
115  unsigned T = LRU.front();
116  Bits.reset(T);
117  LRU.pop_front();
118  }
119  }
120  return *this;
121  }
122  RegisterSet &remove(unsigned R) {
123  unsigned Idx = v2x(R);
124  if (Idx < Bits.size()) {
125  bool Exists = Bits.test(Idx);
126  Bits.reset(Idx);
127  if (Exists) {
128  auto F = llvm::find(LRU, Idx);
129  assert(F != LRU.end());
130  LRU.erase(F);
131  }
132  }
133  return *this;
134  }
135 
136  RegisterSet &insert(const RegisterSet &Rs) {
137  for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R))
138  insert(R);
139  return *this;
140  }
141  RegisterSet &remove(const RegisterSet &Rs) {
142  for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R))
143  remove(R);
144  return *this;
145  }
146 
147  bool operator[](unsigned R) const {
148  unsigned Idx = v2x(R);
149  return Idx < Bits.size() ? Bits[Idx] : false;
150  }
151  bool has(unsigned R) const {
152  unsigned Idx = v2x(R);
153  if (Idx >= Bits.size())
154  return false;
155  return Bits.test(Idx);
156  }
157 
158  bool empty() const {
159  return !Bits.any();
160  }
161  bool includes(const RegisterSet &Rs) const {
162  // A.test(B) <=> A-B != {}
163  return !Rs.Bits.test(Bits);
164  }
165  bool intersects(const RegisterSet &Rs) const {
166  return Bits.anyCommon(Rs.Bits);
167  }
168 
169  private:
170  BitVector Bits;
171  std::deque<unsigned> LRU;
172 
173  void ensure(unsigned Idx) {
174  if (Bits.size() <= Idx)
175  Bits.resize(std::max(Idx+1, 32U));
176  }
177 
178  static inline unsigned v2x(unsigned v) {
179  return Register::virtReg2Index(v);
180  }
181 
182  static inline unsigned x2v(unsigned x) {
183  return Register::index2VirtReg(x);
184  }
185  };
186 
187  struct PrintRegSet {
188  PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
189  : RS(S), TRI(RI) {}
190 
191  friend raw_ostream &operator<< (raw_ostream &OS,
192  const PrintRegSet &P);
193 
194  private:
195  const RegisterSet &RS;
196  const TargetRegisterInfo *TRI;
197  };
198 
199  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
201  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
202  OS << '{';
203  for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
204  OS << ' ' << printReg(R, P.TRI);
205  OS << " }";
206  return OS;
207  }
208 
209  class Transformation;
210 
211  class HexagonBitSimplify : public MachineFunctionPass {
212  public:
213  static char ID;
214 
215  HexagonBitSimplify() : MachineFunctionPass(ID) {}
216 
217  StringRef getPassName() const override {
218  return "Hexagon bit simplification";
219  }
220 
221  void getAnalysisUsage(AnalysisUsage &AU) const override {
225  }
226 
227  bool runOnMachineFunction(MachineFunction &MF) override;
228 
229  static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
230  static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
231  static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
232  const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
233  static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
234  uint16_t W);
235  static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
236  uint16_t W, uint64_t &U);
237  static bool replaceReg(Register OldR, Register NewR,
239  static bool getSubregMask(const BitTracker::RegisterRef &RR,
240  unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
241  static bool replaceRegWithSub(Register OldR, Register NewR, unsigned NewSR,
243  static bool replaceSubWithSub(Register OldR, unsigned OldSR, Register NewR,
244  unsigned NewSR, MachineRegisterInfo &MRI);
245  static bool parseRegSequence(const MachineInstr &I,
247  const MachineRegisterInfo &MRI);
248 
249  static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
250  uint16_t Begin);
251  static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
252  uint16_t Begin, const HexagonInstrInfo &HII);
253 
254  static const TargetRegisterClass *getFinalVRegClass(
256  static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
258 
259  private:
260  MachineDominatorTree *MDT = nullptr;
261 
262  bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
263  static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
264  unsigned NewSub = Hexagon::NoSubRegister);
265  };
266 
267  using HBS = HexagonBitSimplify;
268 
269  // The purpose of this class is to provide a common facility to traverse
270  // the function top-down or bottom-up via the dominator tree, and keep
271  // track of the available registers.
272  class Transformation {
273  public:
274  bool TopDown;
275 
276  Transformation(bool TD) : TopDown(TD) {}
277  virtual ~Transformation() = default;
278 
279  virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
280  };
281 
282 } // end anonymous namespace
283 
284 char HexagonBitSimplify::ID = 0;
285 
286 INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify",
287  "Hexagon bit simplification", false, false)
289 INITIALIZE_PASS_END(HexagonBitSimplify, "hexagon-bit-simplify",
291 
292 bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
293  RegisterSet &AVs) {
294  bool Changed = false;
295 
296  if (T.TopDown)
297  Changed = T.processBlock(B, AVs);
298 
299  RegisterSet Defs;
300  for (auto &I : B)
301  getInstrDefs(I, Defs);
302  RegisterSet NewAVs = AVs;
303  NewAVs.insert(Defs);
304 
305  for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B)))
306  Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs);
307 
308  if (!T.TopDown)
309  Changed |= T.processBlock(B, AVs);
310 
311  return Changed;
312 }
313 
314 //
315 // Utility functions:
316 //
317 void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
318  RegisterSet &Defs) {
319  for (auto &Op : MI.operands()) {
320  if (!Op.isReg() || !Op.isDef())
321  continue;
322  Register R = Op.getReg();
323  if (!R.isVirtual())
324  continue;
325  Defs.insert(R);
326  }
327 }
328 
329 void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
330  RegisterSet &Uses) {
331  for (auto &Op : MI.operands()) {
332  if (!Op.isReg() || !Op.isUse())
333  continue;
334  Register R = Op.getReg();
335  if (!R.isVirtual())
336  continue;
337  Uses.insert(R);
338  }
339 }
340 
341 // Check if all the bits in range [B, E) in both cells are equal.
344  uint16_t W) {
345  for (uint16_t i = 0; i < W; ++i) {
346  // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
347  if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
348  return false;
349  // Same for RC2[i].
350  if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
351  return false;
352  if (RC1[B1+i] != RC2[B2+i])
353  return false;
354  }
355  return true;
356 }
357 
359  uint16_t B, uint16_t W) {
360  assert(B < RC.width() && B+W <= RC.width());
361  for (uint16_t i = B; i < B+W; ++i)
362  if (!RC[i].is(0))
363  return false;
364  return true;
365 }
366 
367 bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
368  uint16_t B, uint16_t W, uint64_t &U) {
369  assert(B < RC.width() && B+W <= RC.width());
370  int64_t T = 0;
371  for (uint16_t i = B+W; i > B; --i) {
372  const BitTracker::BitValue &BV = RC[i-1];
373  T <<= 1;
374  if (BV.is(1))
375  T |= 1;
376  else if (!BV.is(0))
377  return false;
378  }
379  U = T;
380  return true;
381 }
382 
383 bool HexagonBitSimplify::replaceReg(Register OldR, Register NewR,
385  if (!OldR.isVirtual() || !NewR.isVirtual())
386  return false;
387  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
388  decltype(End) NextI;
389  for (auto I = Begin; I != End; I = NextI) {
390  NextI = std::next(I);
391  I->setReg(NewR);
392  }
393  return Begin != End;
394 }
395 
396 bool HexagonBitSimplify::replaceRegWithSub(Register OldR, Register NewR,
397  unsigned NewSR,
399  if (!OldR.isVirtual() || !NewR.isVirtual())
400  return false;
401  if (hasTiedUse(OldR, MRI, NewSR))
402  return false;
403  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
404  decltype(End) NextI;
405  for (auto I = Begin; I != End; I = NextI) {
406  NextI = std::next(I);
407  I->setReg(NewR);
408  I->setSubReg(NewSR);
409  }
410  return Begin != End;
411 }
412 
413 bool HexagonBitSimplify::replaceSubWithSub(Register OldR, unsigned OldSR,
414  Register NewR, unsigned NewSR,
416  if (!OldR.isVirtual() || !NewR.isVirtual())
417  return false;
418  if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))
419  return false;
420  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
421  decltype(End) NextI;
422  for (auto I = Begin; I != End; I = NextI) {
423  NextI = std::next(I);
424  if (I->getSubReg() != OldSR)
425  continue;
426  I->setReg(NewR);
427  I->setSubReg(NewSR);
428  }
429  return Begin != End;
430 }
431 
432 // For a register ref (pair Reg:Sub), set Begin to the position of the LSB
433 // of Sub in Reg, and set Width to the size of Sub in bits. Return true,
434 // if this succeeded, otherwise return false.
435 bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
436  unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
437  const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
438  if (RR.Sub == 0) {
439  Begin = 0;
441  return true;
442  }
443 
444  Begin = 0;
445 
446  switch (RC->getID()) {
447  case Hexagon::DoubleRegsRegClassID:
448  case Hexagon::HvxWRRegClassID:
450  if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi)
451  Begin = Width;
452  break;
453  default:
454  return false;
455  }
456  return true;
457 }
458 
459 
460 // For a REG_SEQUENCE, set SL to the low subregister and SH to the high
461 // subregister.
462 bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
464  const MachineRegisterInfo &MRI) {
465  assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
466  unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
467  auto &DstRC = *MRI.getRegClass(I.getOperand(0).getReg());
468  auto &HRI = static_cast<const HexagonRegisterInfo&>(
470  unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo);
471  unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi);
472  assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo));
473  if (Sub1 == SubLo && Sub2 == SubHi) {
474  SL = I.getOperand(1);
475  SH = I.getOperand(3);
476  return true;
477  }
478  if (Sub1 == SubHi && Sub2 == SubLo) {
479  SH = I.getOperand(1);
480  SL = I.getOperand(3);
481  return true;
482  }
483  return false;
484 }
485 
486 // All stores (except 64-bit stores) take a 32-bit register as the source
487 // of the value to be stored. If the instruction stores into a location
488 // that is shorter than 32 bits, some bits of the source register are not
489 // used. For each store instruction, calculate the set of used bits in
490 // the source register, and set appropriate bits in Bits. Return true if
491 // the bits are calculated, false otherwise.
492 bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
493  uint16_t Begin) {
494  using namespace Hexagon;
495 
496  switch (Opc) {
497  // Store byte
498  case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32
499  case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new
500  case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32
501  case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32
502  case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
503  case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
504  case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
505  case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
506  case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
507  case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
508  case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32
509  case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new
510  case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32
511  case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32
512  case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
513  case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
514  case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
515  case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
516  case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
517  case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
518  case S4_storerb_ap: // memb(Re32=#U6)=Rt32
519  case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new
520  case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32
521  case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new
522  case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32
523  case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new
524  case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32
525  case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new
526  case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32
527  case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
528  case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32
529  case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new
530  case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32
531  case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new
532  case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
533  case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
534  case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
535  case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
536  case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
537  case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
538  case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
539  case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
540  case S2_storerbgp: // memb(gp+#u16:0)=Rt32
541  case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new
542  case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32
543  case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32
544  case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32
545  case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32
546  case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new
547  case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new
548  case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new
549  case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new
550  Bits.set(Begin, Begin+8);
551  return true;
552 
553  // Store low half
554  case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32
555  case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new
556  case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32
557  case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32
558  case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
559  case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
560  case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
561  case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
562  case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
563  case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
564  case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32
565  case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new
566  case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32
567  case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32
568  case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
569  case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
570  case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
571  case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
572  case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
573  case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
574  case S4_storerh_ap: // memh(Re32=#U6)=Rt32
575  case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new
576  case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32
577  case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new
578  case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32
579  case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new
580  case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32
581  case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new
582  case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32
583  case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
584  case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32
585  case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new
586  case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32
587  case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
588  case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
589  case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
590  case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
591  case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new
592  case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
593  case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
594  case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
595  case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
596  case S2_storerhgp: // memh(gp+#u16:1)=Rt32
597  case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new
598  case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32
599  case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32
600  case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32
601  case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32
602  case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new
603  case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new
604  case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new
605  case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new
606  Bits.set(Begin, Begin+16);
607  return true;
608 
609  // Store high half
610  case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32
611  case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
612  case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
613  case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
614  case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
615  case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32
616  case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
617  case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
618  case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
619  case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
620  case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32
621  case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32
622  case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32
623  case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32
624  case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
625  case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32
626  case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32
627  case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
628  case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
629  case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
630  case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
631  case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32
632  case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32
633  case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32
634  case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32
635  case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32
636  Bits.set(Begin+16, Begin+32);
637  return true;
638  }
639 
640  return false;
641 }
642 
643 // For an instruction with opcode Opc, calculate the set of bits that it
644 // uses in a register in operand OpN. This only calculates the set of used
645 // bits for cases where it does not depend on any operands (as is the case
646 // in shifts, for example). For concrete instructions from a program, the
647 // operand may be a subregister of a larger register, while Bits would
648 // correspond to the larger register in its entirety. Because of that,
649 // the parameter Begin can be used to indicate which bit of Bits should be
650 // considered the LSB of the operand.
651 bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
652  BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
653  using namespace Hexagon;
654 
655  const MCInstrDesc &D = HII.get(Opc);
656  if (D.mayStore()) {
657  if (OpN == D.getNumOperands()-1)
658  return getUsedBitsInStore(Opc, Bits, Begin);
659  return false;
660  }
661 
662  switch (Opc) {
663  // One register source. Used bits: R1[0-7].
664  case A2_sxtb:
665  case A2_zxtb:
666  case A4_cmpbeqi:
667  case A4_cmpbgti:
668  case A4_cmpbgtui:
669  if (OpN == 1) {
670  Bits.set(Begin, Begin+8);
671  return true;
672  }
673  break;
674 
675  // One register source. Used bits: R1[0-15].
676  case A2_aslh:
677  case A2_sxth:
678  case A2_zxth:
679  case A4_cmpheqi:
680  case A4_cmphgti:
681  case A4_cmphgtui:
682  if (OpN == 1) {
683  Bits.set(Begin, Begin+16);
684  return true;
685  }
686  break;
687 
688  // One register source. Used bits: R1[16-31].
689  case A2_asrh:
690  if (OpN == 1) {
691  Bits.set(Begin+16, Begin+32);
692  return true;
693  }
694  break;
695 
696  // Two register sources. Used bits: R1[0-7], R2[0-7].
697  case A4_cmpbeq:
698  case A4_cmpbgt:
699  case A4_cmpbgtu:
700  if (OpN == 1) {
701  Bits.set(Begin, Begin+8);
702  return true;
703  }
704  break;
705 
706  // Two register sources. Used bits: R1[0-15], R2[0-15].
707  case A4_cmpheq:
708  case A4_cmphgt:
709  case A4_cmphgtu:
710  case A2_addh_h16_ll:
711  case A2_addh_h16_sat_ll:
712  case A2_addh_l16_ll:
713  case A2_addh_l16_sat_ll:
714  case A2_combine_ll:
715  case A2_subh_h16_ll:
716  case A2_subh_h16_sat_ll:
717  case A2_subh_l16_ll:
718  case A2_subh_l16_sat_ll:
719  case M2_mpy_acc_ll_s0:
720  case M2_mpy_acc_ll_s1:
721  case M2_mpy_acc_sat_ll_s0:
722  case M2_mpy_acc_sat_ll_s1:
723  case M2_mpy_ll_s0:
724  case M2_mpy_ll_s1:
725  case M2_mpy_nac_ll_s0:
726  case M2_mpy_nac_ll_s1:
727  case M2_mpy_nac_sat_ll_s0:
728  case M2_mpy_nac_sat_ll_s1:
729  case M2_mpy_rnd_ll_s0:
730  case M2_mpy_rnd_ll_s1:
731  case M2_mpy_sat_ll_s0:
732  case M2_mpy_sat_ll_s1:
733  case M2_mpy_sat_rnd_ll_s0:
734  case M2_mpy_sat_rnd_ll_s1:
735  case M2_mpyd_acc_ll_s0:
736  case M2_mpyd_acc_ll_s1:
737  case M2_mpyd_ll_s0:
738  case M2_mpyd_ll_s1:
739  case M2_mpyd_nac_ll_s0:
740  case M2_mpyd_nac_ll_s1:
741  case M2_mpyd_rnd_ll_s0:
742  case M2_mpyd_rnd_ll_s1:
743  case M2_mpyu_acc_ll_s0:
744  case M2_mpyu_acc_ll_s1:
745  case M2_mpyu_ll_s0:
746  case M2_mpyu_ll_s1:
747  case M2_mpyu_nac_ll_s0:
748  case M2_mpyu_nac_ll_s1:
749  case M2_mpyud_acc_ll_s0:
750  case M2_mpyud_acc_ll_s1:
751  case M2_mpyud_ll_s0:
752  case M2_mpyud_ll_s1:
753  case M2_mpyud_nac_ll_s0:
754  case M2_mpyud_nac_ll_s1:
755  if (OpN == 1 || OpN == 2) {
756  Bits.set(Begin, Begin+16);
757  return true;
758  }
759  break;
760 
761  // Two register sources. Used bits: R1[0-15], R2[16-31].
762  case A2_addh_h16_lh:
763  case A2_addh_h16_sat_lh:
764  case A2_combine_lh:
765  case A2_subh_h16_lh:
766  case A2_subh_h16_sat_lh:
767  case M2_mpy_acc_lh_s0:
768  case M2_mpy_acc_lh_s1:
769  case M2_mpy_acc_sat_lh_s0:
770  case M2_mpy_acc_sat_lh_s1:
771  case M2_mpy_lh_s0:
772  case M2_mpy_lh_s1:
773  case M2_mpy_nac_lh_s0:
774  case M2_mpy_nac_lh_s1:
775  case M2_mpy_nac_sat_lh_s0:
776  case M2_mpy_nac_sat_lh_s1:
777  case M2_mpy_rnd_lh_s0:
778  case M2_mpy_rnd_lh_s1:
779  case M2_mpy_sat_lh_s0:
780  case M2_mpy_sat_lh_s1:
781  case M2_mpy_sat_rnd_lh_s0:
782  case M2_mpy_sat_rnd_lh_s1:
783  case M2_mpyd_acc_lh_s0:
784  case M2_mpyd_acc_lh_s1:
785  case M2_mpyd_lh_s0:
786  case M2_mpyd_lh_s1:
787  case M2_mpyd_nac_lh_s0:
788  case M2_mpyd_nac_lh_s1:
789  case M2_mpyd_rnd_lh_s0:
790  case M2_mpyd_rnd_lh_s1:
791  case M2_mpyu_acc_lh_s0:
792  case M2_mpyu_acc_lh_s1:
793  case M2_mpyu_lh_s0:
794  case M2_mpyu_lh_s1:
795  case M2_mpyu_nac_lh_s0:
796  case M2_mpyu_nac_lh_s1:
797  case M2_mpyud_acc_lh_s0:
798  case M2_mpyud_acc_lh_s1:
799  case M2_mpyud_lh_s0:
800  case M2_mpyud_lh_s1:
801  case M2_mpyud_nac_lh_s0:
802  case M2_mpyud_nac_lh_s1:
803  // These four are actually LH.
804  case A2_addh_l16_hl:
805  case A2_addh_l16_sat_hl:
806  case A2_subh_l16_hl:
807  case A2_subh_l16_sat_hl:
808  if (OpN == 1) {
809  Bits.set(Begin, Begin+16);
810  return true;
811  }
812  if (OpN == 2) {
813  Bits.set(Begin+16, Begin+32);
814  return true;
815  }
816  break;
817 
818  // Two register sources, used bits: R1[16-31], R2[0-15].
819  case A2_addh_h16_hl:
820  case A2_addh_h16_sat_hl:
821  case A2_combine_hl:
822  case A2_subh_h16_hl:
823  case A2_subh_h16_sat_hl:
824  case M2_mpy_acc_hl_s0:
825  case M2_mpy_acc_hl_s1:
826  case M2_mpy_acc_sat_hl_s0:
827  case M2_mpy_acc_sat_hl_s1:
828  case M2_mpy_hl_s0:
829  case M2_mpy_hl_s1:
830  case M2_mpy_nac_hl_s0:
831  case M2_mpy_nac_hl_s1:
832  case M2_mpy_nac_sat_hl_s0:
833  case M2_mpy_nac_sat_hl_s1:
834  case M2_mpy_rnd_hl_s0:
835  case M2_mpy_rnd_hl_s1:
836  case M2_mpy_sat_hl_s0:
837  case M2_mpy_sat_hl_s1:
838  case M2_mpy_sat_rnd_hl_s0:
839  case M2_mpy_sat_rnd_hl_s1:
840  case M2_mpyd_acc_hl_s0:
841  case M2_mpyd_acc_hl_s1:
842  case M2_mpyd_hl_s0:
843  case M2_mpyd_hl_s1:
844  case M2_mpyd_nac_hl_s0:
845  case M2_mpyd_nac_hl_s1:
846  case M2_mpyd_rnd_hl_s0:
847  case M2_mpyd_rnd_hl_s1:
848  case M2_mpyu_acc_hl_s0:
849  case M2_mpyu_acc_hl_s1:
850  case M2_mpyu_hl_s0:
851  case M2_mpyu_hl_s1:
852  case M2_mpyu_nac_hl_s0:
853  case M2_mpyu_nac_hl_s1:
854  case M2_mpyud_acc_hl_s0:
855  case M2_mpyud_acc_hl_s1:
856  case M2_mpyud_hl_s0:
857  case M2_mpyud_hl_s1:
858  case M2_mpyud_nac_hl_s0:
859  case M2_mpyud_nac_hl_s1:
860  if (OpN == 1) {
861  Bits.set(Begin+16, Begin+32);
862  return true;
863  }
864  if (OpN == 2) {
865  Bits.set(Begin, Begin+16);
866  return true;
867  }
868  break;
869 
870  // Two register sources, used bits: R1[16-31], R2[16-31].
871  case A2_addh_h16_hh:
872  case A2_addh_h16_sat_hh:
873  case A2_combine_hh:
874  case A2_subh_h16_hh:
875  case A2_subh_h16_sat_hh:
876  case M2_mpy_acc_hh_s0:
877  case M2_mpy_acc_hh_s1:
878  case M2_mpy_acc_sat_hh_s0:
879  case M2_mpy_acc_sat_hh_s1:
880  case M2_mpy_hh_s0:
881  case M2_mpy_hh_s1:
882  case M2_mpy_nac_hh_s0:
883  case M2_mpy_nac_hh_s1:
884  case M2_mpy_nac_sat_hh_s0:
885  case M2_mpy_nac_sat_hh_s1:
886  case M2_mpy_rnd_hh_s0:
887  case M2_mpy_rnd_hh_s1:
888  case M2_mpy_sat_hh_s0:
889  case M2_mpy_sat_hh_s1:
890  case M2_mpy_sat_rnd_hh_s0:
891  case M2_mpy_sat_rnd_hh_s1:
892  case M2_mpyd_acc_hh_s0:
893  case M2_mpyd_acc_hh_s1:
894  case M2_mpyd_hh_s0:
895  case M2_mpyd_hh_s1:
896  case M2_mpyd_nac_hh_s0:
897  case M2_mpyd_nac_hh_s1:
898  case M2_mpyd_rnd_hh_s0:
899  case M2_mpyd_rnd_hh_s1:
900  case M2_mpyu_acc_hh_s0:
901  case M2_mpyu_acc_hh_s1:
902  case M2_mpyu_hh_s0:
903  case M2_mpyu_hh_s1:
904  case M2_mpyu_nac_hh_s0:
905  case M2_mpyu_nac_hh_s1:
906  case M2_mpyud_acc_hh_s0:
907  case M2_mpyud_acc_hh_s1:
908  case M2_mpyud_hh_s0:
909  case M2_mpyud_hh_s1:
910  case M2_mpyud_nac_hh_s0:
911  case M2_mpyud_nac_hh_s1:
912  if (OpN == 1 || OpN == 2) {
913  Bits.set(Begin+16, Begin+32);
914  return true;
915  }
916  break;
917  }
918 
919  return false;
920 }
921 
922 // Calculate the register class that matches Reg:Sub. For example, if
923 // %1 is a double register, then %1:isub_hi would match the "int"
924 // register class.
925 const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
927  if (!RR.Reg.isVirtual())
928  return nullptr;
929  auto *RC = MRI.getRegClass(RR.Reg);
930  if (RR.Sub == 0)
931  return RC;
932  auto &HRI = static_cast<const HexagonRegisterInfo&>(
934 
935  auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void {
936  (void)HRI;
937  assert(Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_lo) ||
938  Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_hi));
939  };
940 
941  switch (RC->getID()) {
942  case Hexagon::DoubleRegsRegClassID:
943  VerifySR(RC, RR.Sub);
944  return &Hexagon::IntRegsRegClass;
945  case Hexagon::HvxWRRegClassID:
946  VerifySR(RC, RR.Sub);
947  return &Hexagon::HvxVRRegClass;
948  }
949  return nullptr;
950 }
951 
952 // Check if RD could be replaced with RS at any possible use of RD.
953 // For example a predicate register cannot be replaced with a integer
954 // register, but a 64-bit register with a subregister can be replaced
955 // with a 32-bit register.
956 bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
958  if (!RD.Reg.isVirtual() || !RS.Reg.isVirtual())
959  return false;
960  // Return false if one (or both) classes are nullptr.
961  auto *DRC = getFinalVRegClass(RD, MRI);
962  if (!DRC)
963  return false;
964 
965  return DRC == getFinalVRegClass(RS, MRI);
966 }
967 
968 bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
969  unsigned NewSub) {
970  if (!PreserveTiedOps)
971  return false;
973  [NewSub] (const MachineOperand &Op) -> bool {
974  return Op.getSubReg() != NewSub && Op.isTied();
975  });
976 }
977 
978 namespace {
979 
980  class DeadCodeElimination {
981  public:
982  DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
983  : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
984  MDT(mdt), MRI(mf.getRegInfo()) {}
985 
986  bool run() {
987  return runOnNode(MDT.getRootNode());
988  }
989 
990  private:
991  bool isDead(unsigned R) const;
992  bool runOnNode(MachineDomTreeNode *N);
993 
994  MachineFunction &MF;
995  const HexagonInstrInfo &HII;
998  };
999 
1000 } // end anonymous namespace
1001 
1002 bool DeadCodeElimination::isDead(unsigned R) const {
1003  for (const MachineOperand &MO : MRI.use_operands(R)) {
1004  const MachineInstr *UseI = MO.getParent();
1005  if (UseI->isDebugValue())
1006  continue;
1007  if (UseI->isPHI()) {
1008  assert(!UseI->getOperand(0).getSubReg());
1009  Register DR = UseI->getOperand(0).getReg();
1010  if (DR == R)
1011  continue;
1012  }
1013  return false;
1014  }
1015  return true;
1016 }
1017 
1018 bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
1019  bool Changed = false;
1020 
1021  for (auto *DTN : children<MachineDomTreeNode*>(N))
1022  Changed |= runOnNode(DTN);
1023 
1024  MachineBasicBlock *B = N->getBlock();
1025  std::vector<MachineInstr*> Instrs;
1026  for (MachineInstr &MI : llvm::reverse(*B))
1027  Instrs.push_back(&MI);
1028 
1029  for (auto MI : Instrs) {
1030  unsigned Opc = MI->getOpcode();
1031  // Do not touch lifetime markers. This is why the target-independent DCE
1032  // cannot be used.
1033  if (Opc == TargetOpcode::LIFETIME_START ||
1035  continue;
1036  bool Store = false;
1037  if (MI->isInlineAsm())
1038  continue;
1039  // Delete PHIs if possible.
1040  if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
1041  continue;
1042 
1043  bool AllDead = true;
1045  for (auto &Op : MI->operands()) {
1046  if (!Op.isReg() || !Op.isDef())
1047  continue;
1048  Register R = Op.getReg();
1049  if (!R.isVirtual() || !isDead(R)) {
1050  AllDead = false;
1051  break;
1052  }
1053  Regs.push_back(R);
1054  }
1055  if (!AllDead)
1056  continue;
1057 
1058  B->erase(MI);
1059  for (unsigned i = 0, n = Regs.size(); i != n; ++i)
1061  Changed = true;
1062  }
1063 
1064  return Changed;
1065 }
1066 
1067 namespace {
1068 
1069 // Eliminate redundant instructions
1070 //
1071 // This transformation will identify instructions where the output register
1072 // is the same as one of its input registers. This only works on instructions
1073 // that define a single register (unlike post-increment loads, for example).
1074 // The equality check is actually more detailed: the code calculates which
1075 // bits of the output are used, and only compares these bits with the input
1076 // registers.
1077 // If the output matches an input, the instruction is replaced with COPY.
1078 // The copies will be removed by another transformation.
1079  class RedundantInstrElimination : public Transformation {
1080  public:
1081  RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
1082  const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1083  : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}
1084 
1085  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1086 
1087  private:
1088  bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
1089  unsigned &LostB, unsigned &LostE);
1090  bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
1091  unsigned &LostB, unsigned &LostE);
1092  bool computeUsedBits(unsigned Reg, BitVector &Bits);
1093  bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
1094  uint16_t Begin);
1095  bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);
1096 
1097  const HexagonInstrInfo &HII;
1098  const HexagonRegisterInfo &HRI;
1100  BitTracker &BT;
1101  };
1102 
1103 } // end anonymous namespace
1104 
1105 // Check if the instruction is a lossy shift left, where the input being
1106 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1107 // of bit indices that are lost.
1108 bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
1109  unsigned OpN, unsigned &LostB, unsigned &LostE) {
1110  using namespace Hexagon;
1111 
1112  unsigned Opc = MI.getOpcode();
1113  unsigned ImN, RegN, Width;
1114  switch (Opc) {
1115  case S2_asl_i_p:
1116  ImN = 2;
1117  RegN = 1;
1118  Width = 64;
1119  break;
1120  case S2_asl_i_p_acc:
1121  case S2_asl_i_p_and:
1122  case S2_asl_i_p_nac:
1123  case S2_asl_i_p_or:
1124  case S2_asl_i_p_xacc:
1125  ImN = 3;
1126  RegN = 2;
1127  Width = 64;
1128  break;
1129  case S2_asl_i_r:
1130  ImN = 2;
1131  RegN = 1;
1132  Width = 32;
1133  break;
1134  case S2_addasl_rrri:
1135  case S4_andi_asl_ri:
1136  case S4_ori_asl_ri:
1137  case S4_addi_asl_ri:
1138  case S4_subi_asl_ri:
1139  case S2_asl_i_r_acc:
1140  case S2_asl_i_r_and:
1141  case S2_asl_i_r_nac:
1142  case S2_asl_i_r_or:
1143  case S2_asl_i_r_sat:
1144  case S2_asl_i_r_xacc:
1145  ImN = 3;
1146  RegN = 2;
1147  Width = 32;
1148  break;
1149  default:
1150  return false;
1151  }
1152 
1153  if (RegN != OpN)
1154  return false;
1155 
1156  assert(MI.getOperand(ImN).isImm());
1157  unsigned S = MI.getOperand(ImN).getImm();
1158  if (S == 0)
1159  return false;
1160  LostB = Width-S;
1161  LostE = Width;
1162  return true;
1163 }
1164 
1165 // Check if the instruction is a lossy shift right, where the input being
1166 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1167 // of bit indices that are lost.
1168 bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
1169  unsigned OpN, unsigned &LostB, unsigned &LostE) {
1170  using namespace Hexagon;
1171 
1172  unsigned Opc = MI.getOpcode();
1173  unsigned ImN, RegN;
1174  switch (Opc) {
1175  case S2_asr_i_p:
1176  case S2_lsr_i_p:
1177  ImN = 2;
1178  RegN = 1;
1179  break;
1180  case S2_asr_i_p_acc:
1181  case S2_asr_i_p_and:
1182  case S2_asr_i_p_nac:
1183  case S2_asr_i_p_or:
1184  case S2_lsr_i_p_acc:
1185  case S2_lsr_i_p_and:
1186  case S2_lsr_i_p_nac:
1187  case S2_lsr_i_p_or:
1188  case S2_lsr_i_p_xacc:
1189  ImN = 3;
1190  RegN = 2;
1191  break;
1192  case S2_asr_i_r:
1193  case S2_lsr_i_r:
1194  ImN = 2;
1195  RegN = 1;
1196  break;
1197  case S4_andi_lsr_ri:
1198  case S4_ori_lsr_ri:
1199  case S4_addi_lsr_ri:
1200  case S4_subi_lsr_ri:
1201  case S2_asr_i_r_acc:
1202  case S2_asr_i_r_and:
1203  case S2_asr_i_r_nac:
1204  case S2_asr_i_r_or:
1205  case S2_lsr_i_r_acc:
1206  case S2_lsr_i_r_and:
1207  case S2_lsr_i_r_nac:
1208  case S2_lsr_i_r_or:
1209  case S2_lsr_i_r_xacc:
1210  ImN = 3;
1211  RegN = 2;
1212  break;
1213 
1214  default:
1215  return false;
1216  }
1217 
1218  if (RegN != OpN)
1219  return false;
1220 
1221  assert(MI.getOperand(ImN).isImm());
1222  unsigned S = MI.getOperand(ImN).getImm();
1223  LostB = 0;
1224  LostE = S;
1225  return true;
1226 }
1227 
1228 // Calculate the bit vector that corresponds to the used bits of register Reg.
1229 // The vector Bits has the same size, as the size of Reg in bits. If the cal-
1230 // culation fails (i.e. the used bits are unknown), it returns false. Other-
1231 // wise, it returns true and sets the corresponding bits in Bits.
1232 bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
1233  BitVector Used(Bits.size());
1234  RegisterSet Visited;
1235  std::vector<unsigned> Pending;
1236  Pending.push_back(Reg);
1237 
1238  for (unsigned i = 0; i < Pending.size(); ++i) {
1239  unsigned R = Pending[i];
1240  if (Visited.has(R))
1241  continue;
1242  Visited.insert(R);
1243  for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
1244  BitTracker::RegisterRef UR = *I;
1245  unsigned B, W;
1246  if (!HBS::getSubregMask(UR, B, W, MRI))
1247  return false;
1248  MachineInstr &UseI = *I->getParent();
1249  if (UseI.isPHI() || UseI.isCopy()) {
1250  Register DefR = UseI.getOperand(0).getReg();
1251  if (!DefR.isVirtual())
1252  return false;
1253  Pending.push_back(DefR);
1254  } else {
1255  if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
1256  return false;
1257  }
1258  }
1259  }
1260  Bits |= Used;
1261  return true;
1262 }
1263 
1264 // Calculate the bits used by instruction MI in a register in operand OpN.
1265 // Return true/false if the calculation succeeds/fails. If is succeeds, set
1266 // used bits in Bits. This function does not reset any bits in Bits, so
1267 // subsequent calls over different instructions will result in the union
1268 // of the used bits in all these instructions.
1269 // The register in question may be used with a sub-register, whereas Bits
1270 // holds the bits for the entire register. To keep track of that, the
1271 // argument Begin indicates where in Bits is the lowest-significant bit
1272 // of the register used in operand OpN. For example, in instruction:
1273 // %1 = S2_lsr_i_r %2:isub_hi, 10
1274 // the operand 1 is a 32-bit register, which happens to be a subregister
1275 // of the 64-bit register %2, and that subregister starts at position 32.
1276 // In this case Begin=32, since Bits[32] would be the lowest-significant bit
1277 // of %2:isub_hi.
1278 bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
1279  unsigned OpN, BitVector &Bits, uint16_t Begin) {
1280  unsigned Opc = MI.getOpcode();
1281  BitVector T(Bits.size());
1282  bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
1283  // Even if we don't have bits yet, we could still provide some information
1284  // if the instruction is a lossy shift: the lost bits will be marked as
1285  // not used.
1286  unsigned LB, LE;
1287  if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
1288  assert(MI.getOperand(OpN).isReg());
1289  BitTracker::RegisterRef RR = MI.getOperand(OpN);
1290  const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
1291  uint16_t Width = HRI.getRegSizeInBits(*RC);
1292 
1293  if (!GotBits)
1294  T.set(Begin, Begin+Width);
1295  assert(LB <= LE && LB < Width && LE <= Width);
1296  T.reset(Begin+LB, Begin+LE);
1297  GotBits = true;
1298  }
1299  if (GotBits)
1300  Bits |= T;
1301  return GotBits;
1302 }
1303 
1304 // Calculates the used bits in RD ("defined register"), and checks if these
1305 // bits in RS ("used register") and RD are identical.
1306 bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
1308  const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
1309  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
1310 
1311  unsigned DB, DW;
1312  if (!HBS::getSubregMask(RD, DB, DW, MRI))
1313  return false;
1314  unsigned SB, SW;
1315  if (!HBS::getSubregMask(RS, SB, SW, MRI))
1316  return false;
1317  if (SW != DW)
1318  return false;
1319 
1320  BitVector Used(DC.width());
1321  if (!computeUsedBits(RD.Reg, Used))
1322  return false;
1323 
1324  for (unsigned i = 0; i != DW; ++i)
1325  if (Used[i+DB] && DC[DB+i] != SC[SB+i])
1326  return false;
1327  return true;
1328 }
1329 
1330 bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
1331  const RegisterSet&) {
1332  if (!BT.reached(&B))
1333  return false;
1334  bool Changed = false;
1335 
1336  for (auto I = B.begin(), E = B.end(); I != E; ++I) {
1337  MachineInstr *MI = &*I;
1338 
1339  if (MI->getOpcode() == TargetOpcode::COPY)
1340  continue;
1341  if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
1342  continue;
1343  unsigned NumD = MI->getDesc().getNumDefs();
1344  if (NumD != 1)
1345  continue;
1346 
1347  BitTracker::RegisterRef RD = MI->getOperand(0);
1348  if (!BT.has(RD.Reg))
1349  continue;
1350  const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
1351  auto At = MachineBasicBlock::iterator(MI);
1352 
1353  // Find a source operand that is equal to the result.
1354  for (auto &Op : MI->uses()) {
1355  if (!Op.isReg())
1356  continue;
1358  if (!BT.has(RS.Reg))
1359  continue;
1360  if (!HBS::isTransparentCopy(RD, RS, MRI))
1361  continue;
1362 
1363  unsigned BN, BW;
1364  if (!HBS::getSubregMask(RS, BN, BW, MRI))
1365  continue;
1366 
1367  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
1368  if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
1369  continue;
1370 
1371  // If found, replace the instruction with a COPY.
1372  const DebugLoc &DL = MI->getDebugLoc();
1373  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
1374  Register NewR = MRI.createVirtualRegister(FRC);
1375  MachineInstr *CopyI =
1376  BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1377  .addReg(RS.Reg, 0, RS.Sub);
1378  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
1379  // This pass can create copies between registers that don't have the
1380  // exact same values. Updating the tracker has to involve updating
1381  // all dependent cells. Example:
1382  // %1 = inst %2 ; %1 != %2, but used bits are equal
1383  //
1384  // %3 = copy %2 ; <- inserted
1385  // ... = %3 ; <- replaced from %2
1386  // Indirectly, we can create a "copy" between %1 and %2 even
1387  // though their exact values do not match.
1388  BT.visit(*CopyI);
1389  Changed = true;
1390  break;
1391  }
1392  }
1393 
1394  return Changed;
1395 }
1396 
1397 namespace {
1398 
1399 // Recognize instructions that produce constant values known at compile-time.
1400 // Replace them with register definitions that load these constants directly.
1401  class ConstGeneration : public Transformation {
1402  public:
1403  ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
1404  MachineRegisterInfo &mri)
1405  : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
1406 
1407  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1408  static bool isTfrConst(const MachineInstr &MI);
1409 
1410  private:
1411  Register genTfrConst(const TargetRegisterClass *RC, int64_t C,
1413  DebugLoc &DL);
1414 
1415  const HexagonInstrInfo &HII;
1417  BitTracker &BT;
1418  };
1419 
1420 } // end anonymous namespace
1421 
1422 bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
1423  unsigned Opc = MI.getOpcode();
1424  switch (Opc) {
1425  case Hexagon::A2_combineii:
1426  case Hexagon::A4_combineii:
1427  case Hexagon::A2_tfrsi:
1428  case Hexagon::A2_tfrpi:
1429  case Hexagon::PS_true:
1430  case Hexagon::PS_false:
1431  case Hexagon::CONST32:
1432  case Hexagon::CONST64:
1433  return true;
1434  }
1435  return false;
1436 }
1437 
1438 // Generate a transfer-immediate instruction that is appropriate for the
1439 // register class and the actual value being transferred.
1440 Register ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
1443  DebugLoc &DL) {
1445  if (RC == &Hexagon::IntRegsRegClass) {
1446  BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
1447  .addImm(int32_t(C));
1448  return Reg;
1449  }
1450 
1451  if (RC == &Hexagon::DoubleRegsRegClass) {
1452  if (isInt<8>(C)) {
1453  BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)
1454  .addImm(C);
1455  return Reg;
1456  }
1457 
1458  unsigned Lo = Lo_32(C), Hi = Hi_32(C);
1459  if (isInt<8>(Lo) || isInt<8>(Hi)) {
1460  unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii
1461  : Hexagon::A4_combineii;
1462  BuildMI(B, At, DL, HII.get(Opc), Reg)
1463  .addImm(int32_t(Hi))
1464  .addImm(int32_t(Lo));
1465  return Reg;
1466  }
1467  MachineFunction *MF = B.getParent();
1468  auto &HST = MF->getSubtarget<HexagonSubtarget>();
1469 
1470  // Disable CONST64 for tiny core since it takes a LD resource.
1471  if (!HST.isTinyCore() ||
1472  MF->getFunction().hasOptSize()) {
1473  BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg)
1474  .addImm(C);
1475  return Reg;
1476  }
1477  }
1478 
1479  if (RC == &Hexagon::PredRegsRegClass) {
1480  unsigned Opc;
1481  if (C == 0)
1482  Opc = Hexagon::PS_false;
1483  else if ((C & 0xFF) == 0xFF)
1484  Opc = Hexagon::PS_true;
1485  else
1486  return 0;
1487  BuildMI(B, At, DL, HII.get(Opc), Reg);
1488  return Reg;
1489  }
1490 
1491  return 0;
1492 }
1493 
1494 bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
1495  if (!BT.reached(&B))
1496  return false;
1497  bool Changed = false;
1498  RegisterSet Defs;
1499 
1500  for (auto I = B.begin(), E = B.end(); I != E; ++I) {
1501  if (isTfrConst(*I))
1502  continue;
1503  Defs.clear();
1504  HBS::getInstrDefs(*I, Defs);
1505  if (Defs.count() != 1)
1506  continue;
1507  Register DR = Defs.find_first();
1508  if (!DR.isVirtual())
1509  continue;
1510  uint64_t U;
1511  const BitTracker::RegisterCell &DRC = BT.lookup(DR);
1512  if (HBS::getConst(DRC, 0, DRC.width(), U)) {
1513  int64_t C = U;
1514  DebugLoc DL = I->getDebugLoc();
1515  auto At = I->isPHI() ? B.getFirstNonPHI() : I;
1516  Register ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
1517  if (ImmReg) {
1518  HBS::replaceReg(DR, ImmReg, MRI);
1519  BT.put(ImmReg, DRC);
1520  Changed = true;
1521  }
1522  }
1523  }
1524  return Changed;
1525 }
1526 
1527 namespace {
1528 
1529 // Identify pairs of available registers which hold identical values.
1530 // In such cases, only one of them needs to be calculated, the other one
1531 // will be defined as a copy of the first.
1532  class CopyGeneration : public Transformation {
1533  public:
1534  CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
1535  const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1536  : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}
1537 
1538  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1539 
1540  private:
1541  bool findMatch(const BitTracker::RegisterRef &Inp,
1542  BitTracker::RegisterRef &Out, const RegisterSet &AVs);
1543 
1544  const HexagonInstrInfo &HII;
1545  const HexagonRegisterInfo &HRI;
1547  BitTracker &BT;
1548  RegisterSet Forbidden;
1549  };
1550 
1551 // Eliminate register copies RD = RS, by replacing the uses of RD with
1552 // with uses of RS.
1553  class CopyPropagation : public Transformation {
1554  public:
1555  CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1556  : Transformation(false), HRI(hri), MRI(mri) {}
1557 
1558  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1559 
1560  static bool isCopyReg(unsigned Opc, bool NoConv);
1561 
1562  private:
1563  bool propagateRegCopy(MachineInstr &MI);
1564 
1565  const HexagonRegisterInfo &HRI;
1567  };
1568 
1569 } // end anonymous namespace
1570 
1571 /// Check if there is a register in AVs that is identical to Inp. If so,
1572 /// set Out to the found register. The output may be a pair Reg:Sub.
1573 bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
1574  BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
1575  if (!BT.has(Inp.Reg))
1576  return false;
1577  const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
1578  auto *FRC = HBS::getFinalVRegClass(Inp, MRI);
1579  unsigned B, W;
1580  if (!HBS::getSubregMask(Inp, B, W, MRI))
1581  return false;
1582 
1583  for (Register R = AVs.find_first(); R; R = AVs.find_next(R)) {
1584  if (!BT.has(R) || Forbidden[R])
1585  continue;
1586  const BitTracker::RegisterCell &RC = BT.lookup(R);
1587  unsigned RW = RC.width();
1588  if (W == RW) {
1589  if (FRC != MRI.getRegClass(R))
1590  continue;
1591  if (!HBS::isTransparentCopy(R, Inp, MRI))
1592  continue;
1593  if (!HBS::isEqual(InpRC, B, RC, 0, W))
1594  continue;
1595  Out.Reg = R;
1596  Out.Sub = 0;
1597  return true;
1598  }
1599  // Check if there is a super-register, whose part (with a subregister)
1600  // is equal to the input.
1601  // Only do double registers for now.
1602  if (W*2 != RW)
1603  continue;
1604  if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
1605  continue;
1606 
1607  if (HBS::isEqual(InpRC, B, RC, 0, W))
1608  Out.Sub = Hexagon::isub_lo;
1609  else if (HBS::isEqual(InpRC, B, RC, W, W))
1610  Out.Sub = Hexagon::isub_hi;
1611  else
1612  continue;
1613  Out.Reg = R;
1614  if (HBS::isTransparentCopy(Out, Inp, MRI))
1615  return true;
1616  }
1617  return false;
1618 }
1619 
1620 bool CopyGeneration::processBlock(MachineBasicBlock &B,
1621  const RegisterSet &AVs) {
1622  if (!BT.reached(&B))
1623  return false;
1624  RegisterSet AVB(AVs);
1625  bool Changed = false;
1626  RegisterSet Defs;
1627 
1628  for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
1629  Defs.clear();
1630  HBS::getInstrDefs(*I, Defs);
1631 
1632  unsigned Opc = I->getOpcode();
1633  if (CopyPropagation::isCopyReg(Opc, false) ||
1634  ConstGeneration::isTfrConst(*I))
1635  continue;
1636 
1637  DebugLoc DL = I->getDebugLoc();
1638  auto At = I->isPHI() ? B.getFirstNonPHI() : I;
1639 
1640  for (Register R = Defs.find_first(); R; R = Defs.find_next(R)) {
1642  auto *FRC = HBS::getFinalVRegClass(R, MRI);
1643 
1644  if (findMatch(R, MR, AVB)) {
1645  Register NewR = MRI.createVirtualRegister(FRC);
1646  BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1647  .addReg(MR.Reg, 0, MR.Sub);
1648  BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
1649  HBS::replaceReg(R, NewR, MRI);
1650  Forbidden.insert(R);
1651  continue;
1652  }
1653 
1654  if (FRC == &Hexagon::DoubleRegsRegClass ||
1655  FRC == &Hexagon::HvxWRRegClass) {
1656  // Try to generate REG_SEQUENCE.
1657  unsigned SubLo = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_lo);
1658  unsigned SubHi = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_hi);
1659  BitTracker::RegisterRef TL = { R, SubLo };
1660  BitTracker::RegisterRef TH = { R, SubHi };
1661  BitTracker::RegisterRef ML, MH;
1662  if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
1663  auto *FRC = HBS::getFinalVRegClass(R, MRI);
1664  Register NewR = MRI.createVirtualRegister(FRC);
1665  BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
1666  .addReg(ML.Reg, 0, ML.Sub)
1667  .addImm(SubLo)
1668  .addReg(MH.Reg, 0, MH.Sub)
1669  .addImm(SubHi);
1670  BT.put(BitTracker::RegisterRef(NewR), BT.get(R));
1671  HBS::replaceReg(R, NewR, MRI);
1672  Forbidden.insert(R);
1673  }
1674  }
1675  }
1676  }
1677 
1678  return Changed;
1679 }
1680 
1681 bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) {
1682  switch (Opc) {
1683  case TargetOpcode::COPY:
1684  case TargetOpcode::REG_SEQUENCE:
1685  case Hexagon::A4_combineir:
1686  case Hexagon::A4_combineri:
1687  return true;
1688  case Hexagon::A2_tfr:
1689  case Hexagon::A2_tfrp:
1690  case Hexagon::A2_combinew:
1691  case Hexagon::V6_vcombine:
1692  return NoConv;
1693  default:
1694  break;
1695  }
1696  return false;
1697 }
1698 
1699 bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
1700  bool Changed = false;
1701  unsigned Opc = MI.getOpcode();
1702  BitTracker::RegisterRef RD = MI.getOperand(0);
1703  assert(MI.getOperand(0).getSubReg() == 0);
1704 
1705  switch (Opc) {
1706  case TargetOpcode::COPY:
1707  case Hexagon::A2_tfr:
1708  case Hexagon::A2_tfrp: {
1709  BitTracker::RegisterRef RS = MI.getOperand(1);
1710  if (!HBS::isTransparentCopy(RD, RS, MRI))
1711  break;
1712  if (RS.Sub != 0)
1713  Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
1714  else
1715  Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
1716  break;
1717  }
1718  case TargetOpcode::REG_SEQUENCE: {
1719  BitTracker::RegisterRef SL, SH;
1720  if (HBS::parseRegSequence(MI, SL, SH, MRI)) {
1721  const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
1722  unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
1723  unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
1724  Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI);
1725  Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI);
1726  }
1727  break;
1728  }
1729  case Hexagon::A2_combinew:
1730  case Hexagon::V6_vcombine: {
1731  const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
1732  unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
1733  unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
1734  BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
1735  Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI);
1736  Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI);
1737  break;
1738  }
1739  case Hexagon::A4_combineir:
1740  case Hexagon::A4_combineri: {
1741  unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
1742  unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo
1743  : Hexagon::isub_hi;
1744  BitTracker::RegisterRef RS = MI.getOperand(SrcX);
1745  Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
1746  break;
1747  }
1748  }
1749  return Changed;
1750 }
1751 
1752 bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
1753  std::vector<MachineInstr*> Instrs;
1754  for (MachineInstr &MI : llvm::reverse(B))
1755  Instrs.push_back(&MI);
1756 
1757  bool Changed = false;
1758  for (auto I : Instrs) {
1759  unsigned Opc = I->getOpcode();
1760  if (!CopyPropagation::isCopyReg(Opc, true))
1761  continue;
1762  Changed |= propagateRegCopy(*I);
1763  }
1764 
1765  return Changed;
1766 }
1767 
1768 namespace {
1769 
1770 // Recognize patterns that can be simplified and replace them with the
1771 // simpler forms.
1772 // This is by no means complete
1773  class BitSimplification : public Transformation {
1774  public:
1775  BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt,
1776  const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri,
1778  : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri),
1779  MF(mf), BT(bt) {}
1780 
1781  bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1782 
1783  private:
1784  struct RegHalf : public BitTracker::RegisterRef {
1785  bool Low; // Low/High halfword.
1786  };
1787 
1788  bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
1789  unsigned B, RegHalf &RH);
1790  bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum);
1791 
1792  bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
1794  unsigned getCombineOpcode(bool HLow, bool LLow);
1795 
1796  bool genStoreUpperHalf(MachineInstr *MI);
1797  bool genStoreImmediate(MachineInstr *MI);
1798  bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
1799  const BitTracker::RegisterCell &RC);
1800  bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
1801  const BitTracker::RegisterCell &RC);
1802  bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
1803  const BitTracker::RegisterCell &RC);
1804  bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
1805  const BitTracker::RegisterCell &RC);
1806  bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD,
1807  const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
1808  bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
1809  const BitTracker::RegisterCell &RC);
1810  bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
1811  const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
1812  bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD);
1813 
1814  // Cache of created instructions to avoid creating duplicates.
1815  // XXX Currently only used by genBitSplit.
1816  std::vector<MachineInstr*> NewMIs;
1817 
1818  const MachineDominatorTree &MDT;
1819  const HexagonInstrInfo &HII;
1820  const HexagonRegisterInfo &HRI;
1822  MachineFunction &MF;
1823  BitTracker &BT;
1824  };
1825 
1826 } // end anonymous namespace
1827 
1828 // Check if the bits [B..B+16) in register cell RC form a valid halfword,
1829 // i.e. [0..16), [16..32), etc. of some register. If so, return true and
1830 // set the information about the found register in RH.
1831 bool BitSimplification::matchHalf(unsigned SelfR,
1832  const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
1833  // XXX This could be searching in the set of available registers, in case
1834  // the match is not exact.
1835 
1836  // Match 16-bit chunks, where the RC[B..B+15] references exactly one
1837  // register and all the bits B..B+15 match between RC and the register.
1838  // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
1839  // and RC = { [0]:0 [1-15]:v1[1-15]... }.
1840  bool Low = false;
1841  unsigned I = B;
1842  while (I < B+16 && RC[I].num())
1843  I++;
1844  if (I == B+16)
1845  return false;
1846 
1847  Register Reg = RC[I].RefI.Reg;
1848  unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B.
1849  if (P < I-B)
1850  return false;
1851  unsigned Pos = P - (I-B);
1852 
1853  if (Reg == 0 || Reg == SelfR) // Don't match "self".
1854  return false;
1855  if (!Reg.isVirtual())
1856  return false;
1857  if (!BT.has(Reg))
1858  return false;
1859 
1861  if (Pos+16 > SC.width())
1862  return false;
1863 
1864  for (unsigned i = 0; i < 16; ++i) {
1865  const BitTracker::BitValue &RV = RC[i+B];
1866  if (RV.Type == BitTracker::BitValue::Ref) {
1867  if (RV.RefI.Reg != Reg)
1868  return false;
1869  if (RV.RefI.Pos != i+Pos)
1870  return false;
1871  continue;
1872  }
1873  if (RC[i+B] != SC[i+Pos])
1874  return false;
1875  }
1876 
1877  unsigned Sub = 0;
1878  switch (Pos) {
1879  case 0:
1880  Sub = Hexagon::isub_lo;
1881  Low = true;
1882  break;
1883  case 16:
1884  Sub = Hexagon::isub_lo;
1885  Low = false;
1886  break;
1887  case 32:
1888  Sub = Hexagon::isub_hi;
1889  Low = true;
1890  break;
1891  case 48:
1892  Sub = Hexagon::isub_hi;
1893  Low = false;
1894  break;
1895  default:
1896  return false;
1897  }
1898 
1899  RH.Reg = Reg;
1900  RH.Sub = Sub;
1901  RH.Low = Low;
1902  // If the subregister is not valid with the register, set it to 0.
1903  if (!HBS::getFinalVRegClass(RH, MRI))
1904  RH.Sub = 0;
1905 
1906  return true;
1907 }
1908 
1909 bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
1910  unsigned OpNum) {
1911  auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
1912  auto *RRC = HBS::getFinalVRegClass(R, MRI);
1913  return OpRC->hasSubClassEq(RRC);
1914 }
1915 
1916 // Check if RC matches the pattern of a S2_packhl. If so, return true and
1917 // set the inputs Rs and Rt.
1918 bool BitSimplification::matchPackhl(unsigned SelfR,
1921  RegHalf L1, H1, L2, H2;
1922 
1923  if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1))
1924  return false;
1925  if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
1926  return false;
1927 
1928  // Rs = H1.L1, Rt = H2.L2
1929  if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
1930  return false;
1931  if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
1932  return false;
1933 
1934  Rs = H1;
1935  Rt = H2;
1936  return true;
1937 }
1938 
1939 unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
1940  return HLow ? LLow ? Hexagon::A2_combine_ll
1941  : Hexagon::A2_combine_lh
1942  : LLow ? Hexagon::A2_combine_hl
1943  : Hexagon::A2_combine_hh;
1944 }
1945 
1946 // If MI stores the upper halfword of a register (potentially obtained via
1947 // shifts or extracts), replace it with a storerf instruction. This could
1948 // cause the "extraction" code to become dead.
1949 bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
1950  unsigned Opc = MI->getOpcode();
1951  if (Opc != Hexagon::S2_storerh_io)
1952  return false;
1953 
1954  MachineOperand &ValOp = MI->getOperand(2);
1955  BitTracker::RegisterRef RS = ValOp;
1956  if (!BT.has(RS.Reg))
1957  return false;
1958  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
1959  RegHalf H;
1960  if (!matchHalf(0, RC, 0, H))
1961  return false;
1962  if (H.Low)
1963  return false;
1964  MI->setDesc(HII.get(Hexagon::S2_storerf_io));
1965  ValOp.setReg(H.Reg);
1966  ValOp.setSubReg(H.Sub);
1967  return true;
1968 }
1969 
1970 // If MI stores a value known at compile-time, and the value is within a range
1971 // that avoids using constant-extenders, replace it with a store-immediate.
1972 bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
1973  unsigned Opc = MI->getOpcode();
1974  unsigned Align = 0;
1975  switch (Opc) {
1976  case Hexagon::S2_storeri_io:
1977  Align++;
1979  case Hexagon::S2_storerh_io:
1980  Align++;
1982  case Hexagon::S2_storerb_io:
1983  break;
1984  default:
1985  return false;
1986  }
1987 
1988  // Avoid stores to frame-indices (due to an unknown offset).
1989  if (!MI->getOperand(0).isReg())
1990  return false;
1991  MachineOperand &OffOp = MI->getOperand(1);
1992  if (!OffOp.isImm())
1993  return false;
1994 
1995  int64_t Off = OffOp.getImm();
1996  // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
1997  if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
1998  return false;
1999  // Source register:
2000  BitTracker::RegisterRef RS = MI->getOperand(2);
2001  if (!BT.has(RS.Reg))
2002  return false;
2003  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
2004  uint64_t U;
2005  if (!HBS::getConst(RC, 0, RC.width(), U))
2006  return false;
2007 
2008  // Only consider 8-bit values to avoid constant-extenders.
2009  int V;
2010  switch (Opc) {
2011  case Hexagon::S2_storerb_io:
2012  V = int8_t(U);
2013  break;
2014  case Hexagon::S2_storerh_io:
2015  V = int16_t(U);
2016  break;
2017  case Hexagon::S2_storeri_io:
2018  V = int32_t(U);
2019  break;
2020  default:
2021  // Opc is already checked above to be one of the three store instructions.
2022  // This silences a -Wuninitialized false positive on GCC 5.4.
2023  llvm_unreachable("Unexpected store opcode");
2024  }
2025  if (!isInt<8>(V))
2026  return false;
2027 
2028  MI->removeOperand(2);
2029  switch (Opc) {
2030  case Hexagon::S2_storerb_io:
2031  MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
2032  break;
2033  case Hexagon::S2_storerh_io:
2034  MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
2035  break;
2036  case Hexagon::S2_storeri_io:
2037  MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
2038  break;
2039  }
2040  MI->addOperand(MachineOperand::CreateImm(V));
2041  return true;
2042 }
2043 
2044 // If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the
2045 // last instruction in a sequence that results in something equivalent to
2046 // the pack-halfwords. The intent is to cause the entire sequence to become
2047 // dead.
2048 bool BitSimplification::genPackhl(MachineInstr *MI,
2050  unsigned Opc = MI->getOpcode();
2051  if (Opc == Hexagon::S2_packhl)
2052  return false;
2053  BitTracker::RegisterRef Rs, Rt;
2054  if (!matchPackhl(RD.Reg, RC, Rs, Rt))
2055  return false;
2056  if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
2057  !validateReg(Rt, Hexagon::S2_packhl, 2))
2058  return false;
2059 
2060  MachineBasicBlock &B = *MI->getParent();
2061  Register NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
2062  DebugLoc DL = MI->getDebugLoc();
2063  auto At = MI->isPHI() ? B.getFirstNonPHI()
2065  BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
2066  .addReg(Rs.Reg, 0, Rs.Sub)
2067  .addReg(Rt.Reg, 0, Rt.Sub);
2068  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2069  BT.put(BitTracker::RegisterRef(NewR), RC);
2070  return true;
2071 }
2072 
2073 // If MI produces halfword of the input in the low half of the output,
2074 // replace it with zero-extend or extractu.
2075 bool BitSimplification::genExtractHalf(MachineInstr *MI,
2077  RegHalf L;
2078  // Check for halfword in low 16 bits, zeros elsewhere.
2079  if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
2080  return false;
2081 
2082  unsigned Opc = MI->getOpcode();
2083  MachineBasicBlock &B = *MI->getParent();
2084  DebugLoc DL = MI->getDebugLoc();
2085 
2086  // Prefer zxth, since zxth can go in any slot, while extractu only in
2087  // slots 2 and 3.
2088  unsigned NewR = 0;
2089  auto At = MI->isPHI() ? B.getFirstNonPHI()
2091  if (L.Low && Opc != Hexagon::A2_zxth) {
2092  if (validateReg(L, Hexagon::A2_zxth, 1)) {
2093  NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2094  BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
2095  .addReg(L.Reg, 0, L.Sub);
2096  }
2097  } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
2098  if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
2099  NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2100  BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
2101  .addReg(L.Reg, 0, L.Sub)
2102  .addImm(16);
2103  }
2104  }
2105  if (NewR == 0)
2106  return false;
2107  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2108  BT.put(BitTracker::RegisterRef(NewR), RC);
2109  return true;
2110 }
2111 
2112 // If MI is equivalent to a combine(.L/.H, .L/.H) replace with with the
2113 // combine.
2114 bool BitSimplification::genCombineHalf(MachineInstr *MI,
2116  RegHalf L, H;
2117  // Check for combine h/l
2118  if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
2119  return false;
2120  // Do nothing if this is just a reg copy.
2121  if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
2122  return false;
2123 
2124  unsigned Opc = MI->getOpcode();
2125  unsigned COpc = getCombineOpcode(H.Low, L.Low);
2126  if (COpc == Opc)
2127  return false;
2128  if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))
2129  return false;
2130 
2131  MachineBasicBlock &B = *MI->getParent();
2132  DebugLoc DL = MI->getDebugLoc();
2133  Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2134  auto At = MI->isPHI() ? B.getFirstNonPHI()
2136  BuildMI(B, At, DL, HII.get(COpc), NewR)
2137  .addReg(H.Reg, 0, H.Sub)
2138  .addReg(L.Reg, 0, L.Sub);
2139  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2140  BT.put(BitTracker::RegisterRef(NewR), RC);
2141  return true;
2142 }
2143 
2144 // If MI resets high bits of a register and keeps the lower ones, replace it
2145 // with zero-extend byte/half, and-immediate, or extractu, as appropriate.
2146 bool BitSimplification::genExtractLow(MachineInstr *MI,
2148  unsigned Opc = MI->getOpcode();
2149  switch (Opc) {
2150  case Hexagon::A2_zxtb:
2151  case Hexagon::A2_zxth:
2152  case Hexagon::S2_extractu:
2153  return false;
2154  }
2155  if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
2156  int32_t Imm = MI->getOperand(2).getImm();
2157  if (isInt<10>(Imm))
2158  return false;
2159  }
2160 
2161  if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
2162  return false;
2163  unsigned W = RC.width();
2164  while (W > 0 && RC[W-1].is(0))
2165  W--;
2166  if (W == 0 || W == RC.width())
2167  return false;
2168  unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb
2169  : (W == 16) ? Hexagon::A2_zxth
2170  : (W < 10) ? Hexagon::A2_andir
2171  : Hexagon::S2_extractu;
2172  MachineBasicBlock &B = *MI->getParent();
2173  DebugLoc DL = MI->getDebugLoc();
2174 
2175  for (auto &Op : MI->uses()) {
2176  if (!Op.isReg())
2177  continue;
2179  if (!BT.has(RS.Reg))
2180  continue;
2181  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
2182  unsigned BN, BW;
2183  if (!HBS::getSubregMask(RS, BN, BW, MRI))
2184  continue;
2185  if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
2186  continue;
2187  if (!validateReg(RS, NewOpc, 1))
2188  continue;
2189 
2190  Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2191  auto At = MI->isPHI() ? B.getFirstNonPHI()
2193  auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
2194  .addReg(RS.Reg, 0, RS.Sub);
2195  if (NewOpc == Hexagon::A2_andir)
2196  MIB.addImm((1 << W) - 1);
2197  else if (NewOpc == Hexagon::S2_extractu)
2198  MIB.addImm(W).addImm(0);
2199  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2200  BT.put(BitTracker::RegisterRef(NewR), RC);
2201  return true;
2202  }
2203  return false;
2204 }
2205 
2206 bool BitSimplification::genBitSplit(MachineInstr *MI,
2208  const RegisterSet &AVs) {
2209  if (!GenBitSplit)
2210  return false;
2211  if (MaxBitSplit.getNumOccurrences()) {
2212  if (CountBitSplit >= MaxBitSplit)
2213  return false;
2214  }
2215 
2216  unsigned Opc = MI->getOpcode();
2217  switch (Opc) {
2218  case Hexagon::A4_bitsplit:
2219  case Hexagon::A4_bitspliti:
2220  return false;
2221  }
2222 
2223  unsigned W = RC.width();
2224  if (W != 32)
2225  return false;
2226 
2227  auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
2228  unsigned Z = C.width();
2229  while (Z > 0 && C[Z-1].is(0))
2230  --Z;
2231  return C.width() - Z;
2232  };
2233 
2234  // Count the number of leading zeros in the target RC.
2235  unsigned Z = ctlz(RC);
2236  if (Z == 0 || Z == W)
2237  return false;
2238 
2239  // A simplistic analysis: assume the source register (the one being split)
2240  // is fully unknown, and that all its bits are self-references.
2241  const BitTracker::BitValue &B0 = RC[0];
2242  if (B0.Type != BitTracker::BitValue::Ref)
2243  return false;
2244 
2245  unsigned SrcR = B0.RefI.Reg;
2246  unsigned SrcSR = 0;
2247  unsigned Pos = B0.RefI.Pos;
2248 
2249  // All the non-zero bits should be consecutive bits from the same register.
2250  for (unsigned i = 1; i < W-Z; ++i) {
2251  const BitTracker::BitValue &V = RC[i];
2253  return false;
2254  if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
2255  return false;
2256  }
2257 
2258  // Now, find the other bitfield among AVs.
2259  for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
2260  // The number of leading zeros here should be the number of trailing
2261  // non-zeros in RC.
2262  unsigned SRC = MRI.getRegClass(S)->getID();
2263  if (SRC != Hexagon::IntRegsRegClassID &&
2264  SRC != Hexagon::DoubleRegsRegClassID)
2265  continue;
2266  if (!BT.has(S))
2267  continue;
2269  if (SC.width() != W || ctlz(SC) != W-Z)
2270  continue;
2271  // The Z lower bits should now match SrcR.
2272  const BitTracker::BitValue &S0 = SC[0];
2273  if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
2274  continue;
2275  unsigned P = S0.RefI.Pos;
2276 
2277  if (Pos <= P && (Pos + W-Z) != P)
2278  continue;
2279  if (P < Pos && (P + Z) != Pos)
2280  continue;
2281  // The starting bitfield position must be at a subregister boundary.
2282  if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
2283  continue;
2284 
2285  unsigned I;
2286  for (I = 1; I < Z; ++I) {
2287  const BitTracker::BitValue &V = SC[I];
2289  break;
2290  if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
2291  break;
2292  }
2293  if (I != Z)
2294  continue;
2295 
2296  // Generate bitsplit where S is defined.
2297  if (MaxBitSplit.getNumOccurrences())
2298  CountBitSplit++;
2299  MachineInstr *DefS = MRI.getVRegDef(S);
2300  assert(DefS != nullptr);
2301  DebugLoc DL = DefS->getDebugLoc();
2302  MachineBasicBlock &B = *DefS->getParent();
2303  auto At = DefS->isPHI() ? B.getFirstNonPHI()
2305  if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
2306  SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
2307  if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
2308  continue;
2309  unsigned ImmOp = Pos <= P ? W-Z : Z;
2310 
2311  // Find an existing bitsplit instruction if one already exists.
2312  unsigned NewR = 0;
2313  for (MachineInstr *In : NewMIs) {
2314  if (In->getOpcode() != Hexagon::A4_bitspliti)
2315  continue;
2316  MachineOperand &Op1 = In->getOperand(1);
2317  if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR)
2318  continue;
2319  if (In->getOperand(2).getImm() != ImmOp)
2320  continue;
2321  // Check if the target register is available here.
2322  MachineOperand &Op0 = In->getOperand(0);
2323  MachineInstr *DefI = MRI.getVRegDef(Op0.getReg());
2324  assert(DefI != nullptr);
2325  if (!MDT.dominates(DefI, &*At))
2326  continue;
2327 
2328  // Found one that can be reused.
2329  assert(Op0.getSubReg() == 0);
2330  NewR = Op0.getReg();
2331  break;
2332  }
2333  if (!NewR) {
2334  NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
2335  auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
2336  .addReg(SrcR, 0, SrcSR)
2337  .addImm(ImmOp);
2338  NewMIs.push_back(NewBS);
2339  }
2340  if (Pos <= P) {
2341  HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
2342  HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi, MRI);
2343  } else {
2344  HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo, MRI);
2345  HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
2346  }
2347  return true;
2348  }
2349 
2350  return false;
2351 }
2352 
2353 // Check for tstbit simplification opportunity, where the bit being checked
2354 // can be tracked back to another register. For example:
2355 // %2 = S2_lsr_i_r %1, 5
2356 // %3 = S2_tstbit_i %2, 0
2357 // =>
2358 // %3 = S2_tstbit_i %1, 5
2359 bool BitSimplification::simplifyTstbit(MachineInstr *MI,
2361  unsigned Opc = MI->getOpcode();
2362  if (Opc != Hexagon::S2_tstbit_i)
2363  return false;
2364 
2365  unsigned BN = MI->getOperand(2).getImm();
2366  BitTracker::RegisterRef RS = MI->getOperand(1);
2367  unsigned F, W;
2368  DebugLoc DL = MI->getDebugLoc();
2369  if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
2370  return false;
2371  MachineBasicBlock &B = *MI->getParent();
2372  auto At = MI->isPHI() ? B.getFirstNonPHI()
2374 
2375  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
2376  const BitTracker::BitValue &V = SC[F+BN];
2377  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
2378  const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
2379  // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
2380  // a double register, need to use a subregister and adjust bit
2381  // number.
2383  BitTracker::RegisterRef RR(V.RefI.Reg, 0);
2384  if (TC == &Hexagon::DoubleRegsRegClass) {
2385  P = V.RefI.Pos;
2386  RR.Sub = Hexagon::isub_lo;
2387  if (P >= 32) {
2388  P -= 32;
2389  RR.Sub = Hexagon::isub_hi;
2390  }
2391  } else if (TC == &Hexagon::IntRegsRegClass) {
2392  P = V.RefI.Pos;
2393  }
2395  Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
2396  BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
2397  .addReg(RR.Reg, 0, RR.Sub)
2398  .addImm(P);
2399  HBS::replaceReg(RD.Reg, NewR, MRI);
2400  BT.put(NewR, RC);
2401  return true;
2402  }
2403  } else if (V.is(0) || V.is(1)) {
2404  Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
2405  unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
2406  BuildMI(B, At, DL, HII.get(NewOpc), NewR);
2407  HBS::replaceReg(RD.Reg, NewR, MRI);
2408  return true;
2409  }
2410 
2411  return false;
2412 }
2413 
2414 // Detect whether RD is a bitfield extract (sign- or zero-extended) of
2415 // some register from the AVs set. Create a new corresponding instruction
2416 // at the location of MI. The intent is to recognize situations where
2417 // a sequence of instructions performs an operation that is equivalent to
2418 // an extract operation, such as a shift left followed by a shift right.
2419 bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
2421  const RegisterSet &AVs) {
2422  if (!GenExtract)
2423  return false;
2424  if (MaxExtract.getNumOccurrences()) {
2425  if (CountExtract >= MaxExtract)
2426  return false;
2427  CountExtract++;
2428  }
2429 
2430  unsigned W = RC.width();
2431  unsigned RW = W;
2432  unsigned Len;
2433  bool Signed;
2434 
2435  // The code is mostly class-independent, except for the part that generates
2436  // the extract instruction, and establishes the source register (in case it
2437  // needs to use a subregister).
2438  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2439  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
2440  return false;
2441  assert(RD.Sub == 0);
2442 
2443  // Observation:
2444  // If the cell has a form of 00..0xx..x with k zeros and n remaining
2445  // bits, this could be an extractu of the n bits, but it could also be
2446  // an extractu of a longer field which happens to have 0s in the top
2447  // bit positions.
2448  // The same logic applies to sign-extended fields.
2449  //
2450  // Do not check for the extended extracts, since it would expand the
2451  // search space quite a bit. The search may be expensive as it is.
2452 
2453  const BitTracker::BitValue &TopV = RC[W-1];
2454 
2455  // Eliminate candidates that have self-referential bits, since they
2456  // cannot be extracts from other registers. Also, skip registers that
2457  // have compile-time constant values.
2458  bool IsConst = true;
2459  for (unsigned I = 0; I != W; ++I) {
2460  const BitTracker::BitValue &V = RC[I];
2461  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
2462  return false;
2463  IsConst = IsConst && (V.is(0) || V.is(1));
2464  }
2465  if (IsConst)
2466  return false;
2467 
2468  if (TopV.is(0) || TopV.is(1)) {
2469  bool S = TopV.is(1);
2470  for (--W; W > 0 && RC[W-1].is(S); --W)
2471  ;
2472  Len = W;
2473  Signed = S;
2474  // The sign bit must be a part of the field being extended.
2475  if (Signed)
2476  ++Len;
2477  } else {
2478  // This could still be a sign-extended extract.
2480  if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
2481  return false;
2482  for (--W; W > 0 && RC[W-1] == TopV; --W)
2483  ;
2484  // The top bits of RC are copies of TopV. One occurrence of TopV will
2485  // be a part of the field.
2486  Len = W + 1;
2487  Signed = true;
2488  }
2489 
2490  // This would be just a copy. It should be handled elsewhere.
2491  if (Len == RW)
2492  return false;
2493 
2494  LLVM_DEBUG({
2495  dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub)
2496  << ", MI: " << *MI;
2497  dbgs() << "Cell: " << RC << '\n';
2498  dbgs() << "Expected bitfield size: " << Len << " bits, "
2499  << (Signed ? "sign" : "zero") << "-extended\n";
2500  });
2501 
2502  bool Changed = false;
2503 
2504  for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
2505  if (!BT.has(R))
2506  continue;
2507  const BitTracker::RegisterCell &SC = BT.lookup(R);
2508  unsigned SW = SC.width();
2509 
2510  // The source can be longer than the destination, as long as its size is
2511  // a multiple of the size of the destination. Also, we would need to be
2512  // able to refer to the subregister in the source that would be of the
2513  // same size as the destination, but only check the sizes here.
2514  if (SW < RW || (SW % RW) != 0)
2515  continue;
2516 
2517  // The field can start at any offset in SC as long as it contains Len
2518  // bits and does not cross subregister boundary (if the source register
2519  // is longer than the destination).
2520  unsigned Off = 0;
2521  while (Off <= SW-Len) {
2522  unsigned OE = (Off+Len)/RW;
2523  if (OE != Off/RW) {
2524  // The assumption here is that if the source (R) is longer than the
2525  // destination, then the destination is a sequence of words of
2526  // size RW, and each such word in R can be accessed via a subregister.
2527  //
2528  // If the beginning and the end of the field cross the subregister
2529  // boundary, advance to the next subregister.
2530  Off = OE*RW;
2531  continue;
2532  }
2533  if (HBS::isEqual(RC, 0, SC, Off, Len))
2534  break;
2535  ++Off;
2536  }
2537 
2538  if (Off > SW-Len)
2539  continue;
2540 
2541  // Found match.
2542  unsigned ExtOpc = 0;
2543  if (Off == 0) {
2544  if (Len == 8)
2545  ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
2546  else if (Len == 16)
2547  ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
2548  else if (Len < 10 && !Signed)
2549  ExtOpc = Hexagon::A2_andir;
2550  }
2551  if (ExtOpc == 0) {
2552  ExtOpc =
2553  Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp)
2554  : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
2555  }
2556  unsigned SR = 0;
2557  // This only recognizes isub_lo and isub_hi.
2558  if (RW != SW && RW*2 != SW)
2559  continue;
2560  if (RW != SW)
2561  SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
2562  Off = Off % RW;
2563 
2564  if (!validateReg({R,SR}, ExtOpc, 1))
2565  continue;
2566 
2567  // Don't generate the same instruction as the one being optimized.
2568  if (MI->getOpcode() == ExtOpc) {
2569  // All possible ExtOpc's have the source in operand(1).
2570  const MachineOperand &SrcOp = MI->getOperand(1);
2571  if (SrcOp.getReg() == R)
2572  continue;
2573  }
2574 
2575  DebugLoc DL = MI->getDebugLoc();
2576  MachineBasicBlock &B = *MI->getParent();
2577  Register NewR = MRI.createVirtualRegister(FRC);
2578  auto At = MI->isPHI() ? B.getFirstNonPHI()
2580  auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
2581  .addReg(R, 0, SR);
2582  switch (ExtOpc) {
2583  case Hexagon::A2_sxtb:
2584  case Hexagon::A2_zxtb:
2585  case Hexagon::A2_sxth:
2586  case Hexagon::A2_zxth:
2587  break;
2588  case Hexagon::A2_andir:
2589  MIB.addImm((1u << Len) - 1);
2590  break;
2591  case Hexagon::S4_extract:
2592  case Hexagon::S2_extractu:
2593  case Hexagon::S4_extractp:
2594  case Hexagon::S2_extractup:
2595  MIB.addImm(Len)
2596  .addImm(Off);
2597  break;
2598  default:
2599  llvm_unreachable("Unexpected opcode");
2600  }
2601 
2602  HBS::replaceReg(RD.Reg, NewR, MRI);
2603  BT.put(BitTracker::RegisterRef(NewR), RC);
2604  Changed = true;
2605  break;
2606  }
2607 
2608  return Changed;
2609 }
2610 
2611 bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
2613  unsigned Opc = MI->getOpcode();
2614  if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
2615  return false;
2616  MachineOperand &CmpOp = MI->getOperand(2);
2617  if (!CmpOp.isImm() || CmpOp.getImm() != 0)
2618  return false;
2619 
2620  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2621  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
2622  return false;
2623  assert(RD.Sub == 0);
2624 
2625  MachineBasicBlock &B = *MI->getParent();
2626  const DebugLoc &DL = MI->getDebugLoc();
2627  auto At = MI->isPHI() ? B.getFirstNonPHI()
2629  bool KnownZ = true;
2630  bool KnownNZ = false;
2631 
2632  BitTracker::RegisterRef SR = MI->getOperand(1);
2633  if (!BT.has(SR.Reg))
2634  return false;
2635  const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg);
2636  unsigned F, W;
2637  if (!HBS::getSubregMask(SR, F, W, MRI))
2638  return false;
2639 
2640  for (uint16_t I = F; I != F+W; ++I) {
2641  const BitTracker::BitValue &V = SC[I];
2642  if (!V.is(0))
2643  KnownZ = false;
2644  if (V.is(1))
2645  KnownNZ = true;
2646  }
2647 
2648  auto ReplaceWithConst = [&](int C) {
2649  Register NewR = MRI.createVirtualRegister(FRC);
2650  BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
2651  .addImm(C);
2652  HBS::replaceReg(RD.Reg, NewR, MRI);
2653  BitTracker::RegisterCell NewRC(W);
2654  for (uint16_t I = 0; I != W; ++I) {
2655  NewRC[I] = BitTracker::BitValue(C & 1);
2656  C = unsigned(C) >> 1;
2657  }
2658  BT.put(BitTracker::RegisterRef(NewR), NewRC);
2659  return true;
2660  };
2661 
2662  auto IsNonZero = [] (const MachineOperand &Op) {
2663  if (Op.isGlobal() || Op.isBlockAddress())
2664  return true;
2665  if (Op.isImm())
2666  return Op.getImm() != 0;
2667  if (Op.isCImm())
2668  return !Op.getCImm()->isZero();
2669  if (Op.isFPImm())
2670  return !Op.getFPImm()->isZero();
2671  return false;
2672  };
2673 
2674  auto IsZero = [] (const MachineOperand &Op) {
2675  if (Op.isGlobal() || Op.isBlockAddress())
2676  return false;
2677  if (Op.isImm())
2678  return Op.getImm() == 0;
2679  if (Op.isCImm())
2680  return Op.getCImm()->isZero();
2681  if (Op.isFPImm())
2682  return Op.getFPImm()->isZero();
2683  return false;
2684  };
2685 
2686  // If the source register is known to be 0 or non-0, the comparison can
2687  // be folded to a load of a constant.
2688  if (KnownZ || KnownNZ) {
2689  assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0");
2690  return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
2691  }
2692 
2693  // Special case: if the compare comes from a C2_muxii, then we know the
2694  // two possible constants that can be the source value.
2695  MachineInstr *InpDef = MRI.getVRegDef(SR.Reg);
2696  if (!InpDef)
2697  return false;
2698  if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) {
2699  MachineOperand &Src1 = InpDef->getOperand(2);
2700  MachineOperand &Src2 = InpDef->getOperand(3);
2701  // Check if both are non-zero.
2702  bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
2703  if (KnownNZ1 && KnownNZ2)
2704  return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
2705  // Check if both are zero.
2706  bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
2707  if (KnownZ1 && KnownZ2)
2708  return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);
2709 
2710  // If for both operands we know that they are either 0 or non-0,
2711  // replace the comparison with a C2_muxii, using the same predicate
2712  // register, but with operands substituted with 0/1 accordingly.
2713  if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
2714  Register NewR = MRI.createVirtualRegister(FRC);
2715  BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
2716  .addReg(InpDef->getOperand(1).getReg())
2717  .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
2718  .addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
2719  HBS::replaceReg(RD.Reg, NewR, MRI);
2720  // Create a new cell with only the least significant bit unknown.
2721  BitTracker::RegisterCell NewRC(W);
2722  NewRC[0] = BitTracker::BitValue::self();
2723  NewRC.fill(1, W, BitTracker::BitValue::Zero);
2724  BT.put(BitTracker::RegisterRef(NewR), NewRC);
2725  return true;
2726  }
2727  }
2728 
2729  return false;
2730 }
2731 
2732 bool BitSimplification::processBlock(MachineBasicBlock &B,
2733  const RegisterSet &AVs) {
2734  if (!BT.reached(&B))
2735  return false;
2736  bool Changed = false;
2737  RegisterSet AVB = AVs;
2738  RegisterSet Defs;
2739 
2740  for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
2741  MachineInstr *MI = &*I;
2742  Defs.clear();
2743  HBS::getInstrDefs(*MI, Defs);
2744 
2745  unsigned Opc = MI->getOpcode();
2746  if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
2747  continue;
2748 
2749  if (MI->mayStore()) {
2750  bool T = genStoreUpperHalf(MI);
2751  T = T || genStoreImmediate(MI);
2752  Changed |= T;
2753  continue;
2754  }
2755 
2756  if (Defs.count() != 1)
2757  continue;
2758  const MachineOperand &Op0 = MI->getOperand(0);
2759  if (!Op0.isReg() || !Op0.isDef())
2760  continue;
2761  BitTracker::RegisterRef RD = Op0;
2762  if (!BT.has(RD.Reg))
2763  continue;
2764  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2765  const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
2766 
2767  if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
2768  bool T = genPackhl(MI, RD, RC);
2769  T = T || simplifyExtractLow(MI, RD, RC, AVB);
2770  Changed |= T;
2771  continue;
2772  }
2773 
2774  if (FRC->getID() == Hexagon::IntRegsRegClassID) {
2775  bool T = genBitSplit(MI, RD, RC, AVB);
2776  T = T || simplifyExtractLow(MI, RD, RC, AVB);
2777  T = T || genExtractHalf(MI, RD, RC);
2778  T = T || genCombineHalf(MI, RD, RC);
2779  T = T || genExtractLow(MI, RD, RC);
2780  T = T || simplifyRCmp0(MI, RD);
2781  Changed |= T;
2782  continue;
2783  }
2784 
2785  if (FRC->getID() == Hexagon::PredRegsRegClassID) {
2786  bool T = simplifyTstbit(MI, RD, RC);
2787  Changed |= T;
2788  continue;
2789  }
2790  }
2791  return Changed;
2792 }
2793 
2794 bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
2795  if (skipFunction(MF.getFunction()))
2796  return false;
2797 
2798  auto &HST = MF.getSubtarget<HexagonSubtarget>();
2799  auto &HRI = *HST.getRegisterInfo();
2800  auto &HII = *HST.getInstrInfo();
2801 
2802  MDT = &getAnalysis<MachineDominatorTree>();
2804  bool Changed;
2805 
2806  Changed = DeadCodeElimination(MF, *MDT).run();
2807 
2808  const HexagonEvaluator HE(HRI, MRI, HII, MF);
2809  BitTracker BT(HE, MF);
2810  LLVM_DEBUG(BT.trace(true));
2811  BT.run();
2812 
2813  MachineBasicBlock &Entry = MF.front();
2814 
2815  RegisterSet AIG; // Available registers for IG.
2816  ConstGeneration ImmG(BT, HII, MRI);
2817  Changed |= visitBlock(Entry, ImmG, AIG);
2818 
2819  RegisterSet ARE; // Available registers for RIE.
2820  RedundantInstrElimination RIE(BT, HII, HRI, MRI);
2821  bool Ried = visitBlock(Entry, RIE, ARE);
2822  if (Ried) {
2823  Changed = true;
2824  BT.run();
2825  }
2826 
2827  RegisterSet ACG; // Available registers for CG.
2828  CopyGeneration CopyG(BT, HII, HRI, MRI);
2829  Changed |= visitBlock(Entry, CopyG, ACG);
2830 
2831  RegisterSet ACP; // Available registers for CP.
2832  CopyPropagation CopyP(HRI, MRI);
2833  Changed |= visitBlock(Entry, CopyP, ACP);
2834 
2835  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2836 
2837  BT.run();
2838  RegisterSet ABS; // Available registers for BS.
2839  BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);
2840  Changed |= visitBlock(Entry, BitS, ABS);
2841 
2842  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2843 
2844  if (Changed) {
2845  for (auto &B : MF)
2846  for (auto &I : B)
2847  I.clearKillInfo();
2848  DeadCodeElimination(MF, *MDT).run();
2849  }
2850  return Changed;
2851 }
2852 
2853 // Recognize loops where the code at the end of the loop matches the code
2854 // before the entry of the loop, and the matching code is such that is can
2855 // be simplified. This pass relies on the bit simplification above and only
2856 // prepares code in a way that can be handled by the bit simplifcation.
2857 //
2858 // This is the motivating testcase (and explanation):
2859 //
2860 // {
2861 // loop0(.LBB0_2, r1) // %for.body.preheader
2862 // r5:4 = memd(r0++#8)
2863 // }
2864 // {
2865 // r3 = lsr(r4, #16)
2866 // r7:6 = combine(r5, r5)
2867 // }
2868 // {
2869 // r3 = insert(r5, #16, #16)
2870 // r7:6 = vlsrw(r7:6, #16)
2871 // }
2872 // .LBB0_2:
2873 // {
2874 // memh(r2+#4) = r5
2875 // memh(r2+#6) = r6 # R6 is really R5.H
2876 // }
2877 // {
2878 // r2 = add(r2, #8)
2879 // memh(r2+#0) = r4
2880 // memh(r2+#2) = r3 # R3 is really R4.H
2881 // }
2882 // {
2883 // r5:4 = memd(r0++#8)
2884 // }
2885 // { # "Shuffling" code that sets up R3 and R6
2886 // r3 = lsr(r4, #16) # so that their halves can be stored in the
2887 // r7:6 = combine(r5, r5) # next iteration. This could be folded into
2888 // } # the stores if the code was at the beginning
2889 // { # of the loop iteration. Since the same code
2890 // r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved
2891 // r7:6 = vlsrw(r7:6, #16) # there.
2892 // }:endloop0
2893 //
2894 //
2895 // The outcome:
2896 //
2897 // {
2898 // loop0(.LBB0_2, r1)
2899 // r5:4 = memd(r0++#8)
2900 // }
2901 // .LBB0_2:
2902 // {
2903 // memh(r2+#4) = r5
2904 // memh(r2+#6) = r5.h
2905 // }
2906 // {
2907 // r2 = add(r2, #8)
2908 // memh(r2+#0) = r4
2909 // memh(r2+#2) = r4.h
2910 // }
2911 // {
2912 // r5:4 = memd(r0++#8)
2913 // }:endloop0
2914 
2915 namespace llvm {
2916 
2919 
2920 } // end namespace llvm
2921 
2922 namespace {
2923 
2924  class HexagonLoopRescheduling : public MachineFunctionPass {
2925  public:
2926  static char ID;
2927 
2928  HexagonLoopRescheduling() : MachineFunctionPass(ID) {
2930  }
2931 
2932  bool runOnMachineFunction(MachineFunction &MF) override;
2933 
2934  private:
2935  const HexagonInstrInfo *HII = nullptr;
2936  const HexagonRegisterInfo *HRI = nullptr;
2937  MachineRegisterInfo *MRI = nullptr;
2938  BitTracker *BTP = nullptr;
2939 
2940  struct LoopCand {
2941  LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
2942  MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}
2943 
2944  MachineBasicBlock *LB, *PB, *EB;
2945  };
2946  using InstrList = std::vector<MachineInstr *>;
2947  struct InstrGroup {
2948  BitTracker::RegisterRef Inp, Out;
2949  InstrList Ins;
2950  };
2951  struct PhiInfo {
2952  PhiInfo(MachineInstr &P, MachineBasicBlock &B);
2953 
2954  unsigned DefR;
2955  BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register
2956  MachineBasicBlock *LB, *PB; // Loop Block, Preheader Block
2957  };
2958 
2959  static unsigned getDefReg(const MachineInstr *MI);
2960  bool isConst(unsigned Reg) const;
2961  bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
2962  bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
2963  bool isShuffleOf(unsigned OutR, unsigned InpR) const;
2964  bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
2965  unsigned &InpR2) const;
2966  void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
2967  MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
2968  bool processLoop(LoopCand &C);
2969  };
2970 
2971 } // end anonymous namespace
2972 
2974 
2975 INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
2976  "Hexagon Loop Rescheduling", false, false)
2977 
2978 HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
2979  MachineBasicBlock &B) {
2980  DefR = HexagonLoopRescheduling::getDefReg(&P);
2981  LB = &B;
2982  PB = nullptr;
2983  for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
2984  const MachineOperand &OpB = P.getOperand(i+1);
2985  if (OpB.getMBB() == &B) {
2986  LR = P.getOperand(i);
2987  continue;
2988  }
2989  PB = OpB.getMBB();
2990  PR = P.getOperand(i);
2991  }
2992 }
2993 
2994 unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
2995  RegisterSet Defs;
2996  HBS::getInstrDefs(*MI, Defs);
2997  if (Defs.count() != 1)
2998  return 0;
2999  return Defs.find_first();
3000 }
3001 
3002 bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
3003  if (!BTP->has(Reg))
3004  return false;
3005  const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
3006  for (unsigned i = 0, w = RC.width(); i < w; ++i) {
3007  const BitTracker::BitValue &V = RC[i];
3008  if (!V.is(0) && !V.is(1))
3009  return false;
3010  }
3011  return true;
3012 }
3013 
3014 bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
3015  unsigned DefR) const {
3016  unsigned Opc = MI->getOpcode();
3017  switch (Opc) {
3018  case TargetOpcode::COPY:
3019  case Hexagon::S2_lsr_i_r:
3020  case Hexagon::S2_asr_i_r:
3021  case Hexagon::S2_asl_i_r:
3022  case Hexagon::S2_lsr_i_p:
3023  case Hexagon::S2_asr_i_p:
3024  case Hexagon::S2_asl_i_p:
3025  case Hexagon::S2_insert:
3026  case Hexagon::A2_or:
3027  case Hexagon::A2_orp:
3028  case Hexagon::A2_and:
3029  case Hexagon::A2_andp:
3030  case Hexagon::A2_combinew:
3031  case Hexagon::A4_combineri:
3032  case Hexagon::A4_combineir:
3033  case Hexagon::A2_combineii:
3034  case Hexagon::A4_combineii:
3035  case Hexagon::A2_combine_ll:
3036  case Hexagon::A2_combine_lh:
3037  case Hexagon::A2_combine_hl:
3038  case Hexagon::A2_combine_hh:
3039  return true;
3040  }
3041  return false;
3042 }
3043 
3044 bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
3045  unsigned InpR) const {
3046  for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
3047  const MachineOperand &Op = MI->getOperand(i);
3048  if (!Op.isReg())
3049  continue;
3050  if (Op.getReg() == InpR)
3051  return i == n-1;
3052  }
3053  return false;
3054 }
3055 
3056 bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
3057  if (!BTP->has(OutR) || !BTP->has(InpR))
3058  return false;
3059  const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
3060  for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
3061  const BitTracker::BitValue &V = OutC[i];
3063  continue;
3064  if (V.RefI.Reg != InpR)
3065  return false;
3066  }
3067  return true;
3068 }
3069 
3070 bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
3071  unsigned OutR2, unsigned &InpR2) const {
3072  if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
3073  return false;
3074  const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
3075  const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
3076  unsigned W = OutC1.width();
3077  unsigned MatchR = 0;
3078  if (W != OutC2.width())
3079  return false;
3080  for (unsigned i = 0; i < W; ++i) {
3081  const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
3082  if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
3083  return false;
3084  if (V1.Type != BitTracker::BitValue::Ref)
3085  continue;
3086  if (V1.RefI.Pos != V2.RefI.Pos)
3087  return false;
3088  if (V1.RefI.Reg != InpR1)
3089  return false;
3090  if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
3091  return false;
3092  if (!MatchR)
3093  MatchR = V2.RefI.Reg;
3094  else if (V2.RefI.Reg != MatchR)
3095  return false;
3096  }
3097  InpR2 = MatchR;
3098  return true;
3099 }
3100 
3101 void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
3102  MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
3103  unsigned NewPredR) {
3105 
3106  const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
3107  Register PhiR = MRI->createVirtualRegister(PhiRC);
3108  BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
3109  .addReg(NewPredR)
3110  .addMBB(&PB)
3111  .addReg(G.Inp.Reg)
3112  .addMBB(&LB);
3113  RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
3114 
3115  for (const MachineInstr *SI : llvm::reverse(G.Ins)) {
3116  unsigned DR = getDefReg(SI);
3117  const TargetRegisterClass *RC = MRI->getRegClass(DR);
3118  Register NewDR = MRI->createVirtualRegister(RC);
3119  DebugLoc DL = SI->getDebugLoc();
3120 
3121  auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
3122  for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
3123  const MachineOperand &Op = SI->getOperand(j);
3124  if (!Op.isReg()) {
3125  MIB.add(Op);
3126  continue;
3127  }
3128  if (!Op.isUse())
3129  continue;
3130  unsigned UseR = RegMap[Op.getReg()];
3131  MIB.addReg(UseR, 0, Op.getSubReg());
3132  }
3133  RegMap.insert(std::make_pair(DR, NewDR));
3134  }
3135 
3136  HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
3137 }
3138 
3139 bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
3140  LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB)
3141  << "\n");
3142  std::vector<PhiInfo> Phis;
3143  for (auto &I : *C.LB) {
3144  if (!I.isPHI())
3145  break;
3146  unsigned PR = getDefReg(&I);
3147  if (isConst(PR))
3148  continue;
3149  bool BadUse = false, GoodUse = false;
3150  for (const MachineOperand &MO : MRI->use_operands(PR)) {
3151  const MachineInstr *UseI = MO.getParent();
3152  if (UseI->getParent() != C.LB) {
3153  BadUse = true;
3154  break;
3155  }
3156  if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
3157  GoodUse = true;
3158  }
3159  if (BadUse || !GoodUse)
3160  continue;
3161 
3162  Phis.push_back(PhiInfo(I, *C.LB));
3163  }
3164 
3165  LLVM_DEBUG({
3166  dbgs() << "Phis: {";
3167  for (auto &I : Phis) {
3168  dbgs() << ' ' << printReg(I.DefR, HRI) << "=phi("
3169  << printReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
3170  << ',' << printReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
3171  << I.LB->getNumber() << ')';
3172  }
3173  dbgs() << " }\n";
3174  });
3175 
3176  if (Phis.empty())
3177  return false;
3178 
3179  bool Changed = false;
3180  InstrList ShufIns;
3181 
3182  // Go backwards in the block: for each bit shuffling instruction, check
3183  // if that instruction could potentially be moved to the front of the loop:
3184  // the output of the loop cannot be used in a non-shuffling instruction
3185  // in this loop.
3186  for (MachineInstr &MI : llvm::reverse(*C.LB)) {
3187  if (MI.isTerminator())
3188  continue;
3189  if (MI.isPHI())
3190  break;
3191 
3192  RegisterSet Defs;
3193  HBS::getInstrDefs(MI, Defs);
3194  if (Defs.count() != 1)
3195  continue;
3196  Register DefR = Defs.find_first();
3197  if (!DefR.isVirtual())
3198  continue;
3199  if (!isBitShuffle(&MI, DefR))
3200  continue;
3201 
3202  bool BadUse = false;
3203  for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
3204  MachineInstr *UseI = UI->getParent();
3205  if (UseI->getParent() == C.LB) {
3206  if (UseI->isPHI()) {
3207  // If the use is in a phi node in this loop, then it should be
3208  // the value corresponding to the back edge.
3209  unsigned Idx = UI.getOperandNo();
3210  if (UseI->getOperand(Idx+1).getMBB() != C.LB)
3211  BadUse = true;
3212  } else {
3213  if (!llvm::is_contained(ShufIns, UseI))
3214  BadUse = true;
3215  }
3216  } else {
3217  // There is a use outside of the loop, but there is no epilog block
3218  // suitable for a copy-out.
3219  if (C.EB == nullptr)
3220  BadUse = true;
3221  }
3222  if (BadUse)
3223  break;
3224  }
3225 
3226  if (BadUse)
3227  continue;
3228  ShufIns.push_back(&MI);
3229  }
3230 
3231  // Partition the list of shuffling instructions into instruction groups,
3232  // where each group has to be moved as a whole (i.e. a group is a chain of
3233  // dependent instructions). A group produces a single live output register,
3234  // which is meant to be the input of the loop phi node (although this is
3235  // not checked here yet). It also uses a single register as its input,
3236  // which is some value produced in the loop body. After moving the group
3237  // to the beginning of the loop, that input register would need to be
3238  // the loop-carried register (through a phi node) instead of the (currently
3239  // loop-carried) output register.
3240  using InstrGroupList = std::vector<InstrGroup>;
3241  InstrGroupList Groups;
3242 
3243  for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
3244  MachineInstr *SI = ShufIns[i];
3245  if (SI == nullptr)
3246  continue;
3247 
3248  InstrGroup G;
3249  G.Ins.push_back(SI);
3250  G.Out.Reg = getDefReg(SI);
3251  RegisterSet Inputs;
3252  HBS::getInstrUses(*SI, Inputs);
3253 
3254  for (unsigned j = i+1; j < n; ++j) {
3255  MachineInstr *MI = ShufIns[j];
3256  if (MI == nullptr)
3257  continue;
3258  RegisterSet Defs;
3259  HBS::getInstrDefs(*MI, Defs);
3260  // If this instruction does not define any pending inputs, skip it.
3261  if (!Defs.intersects(Inputs))
3262  continue;
3263  // Otherwise, add it to the current group and remove the inputs that
3264  // are defined by MI.
3265  G.Ins.push_back(MI);
3266  Inputs.remove(Defs);
3267  // Then add all registers used by MI.
3268  HBS::getInstrUses(*MI, Inputs);
3269  ShufIns[j] = nullptr;
3270  }
3271 
3272  // Only add a group if it requires at most one register.
3273  if (Inputs.count() > 1)
3274  continue;
3275  auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
3276  return G.Out.Reg == P.LR.Reg;
3277  };
3278  if (llvm::none_of(Phis, LoopInpEq))
3279  continue;
3280 
3281  G.Inp.Reg = Inputs.find_first();
3282  Groups.push_back(G);
3283  }
3284 
3285  LLVM_DEBUG({
3286  for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
3287  InstrGroup &G = Groups[i];
3288  dbgs() << "Group[" << i << "] inp: "
3289  << printReg(G.Inp.Reg, HRI, G.Inp.Sub)
3290  << " out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
3291  for (const MachineInstr *MI : G.Ins)
3292  dbgs() << " " << MI;
3293  }
3294  });
3295 
3296  for (InstrGroup &G : Groups) {
3297  if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
3298  continue;
3299  auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
3300  return G.Out.Reg == P.LR.Reg;
3301  };
3302  auto F = llvm::find_if(Phis, LoopInpEq);
3303  if (F == Phis.end())
3304  continue;
3305  unsigned PrehR = 0;
3306  if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) {
3307  const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg);
3308  unsigned Opc = DefPrehR->getOpcode();
3309  if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
3310  continue;
3311  if (!DefPrehR->getOperand(1).isImm())
3312  continue;
3313  if (DefPrehR->getOperand(1).getImm() != 0)
3314  continue;
3315  const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
3316  if (RC != MRI->getRegClass(F->PR.Reg)) {
3317  PrehR = MRI->createVirtualRegister(RC);
3318  unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
3319  : Hexagon::A2_tfrpi;
3320  auto T = C.PB->getFirstTerminator();
3321  DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
3322  BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR)
3323  .addImm(0);
3324  } else {
3325  PrehR = F->PR.Reg;
3326  }
3327  }
3328  // isSameShuffle could match with PrehR being of a wider class than
3329  // G.Inp.Reg, for example if G shuffles the low 32 bits of its input,
3330  // it would match for the input being a 32-bit register, and PrehR
3331  // being a 64-bit register (where the low 32 bits match). This could
3332  // be handled, but for now skip these cases.
3333  if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg))
3334  continue;
3335  moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR);
3336  Changed = true;
3337  }
3338 
3339  return Changed;
3340 }
3341 
3342 bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
3343  if (skipFunction(MF.getFunction()))
3344  return false;
3345 
3346  auto &HST = MF.getSubtarget<HexagonSubtarget>();
3347  HII = HST.getInstrInfo();
3348  HRI = HST.getRegisterInfo();
3349  MRI = &MF.getRegInfo();
3350  const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
3351  BitTracker BT(HE, MF);
3352  LLVM_DEBUG(BT.trace(true));
3353  BT.run();
3354  BTP = &BT;
3355 
3356  std::vector<LoopCand> Cand;
3357 
3358  for (auto &B : MF) {
3359  if (B.pred_size() != 2 || B.succ_size() != 2)
3360  continue;
3361  MachineBasicBlock *PB = nullptr;
3362  bool IsLoop = false;
3363  for (MachineBasicBlock *Pred : B.predecessors()) {
3364  if (Pred != &B)
3365  PB = Pred;
3366  else
3367  IsLoop = true;
3368  }
3369  if (!IsLoop)
3370  continue;
3371 
3372  MachineBasicBlock *EB = nullptr;
3373  for (MachineBasicBlock *Succ : B.successors()) {
3374  if (Succ == &B)
3375  continue;
3376  // Set EP to the epilog block, if it has only 1 predecessor (i.e. the
3377  // edge from B to EP is non-critical.
3378  if (Succ->pred_size() == 1)
3379  EB = Succ;
3380  break;
3381  }
3382 
3383  Cand.push_back(LoopCand(&B, PB, EB));
3384  }
3385 
3386  bool Changed = false;
3387  for (auto &C : Cand)
3388  Changed |= processLoop(C);
3389 
3390  return Changed;
3391 }
3392 
3393 //===----------------------------------------------------------------------===//
3394 // Public Constructor Functions
3395 //===----------------------------------------------------------------------===//
3396 
3398  return new HexagonLoopRescheduling();
3399 }
3400 
3402  return new HexagonBitSimplify();
3403 }
llvm::MachineInstr::isDebugValue
bool isDebugValue() const
Definition: MachineInstr.h:1218
i
i
Definition: README.txt:29
llvm::MSP430ISD::RRC
@ RRC
Y = RRC X, rotate right via carry.
Definition: MSP430ISelLowering.h:36
is
should just be implemented with a CLZ instruction Since there are other e that share this it would be best to implement this in a target independent as zero is the default value for the binary encoder e add r0 add r5 Register operands should be distinct That is
Definition: README.txt:725
llvm::MachineRegisterInfo::markUsesInDebugValueAsUndef
void markUsesInDebugValueAsUndef(Register Reg) const
markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the specified register as undefined wh...
Definition: MachineRegisterInfo.cpp:528
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4635
B1
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
MachineInstr.h
MathExtras.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm::TargetRegisterClass::getID
unsigned getID() const
Return the register class ID number.
Definition: TargetRegisterInfo.h:70
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1619
llvm::ISD::LIFETIME_END
@ LIFETIME_END
Definition: ISDOpcodes.h:1216
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
simplify
hexagon bit simplify
Definition: HexagonBitSimplify.cpp:289
llvm::BitTracker::lookup
const RegisterCell & lookup(unsigned Reg) const
Definition: BitTracker.h:357
MCInstrDesc.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::BitTracker::BitValue::RefI
BitRef RefI
Definition: BitTracker.h:192
StringRef.h
llvm::ISD::LIFETIME_START
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1215
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::BitTracker::RegisterRef::Sub
unsigned Sub
Definition: BitTracker.h:147
llvm::SmallVector< unsigned, 2 >
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:241
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:116
HexagonSubtarget.h
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::createHexagonLoopRescheduling
FunctionPass * createHexagonLoopRescheduling()
Definition: HexagonBitSimplify.cpp:3397
ErrorHandling.h
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineRegisterInfo::getTargetRegisterInfo
const TargetRegisterInfo * getTargetRegisterInfo() const
Definition: MachineRegisterInfo.h:151
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify", "Hexagon bit simplification", false, false) INITIALIZE_PASS_END(HexagonBitSimplify
intersects
static Optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Definition: DbgEntityHistoryCalculator.cpp:114
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::SPIRV::InstrList
SmallVector< MachineInstr * > InstrList
Definition: SPIRVModuleAnalysis.h:41
llvm::BitTracker::BitValue::Ref
@ Ref
Definition: BitTracker.h:160
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:232
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1909
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Used
@ Used
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::tgtok::Bits
@ Bits
Definition: TGLexer.h:50
llvm::MachineRegisterInfo::use_operands
iterator_range< use_iterator > use_operands(Register Reg) const
Definition: MachineRegisterInfo.h:477
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1293
llvm::Register::index2VirtReg
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
STLExtras.h
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:182
RegisterSet
SmallSet< unsigned, 4 > RegisterSet
Definition: Thumb2ITBlockPass.cpp:39
llvm::Lo_32
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:353
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1618
llvm::isEqual
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Definition: GCNRegPressure.cpp:55
llvm::BitTracker::visit
void visit(const MachineInstr &MI)
Definition: BitTracker.cpp:1033
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:103
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
RegisterSetLimit
static cl::opt< unsigned > RegisterSetLimit("hexbit-registerset-limit", cl::Hidden, cl::init(1000))
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:585
llvm::BitTracker::RegisterRef::Reg
Register Reg
Definition: BitTracker.h:146
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
GraphTraits.h
clear
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:149
CommandLine.h
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition: MachineBasicBlock.h:865
BitTracker.h
llvm::BitTracker::BitValue::is
bool is(unsigned T) const
Definition: BitTracker.h:209
simplification
hexagon bit Hexagon bit simplification
Definition: HexagonBitSimplify.cpp:290
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:834
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:650
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
L2
add sub stmia L5 ldr L2
Definition: README.txt:201
llvm::PPCISD::SC
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Definition: PPCISelLowering.h:418
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:518
llvm::BitTracker::RegisterCell::width
uint16_t width() const
Definition: BitTracker.h:303
llvm::MachineOperand::CreateImm
static MachineOperand CreateImm(int64_t Val)
Definition: MachineOperand.h:782
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:674
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:501
llvm::MachineOperand::setSubReg
void setSubReg(unsigned subReg)
Definition: MachineOperand.h:480
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
t
bitcast float %x to i32 %s=and i32 %t, 2147483647 %d=bitcast i32 %s to float ret float %d } declare float @fabsf(float %n) define float @bar(float %x) nounwind { %d=call float @fabsf(float %x) ret float %d } This IR(from PR6194):target datalayout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple="x86_64-apple-darwin10.0.0" %0=type { double, double } %struct.float3=type { float, float, float } define void @test(%0, %struct.float3 *nocapture %res) nounwind noinline ssp { entry:%tmp18=extractvalue %0 %0, 0 t
Definition: README-SSE.txt:788
llvm::BitTracker
Definition: BitTracker.h:35
llvm::HexagonEvaluator
Definition: HexagonBitTracker.h:25
false
Definition: StackSlotColoring.cpp:141
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::isUIntN
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:455
MaxBitSplit
static cl::opt< unsigned > MaxBitSplit("hexbit-max-bitsplit", cl::Hidden, cl::init(std::numeric_limits< unsigned >::max()))
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::AArch64CC::LE
@ LE
Definition: AArch64BaseInfo.h:268
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::ThreadPriority::Low
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
BitVector.h
CountBitSplit
static unsigned CountBitSplit
Definition: HexagonBitSimplify.cpp:64
DebugLoc.h
llvm::BitTracker::BitValue::self
static BitValue self(const BitRef &Self=BitRef())
Definition: BitTracker.h:280
llvm::initializeHexagonBitSimplifyPass
void initializeHexagonBitSimplifyPass(PassRegistry &Registry)
llvm::BitVector
Definition: BitVector.h:75
HexagonInstrInfo.h
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:396
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::BitTracker::run
void run()
Definition: BitTracker.cpp:1119
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Hi_32
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:348
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:642
PB
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
GenBitSplit
static cl::opt< bool > GenBitSplit("hexbit-bitsplit", cl::Hidden, cl::init(true), cl::desc("Generate bitsplit instructions"))
llvm::isInt< 8 >
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:367
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:640
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::cl::opt< bool >
PreserveTiedOps
static cl::opt< bool > PreserveTiedOps("hexbit-keep-tied", cl::Hidden, cl::init(true), cl::desc("Preserve subregisters in tied operands"))
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:420
llvm::createHexagonBitSimplify
FunctionPass * createHexagonBitSimplify()
Definition: HexagonBitSimplify.cpp:3401
llvm::HexagonISD::CONST32
@ CONST32
Definition: HexagonISelLowering.h:37
llvm::BitTracker::BitRef::Reg
Register Reg
Definition: BitTracker.h:134
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1697
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::BitTracker::put
void put(RegisterRef RR, const RegisterCell &RC)
Definition: BitTracker.cpp:994
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1625
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
s
multiplies can be turned into SHL s
Definition: README.txt:370
llvm::DenseMap< unsigned, unsigned >
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1670
MachineFunctionPass.h
HexagonRegisterInfo.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::BitTracker::reached
bool reached(const MachineBasicBlock *B) const
Definition: BitTracker.cpp:1025
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::Hexagon::ps_sub_hi
@ ps_sub_hi
Definition: HexagonRegisterInfo.h:26
llvm::initializeHexagonLoopReschedulingPass
void initializeHexagonLoopReschedulingPass(PassRegistry &)
llvm::SPIRV::Capability::Groups
@ Groups
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1257
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::BitTracker::has
bool has(unsigned Reg) const
Definition: BitTracker.h:352
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
llvm::MachineFunction
Definition: MachineFunction.h:241
GenExtract
static cl::opt< bool > GenExtract("hexbit-extract", cl::Hidden, cl::init(true), cl::desc("Generate extract instructions"))
llvm::HexagonInstrInfo
Definition: HexagonInstrInfo.h:38
llvm::sys::fs::remove
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
llvm::BitTracker::get
RegisterCell get(RegisterRef RR) const
Definition: BitTracker.cpp:990
llvm::Hexagon::ps_sub_lo
@ ps_sub_lo
Definition: HexagonRegisterInfo.h:26
llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition: MachineBasicBlock.h:242
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::IsConst
constexpr char IsConst[]
Key for Kernel::Arg::Metadata::mIsConst.
Definition: AMDGPUMetadata.h:195
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:561
HexagonBitTracker.h
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::SrcOp::getReg
Register getReg() const
Definition: MachineIRBuilder.h:178
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1612
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:491
llvm::BitTracker::BitValue::Zero
@ Zero
Definition: BitTracker.h:158
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::MachineRegisterInfo::use_begin
use_iterator use_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:472
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::BitTracker::RegisterRef
Definition: BitTracker.h:141
Compiler.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
DC
static ManagedStatic< DebugCounter > DC
Definition: DebugCounter.cpp:70
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:374
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:288
llvm::DomTreeNodeBase
Base class for the actual dominator tree node.
Definition: LiveIntervalCalc.h:24
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:209
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:364
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:663
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1632
llvm::BitTracker::RegisterCell
Definition: BitTracker.h:300
j
return j(j<< 16)
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
MaxExtract
static cl::opt< unsigned > MaxExtract("hexbit-max-extract", cl::Hidden, cl::init(std::numeric_limits< unsigned >::max()))
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:268
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:606
H
#define H(x, y, z)
Definition: MD5.cpp:57
bit
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional ldr LCPI1_0 ldr ldr tst movne lsr ldr LCPI1_1 and r0 bx lr it saves an instruction and a register It might be profitable to cse MOVi16 if there are lots of bit immediates with the same bottom half Robert Muth started working on an alternate jump table implementation that does not put the tables in line in the text This is more like the llvm default jump table implementation This might be useful sometime Several revisions of patches are on the mailing beginning while CMP sets them like a subtract Therefore to be able to use CMN for comparisons other than the Z bit
Definition: README.txt:584
llvm::HexagonSubtarget::getRegisterInfo
const HexagonRegisterInfo * getRegisterInfo() const override
Definition: HexagonSubtarget.h:125
llvm::TargetRegisterInfo::getRegSizeInBits
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Definition: TargetRegisterInfo.h:275
uint16_t
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:326
x
TODO unsigned x
Definition: README.txt:10
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
isEqual
static bool isEqual(const Function &Caller, const Function &Callee)
Definition: Attributes.cpp:1834
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:322
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:417
llvm::BitTracker::BitRef::Pos
uint16_t Pos
Definition: BitTracker.h:135
llvm::MachineRegisterInfo::use_end
static use_iterator use_end()
Definition: MachineRegisterInfo.h:475
llvm::Registry
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
llvm::BitTracker::BitValue::One
@ One
Definition: BitTracker.h:159
SmallVector.h
MachineInstrBuilder.h
BT
BitTracker BT
Definition: BitTracker.cpp:73
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
N
#define N
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:53
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:43
INITIALIZE_PASS
INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched", "Hexagon Loop Rescheduling", false, false) HexagonLoopRescheduling
Definition: HexagonBitSimplify.cpp:2975
llvm::BitTracker::BitValue::Type
ValueType Type
Definition: BitTracker.h:191
llvm::HexagonRegisterInfo::getHexagonSubRegIndex
unsigned getHexagonSubRegIndex(const TargetRegisterClass &RC, unsigned GenIdx) const
Definition: HexagonRegisterInfo.cpp:420
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
MachineOperand.h
CountExtract
static unsigned CountExtract
Definition: HexagonBitSimplify.cpp:61
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::HexagonRegisterInfo
Definition: HexagonRegisterInfo.h:29
llvm::Register::virtReg2Index
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
Definition: Register.h:77
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:405
llvm::BitTracker::trace
void trace(bool On=false)
Definition: BitTracker.h:50
raw_ostream.h
llvm::BitTracker::BitValue
Definition: BitTracker.h:155
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
MachineFunction.h
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:111
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h
TargetRegisterInfo.h
Debug.h
llvm::SrcOp
Definition: MachineIRBuilder.h:126
MachineDominators.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37