LLVM  10.0.0svn
HexagonVectorLoopCarriedReuse.cpp
Go to the documentation of this file.
1 //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass removes the computation of provably redundant expressions that have
10 // been computed earlier in a previous iteration. It relies on the use of PHIs
11 // to identify loop carried dependences. This is scalar replacement for vector
12 // types.
13 //
14 //-----------------------------------------------------------------------------
15 // Motivation: Consider the case where we have the following loop structure.
16 //
17 // Loop:
18 // t0 = a[i];
19 // t1 = f(t0);
20 // t2 = g(t1);
21 // ...
22 // t3 = a[i+1];
23 // t4 = f(t3);
24 // t5 = g(t4);
25 // t6 = op(t2, t5)
26 // cond_branch <Loop>
27 //
28 // This can be converted to
29 // t00 = a[0];
30 // t10 = f(t00);
31 // t20 = g(t10);
32 // Loop:
33 // t2 = t20;
34 // t3 = a[i+1];
35 // t4 = f(t3);
36 // t5 = g(t4);
37 // t6 = op(t2, t5)
38 // t20 = t5
39 // cond_branch <Loop>
40 //
41 // SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
42 // Such a loop comes to this pass in the following form.
43 //
44 // LoopPreheader:
45 // X0 = a[0];
46 // Loop:
47 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
48 // t1 = f(X2) <-- I1
49 // t2 = g(t1)
50 // ...
51 // X1 = a[i+1]
52 // t4 = f(X1) <-- I2
53 // t5 = g(t4)
54 // t6 = op(t2, t5)
55 // cond_branch <Loop>
56 //
57 // In this pass, we look for PHIs such as X2 whose incoming values come only
58 // from the Loop Preheader and over the backedge and additionally, both these
59 // values are the results of the same operation in terms of opcode. We call such
60 // a PHI node a dependence chain or DepChain. In this case, the dependence of X2
61 // over X1 is carried over only one iteration and so the DepChain is only one
62 // PHI node long.
63 //
64 // Then, we traverse the uses of the PHI (X2) and the uses of the value of the
65 // PHI coming over the backedge (X1). We stop at the first pair of such users
66 // I1 (of X2) and I2 (of X1) that meet the following conditions.
67 // 1. I1 and I2 are the same operation, but with different operands.
68 // 2. X2 and X1 are used at the same operand number in the two instructions.
69 // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is
70 // a DepChain from Op1 to Op2 of the same length as that between X2 and X1.
71 //
72 // We then make the following transformation
73 // LoopPreheader:
74 // X0 = a[0];
75 // Y0 = f(X0);
76 // Loop:
77 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
78 // Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
79 // t1 = f(X2) <-- Will be removed by DCE.
80 // t2 = g(Y2)
81 // ...
82 // X1 = a[i+1]
83 // t4 = f(X1)
84 // t5 = g(t4)
85 // t6 = op(t2, t5)
86 // cond_branch <Loop>
87 //
88 // We proceed until we cannot find any more such instructions I1 and I2.
89 //
90 // --- DepChains & Loop carried dependences ---
91 // Consider a single basic block loop such as
92 //
93 // LoopPreheader:
94 // X0 = ...
95 // Y0 = ...
96 // Loop:
97 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
98 // Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
99 // ...
100 // X1 = ...
101 // ...
102 // cond_branch <Loop>
103 //
104 // Then there is a dependence between X2 and X1 that goes back one iteration,
105 // i.e. X1 is used as X2 in the very next iteration. We represent this as a
106 // DepChain from X2 to X1 (X2->X1).
107 // Similarly, there is a dependence between Y2 and X1 that goes back two
108 // iterations. X1 is used as Y2 two iterations after it is computed. This is
109 // represented by a DepChain as (Y2->X2->X1).
110 //
111 // A DepChain has the following properties.
112 // 1. Number of Instructions in DepChain = Number of edges in DepChain + 1 =
113 // Number of iterations of carried dependence + 1.
114 // 2. All instructions in the DepChain except the last are PHIs.
115 //
116 //===----------------------------------------------------------------------===//
117 
118 #include "llvm/ADT/SetVector.h"
119 #include "llvm/ADT/SmallVector.h"
120 #include "llvm/ADT/Statistic.h"
121 #include "llvm/Analysis/LoopInfo.h"
122 #include "llvm/Analysis/LoopPass.h"
123 #include "llvm/IR/BasicBlock.h"
124 #include "llvm/IR/DerivedTypes.h"
125 #include "llvm/IR/IRBuilder.h"
126 #include "llvm/IR/Instruction.h"
127 #include "llvm/IR/Instructions.h"
128 #include "llvm/IR/IntrinsicInst.h"
129 #include "llvm/IR/Intrinsics.h"
130 #include "llvm/IR/Use.h"
131 #include "llvm/IR/User.h"
132 #include "llvm/IR/Value.h"
133 #include "llvm/Pass.h"
134 #include "llvm/Support/Casting.h"
136 #include "llvm/Support/Compiler.h"
137 #include "llvm/Support/Debug.h"
139 #include "llvm/Transforms/Scalar.h"
140 #include "llvm/Transforms/Utils.h"
141 #include <algorithm>
142 #include <cassert>
143 #include <cstddef>
144 #include <map>
145 #include <memory>
146 #include <set>
147 
148 using namespace llvm;
149 
150 #define DEBUG_TYPE "hexagon-vlcr"
151 
152 STATISTIC(HexagonNumVectorLoopCarriedReuse,
153  "Number of values that were reused from a previous iteration.");
154 
155 static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
156  cl::Hidden,
157  cl::desc("Maximum distance of loop carried dependences that are handled"),
159 
160 namespace llvm {
161 
164 
165 } // end namespace llvm
166 
167 namespace {
168 
169  // See info about DepChain in the comments at the top of this file.
170  using ChainOfDependences = SmallVector<Instruction *, 4>;
171 
172  class DepChain {
173  ChainOfDependences Chain;
174 
175  public:
176  bool isIdentical(DepChain &Other) const {
177  if (Other.size() != size())
178  return false;
179  ChainOfDependences &OtherChain = Other.getChain();
180  for (int i = 0; i < size(); ++i) {
181  if (Chain[i] != OtherChain[i])
182  return false;
183  }
184  return true;
185  }
186 
187  ChainOfDependences &getChain() {
188  return Chain;
189  }
190 
191  int size() const {
192  return Chain.size();
193  }
194 
195  void clear() {
196  Chain.clear();
197  }
198 
199  void push_back(Instruction *I) {
200  Chain.push_back(I);
201  }
202 
203  int iterations() const {
204  return size() - 1;
205  }
206 
207  Instruction *front() const {
208  return Chain.front();
209  }
210 
211  Instruction *back() const {
212  return Chain.back();
213  }
214 
215  Instruction *&operator[](const int index) {
216  return Chain[index];
217  }
218 
219  friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
220  };
221 
223  raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
224  const ChainOfDependences &CD = D.Chain;
225  int ChainSize = CD.size();
226  OS << "**DepChain Start::**\n";
227  for (int i = 0; i < ChainSize -1; ++i) {
228  OS << *(CD[i]) << " -->\n";
229  }
230  OS << *CD[ChainSize-1] << "\n";
231  return OS;
232  }
233 
234  struct ReuseValue {
235  Instruction *Inst2Replace = nullptr;
236 
237  // In the new PHI node that we'll construct this is the value that'll be
238  // used over the backedge. This is teh value that gets reused from a
239  // previous iteration.
240  Instruction *BackedgeInst = nullptr;
241  std::map<Instruction *, DepChain *> DepChains;
242  int Iterations = -1;
243 
244  ReuseValue() = default;
245 
246  void reset() {
247  Inst2Replace = nullptr;
248  BackedgeInst = nullptr;
249  DepChains.clear();
250  Iterations = -1;
251  }
252  bool isDefined() { return Inst2Replace != nullptr; }
253  };
254 
256  raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
257  OS << "** ReuseValue ***\n";
258  OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
259  OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
260  return OS;
261  }
262 
// LoopPass that performs the loop carried reuse transformation described in
// the comments at the top of this file.
263  class HexagonVectorLoopCarriedReuse : public LoopPass {
264  public:
265  static char ID;
266 
// NOTE(review): the listing jumps from line 267 to line 270, so any statements
// in the constructor body are not visible here; confirm against the real file.
267  explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
270  }
271 
272  StringRef getPassName() const override {
273  return "Hexagon-specific loop carried reuse for HVX vectors";
274  }
275 
// NOTE(review): the listing jumps from line 276 to line 281; only the
// setPreservesCFG() call is visible, other analysis requirements may be
// missing from this view.
276  void getAnalysisUsage(AnalysisUsage &AU) const override {
281  AU.setPreservesCFG();
282  }
283 
// Entry point: filters out loops this pass does not handle, records the loop
// in CurLoop and runs doVLCR().
284  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
285 
286  private:
// DepChains collected by findLoopCarriedDeps(); they are allocated there and
// deleted in doVLCR().
287  SetVector<DepChain *> Dependences;
// Instructions already replaced by reuseValue(); findValueToReuse() skips them.
288  std::set<Instruction *> ReplacedInsts;
// Loop currently being processed; assigned in runOnLoop().
289  Loop *CurLoop;
// Candidate filled in by findValueToReuse() and consumed by reuseValue().
290  ReuseValue ReuseCandidate;
291 
292  bool doVLCR();
303 
304 } // end anonymous namespace
305 
307 
308 INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
309  "Hexagon-specific predictive commoning for HVX vectors", false, false)
311 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
312 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
313 INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
314  "Hexagon-specific predictive commoning for HVX vectors", false, false)
315 
316 bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
317  if (skipLoop(L))
318  return false;
319 
320  if (!L->getLoopPreheader())
321  return false;
322 
323  // Work only on innermost loops.
324  if (!L->getSubLoops().empty())
325  return false;
326 
327  // Work only on single basic blocks loops.
328  if (L->getNumBlocks() != 1)
329  return false;
330 
331  CurLoop = L;
332 
333  return doVLCR();
334 }
335 
336 bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
337  switch (C->getCalledFunction()->getIntrinsicID()) {
338  case Intrinsic::hexagon_V6_vaddb:
339  case Intrinsic::hexagon_V6_vaddb_128B:
340  case Intrinsic::hexagon_V6_vaddh:
341  case Intrinsic::hexagon_V6_vaddh_128B:
342  case Intrinsic::hexagon_V6_vaddw:
343  case Intrinsic::hexagon_V6_vaddw_128B:
344  case Intrinsic::hexagon_V6_vaddubh:
345  case Intrinsic::hexagon_V6_vaddubh_128B:
346  case Intrinsic::hexagon_V6_vadduhw:
347  case Intrinsic::hexagon_V6_vadduhw_128B:
348  case Intrinsic::hexagon_V6_vaddhw:
349  case Intrinsic::hexagon_V6_vaddhw_128B:
350  case Intrinsic::hexagon_V6_vmaxb:
351  case Intrinsic::hexagon_V6_vmaxb_128B:
352  case Intrinsic::hexagon_V6_vmaxh:
353  case Intrinsic::hexagon_V6_vmaxh_128B:
354  case Intrinsic::hexagon_V6_vmaxw:
355  case Intrinsic::hexagon_V6_vmaxw_128B:
356  case Intrinsic::hexagon_V6_vmaxub:
357  case Intrinsic::hexagon_V6_vmaxub_128B:
358  case Intrinsic::hexagon_V6_vmaxuh:
359  case Intrinsic::hexagon_V6_vmaxuh_128B:
360  case Intrinsic::hexagon_V6_vminub:
361  case Intrinsic::hexagon_V6_vminub_128B:
362  case Intrinsic::hexagon_V6_vminuh:
363  case Intrinsic::hexagon_V6_vminuh_128B:
364  case Intrinsic::hexagon_V6_vminb:
365  case Intrinsic::hexagon_V6_vminb_128B:
366  case Intrinsic::hexagon_V6_vminh:
367  case Intrinsic::hexagon_V6_vminh_128B:
368  case Intrinsic::hexagon_V6_vminw:
369  case Intrinsic::hexagon_V6_vminw_128B:
370  case Intrinsic::hexagon_V6_vmpyub:
371  case Intrinsic::hexagon_V6_vmpyub_128B:
372  case Intrinsic::hexagon_V6_vmpyuh:
373  case Intrinsic::hexagon_V6_vmpyuh_128B:
374  case Intrinsic::hexagon_V6_vavgub:
375  case Intrinsic::hexagon_V6_vavgub_128B:
376  case Intrinsic::hexagon_V6_vavgh:
377  case Intrinsic::hexagon_V6_vavgh_128B:
378  case Intrinsic::hexagon_V6_vavguh:
379  case Intrinsic::hexagon_V6_vavguh_128B:
380  case Intrinsic::hexagon_V6_vavgw:
381  case Intrinsic::hexagon_V6_vavgw_128B:
382  case Intrinsic::hexagon_V6_vavgb:
383  case Intrinsic::hexagon_V6_vavgb_128B:
384  case Intrinsic::hexagon_V6_vavguw:
385  case Intrinsic::hexagon_V6_vavguw_128B:
386  case Intrinsic::hexagon_V6_vabsdiffh:
387  case Intrinsic::hexagon_V6_vabsdiffh_128B:
388  case Intrinsic::hexagon_V6_vabsdiffub:
389  case Intrinsic::hexagon_V6_vabsdiffub_128B:
390  case Intrinsic::hexagon_V6_vabsdiffuh:
391  case Intrinsic::hexagon_V6_vabsdiffuh_128B:
392  case Intrinsic::hexagon_V6_vabsdiffw:
393  case Intrinsic::hexagon_V6_vabsdiffw_128B:
394  return true;
395  default:
396  return false;
397  }
398 }
399 
400 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
401  Instruction *I2) {
402  if (!I1->isSameOperationAs(I2))
403  return false;
404  // This check is in place specifically for intrinsics. isSameOperationAs will
405  // return two for any two hexagon intrinsics because they are essentially the
406  // same instruciton (CallInst). We need to scratch the surface to see if they
407  // are calls to the same function.
408  if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
409  if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
410  if (C1->getCalledFunction() != C2->getCalledFunction())
411  return false;
412  }
413  }
414 
415  // If both the Instructions are of Vector Type and any of the element
416  // is integer constant, check their values too for equivalence.
417  if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
418  unsigned NumOperands = I1->getNumOperands();
419  for (unsigned i = 0; i < NumOperands; ++i) {
422  if(!C1) continue;
423  assert(C2);
424  if (C1->getSExtValue() != C2->getSExtValue())
425  return false;
426  }
427  }
428 
429  return true;
430 }
431 
432 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
433  const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
434  if (!II)
435  return true;
436 
437  switch (II->getIntrinsicID()) {
438  case Intrinsic::hexagon_V6_hi:
439  case Intrinsic::hexagon_V6_lo:
440  case Intrinsic::hexagon_V6_hi_128B:
441  case Intrinsic::hexagon_V6_lo_128B:
442  LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
443  return false;
444  default:
445  return true;
446  }
447 }
448 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
449  for (auto *D : Dependences) {
450  LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
451  if (D->iterations() > HexagonVLCRIterationLim) {
452  LLVM_DEBUG(
453  dbgs()
454  << ".. Skipping because number of iterations > than the limit\n");
455  continue;
456  }
457 
458  PHINode *PN = cast<PHINode>(D->front());
459  Instruction *BEInst = D->back();
460  int Iters = D->iterations();
461  BasicBlock *BB = PN->getParent();
462  LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
463  << " can be reused\n");
464 
466  for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
467  Use &U = *UI;
468  Instruction *User = cast<Instruction>(U.getUser());
469 
470  if (User->getParent() != BB)
471  continue;
472  if (ReplacedInsts.count(User)) {
473  LLVM_DEBUG(dbgs() << *User
474  << " has already been replaced. Skipping...\n");
475  continue;
476  }
477  if (isa<PHINode>(User))
478  continue;
479  if (User->mayHaveSideEffects())
480  continue;
481  if (!canReplace(User))
482  continue;
483 
484  PNUsers.push_back(User);
485  }
486  LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
487 
488  // For each interesting use I of PN, find an Instruction BEUser that
489  // performs the same operation as I on BEInst and whose other operands,
490  // if any, can also be rematerialized in OtherBB. We stop when we find the
491  // first such Instruction BEUser. This is because once BEUser is
492  // rematerialized in OtherBB, we may find more such "fixup" opportunities
493  // in this block. So, we'll start over again.
494  for (Instruction *I : PNUsers) {
495  for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
496  ++UI) {
497  Use &U = *UI;
498  Instruction *BEUser = cast<Instruction>(U.getUser());
499 
500  if (BEUser->getParent() != BB)
501  continue;
502  if (!isEquivalentOperation(I, BEUser))
503  continue;
504 
505  int NumOperands = I->getNumOperands();
506 
507  // Take operands of each PNUser one by one and try to find DepChain
508  // with every operand of the BEUser. If any of the operands of BEUser
509  // has DepChain with current operand of the PNUser, break the matcher
510  // loop. Keep doing this for Every PNUser operand. If PNUser operand
511  // does not have DepChain with any of the BEUser operand, break the
512  // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
513  // This ensures that DepChain exist for all the PNUser operand with
514  // BEUser operand. This also ensures that DepChains are independent of
515  // the positions in PNUser and BEUser.
516  std::map<Instruction *, DepChain *> DepChains;
517  CallInst *C1 = dyn_cast<CallInst>(I);
518  if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
519  bool Found = false;
520  for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
521  Value *Op = I->getOperand(OpNo);
522  Instruction *OpInst = dyn_cast<Instruction>(Op);
523  Found = false;
524  for (int T = 0; T < NumOperands; ++T) {
525  Value *BEOp = BEUser->getOperand(T);
526  Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
527  if (!OpInst && !BEOpInst) {
528  if (Op == BEOp) {
529  Found = true;
530  break;
531  }
532  }
533 
534  if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
535  continue;
536 
537  DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
538 
539  if (D) {
540  Found = true;
541  DepChains[OpInst] = D;
542  break;
543  }
544  }
545  if (!Found) {
546  BEUser = nullptr;
547  break;
548  }
549  }
550  } else {
551 
552  for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
553  Value *Op = I->getOperand(OpNo);
554  Value *BEOp = BEUser->getOperand(OpNo);
555 
556  Instruction *OpInst = dyn_cast<Instruction>(Op);
557  if (!OpInst) {
558  if (Op == BEOp)
559  continue;
560  // Do not allow reuse to occur when the operands may be different
561  // values.
562  BEUser = nullptr;
563  break;
564  }
565 
566  Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
567  DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
568 
569  if (D) {
570  DepChains[OpInst] = D;
571  } else {
572  BEUser = nullptr;
573  break;
574  }
575  }
576  }
577  if (BEUser) {
578  LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
579  ReuseCandidate.Inst2Replace = I;
580  ReuseCandidate.BackedgeInst = BEUser;
581  ReuseCandidate.DepChains = DepChains;
582  ReuseCandidate.Iterations = Iters;
583  return;
584  }
585  ReuseCandidate.reset();
586  }
587  }
588  }
589  ReuseCandidate.reset();
590 }
591 
592 Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
593  BasicBlock *BB) {
594  PHINode *PN = dyn_cast<PHINode>(Op);
595  assert(PN);
596  Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
597  return ValueInBlock;
598 }
599 
600 void HexagonVectorLoopCarriedReuse::reuseValue() {
601  LLVM_DEBUG(dbgs() << ReuseCandidate);
602  Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
603  Instruction *BEInst = ReuseCandidate.BackedgeInst;
604  int NumOperands = Inst2Replace->getNumOperands();
605  std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
606  int Iterations = ReuseCandidate.Iterations;
607  BasicBlock *LoopPH = CurLoop->getLoopPreheader();
608  assert(!DepChains.empty() && "No DepChains");
609  LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
610 
611  SmallVector<Instruction *, 4> InstsInPreheader;
612  for (int i = 0; i < Iterations; ++i) {
613  Instruction *InstInPreheader = Inst2Replace->clone();
615  for (int j = 0; j < NumOperands; ++j) {
616  Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
617  if (!I)
618  continue;
619  // Get the DepChain corresponding to this operand.
620  DepChain &D = *DepChains[I];
621  // Get the PHI for the iteration number and find
622  // the incoming value from the Loop Preheader for
623  // that PHI.
624  Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
625  InstInPreheader->setOperand(j, ValInPreheader);
626  }
627  InstsInPreheader.push_back(InstInPreheader);
628  InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
629  InstInPreheader->insertBefore(LoopPH->getTerminator());
630  LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
631  << LoopPH->getName() << "\n");
632  }
633  BasicBlock *BB = BEInst->getParent();
634  IRBuilder<> IRB(BB);
635  IRB.SetInsertPoint(BB->getFirstNonPHI());
636  Value *BEVal = BEInst;
637  PHINode *NewPhi;
638  for (int i = Iterations-1; i >=0 ; --i) {
639  Instruction *InstInPreheader = InstsInPreheader[i];
640  NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
641  NewPhi->addIncoming(InstInPreheader, LoopPH);
642  NewPhi->addIncoming(BEVal, BB);
643  LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
644  << "\n");
645  BEVal = NewPhi;
646  }
647  // We are in LCSSA form. So, a value defined inside the Loop is used only
648  // inside the loop. So, the following is safe.
649  Inst2Replace->replaceAllUsesWith(NewPhi);
650  ReplacedInsts.insert(Inst2Replace);
651  ++HexagonNumVectorLoopCarriedReuse;
652 }
653 
654 bool HexagonVectorLoopCarriedReuse::doVLCR() {
655  assert(CurLoop->getSubLoops().empty() &&
656  "Can do VLCR on the innermost loop only");
657  assert((CurLoop->getNumBlocks() == 1) &&
658  "Can do VLCR only on single block loops");
659 
660  bool Changed = false;
661  bool Continue;
662 
663  LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
664  do {
665  // Reset datastructures.
666  Dependences.clear();
667  Continue = false;
668 
669  findLoopCarriedDeps();
670  findValueToReuse();
671  if (ReuseCandidate.isDefined()) {
672  reuseValue();
673  Changed = true;
674  Continue = true;
675  }
676  llvm::for_each(Dependences, std::default_delete<DepChain>());
677  } while (Continue);
678  return Changed;
679 }
680 
681 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
682  DepChain &D) {
683  PHINode *PN = dyn_cast<PHINode>(I);
684  if (!PN) {
685  D.push_back(I);
686  return;
687  } else {
688  auto NumIncomingValues = PN->getNumIncomingValues();
689  if (NumIncomingValues != 2) {
690  D.clear();
691  return;
692  }
693 
694  BasicBlock *BB = PN->getParent();
695  if (BB != CurLoop->getHeader()) {
696  D.clear();
697  return;
698  }
699 
700  Value *BEVal = PN->getIncomingValueForBlock(BB);
701  Instruction *BEInst = dyn_cast<Instruction>(BEVal);
702  // This is a single block loop with a preheader, so at least
703  // one value should come over the backedge.
704  assert(BEInst && "There should be a value over the backedge");
705 
706  Value *PreHdrVal =
707  PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
708  if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
709  D.clear();
710  return;
711  }
712  D.push_back(PN);
713  findDepChainFromPHI(BEInst, D);
714  }
715 }
716 
717 DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
718  Instruction *I2,
719  int Iters) {
720  for (auto *D : Dependences) {
721  if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
722  return D;
723  }
724  return nullptr;
725 }
726 
727 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
728  BasicBlock *BB = CurLoop->getHeader();
729  for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
730  auto *PN = cast<PHINode>(I);
731  if (!isa<VectorType>(PN->getType()))
732  continue;
733 
734  DepChain *D = new DepChain();
735  findDepChainFromPHI(PN, *D);
736  if (D->size() != 0)
737  Dependences.insert(D);
738  else
739  delete D;
740  }
741  LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
742  LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
743  ++i) { dbgs() << *Dependences[i] << "\n"; });
744 }
745 
747  return new HexagonVectorLoopCarriedReuse();
748 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:80
uint64_t CallInst * C
use_iterator use_end()
Definition: Value.h:366
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This class represents a function call, abstracting a target machine's calling convention.
static cl::opt< int > HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2), cl::ZeroOrMore)
STATISTIC(NumFunctions, "Total number of functions")
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
This defines the Use class.
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
hexagon Hexagon specific predictive commoning for HVX vectors
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr", "Hexagon-specific predictive commoning for HVX vectors", false, false) INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following: ...
User * getUser() const LLVM_READONLY
Returns the User that contains this Use.
Definition: Use.cpp:40
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
AnalysisUsage & addPreservedID(const void *ID)
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:132
Value * getOperand(unsigned i) const
Definition: User.h:169
void initializeHexagonVectorLoopCarriedReusePass(PassRegistry &)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
Definition: Instruction.cpp:73
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
char & LCSSAID
Definition: LCSSA.cpp:467
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:572
Represent the analysis usage information of a pass.
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:172
size_t size() const
Definition: SmallVector.h:52
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
char & LoopSimplifyID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:50
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2218
unsigned getNumOperands() const
Definition: User.h:191
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
iterator end()
Definition: BasicBlock.h:270
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:314
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
unsigned getNumIncomingValues() const
Return the number of incoming edges.
bool isCommutative() const
Return true if the instruction is commutative:
Definition: Instruction.h:488
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:225
use_iterator use_begin()
Definition: Value.h:358
Pass * createHexagonVectorLoopCarriedReusePass()
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1287
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2045
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
A vector that has set insertion semantics.
Definition: SetVector.h:40
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1208
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:156
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1158
#define LLVM_DEBUG(X)
Definition: Debug.h:122
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66