LLVM  12.0.0git
HexagonVectorLoopCarriedReuse.cpp
Go to the documentation of this file.
1 //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass removes the computation of provably redundant expressions that have
10 // been computed earlier in a previous iteration. It relies on the use of PHIs
11 // to identify loop carried dependences. This is scalar replacement for vector
12 // types.
13 //
14 //-----------------------------------------------------------------------------
15 // Motivation: Consider the case where we have the following loop structure.
16 //
17 // Loop:
18 // t0 = a[i];
19 // t1 = f(t0);
20 // t2 = g(t1);
21 // ...
22 // t3 = a[i+1];
23 // t4 = f(t3);
24 // t5 = g(t4);
25 // t6 = op(t2, t5)
26 // cond_branch <Loop>
27 //
28 // This can be converted to
29 // t00 = a[0];
30 // t10 = f(t00);
31 // t20 = g(t10);
32 // Loop:
33 // t2 = t20;
34 // t3 = a[i+1];
35 // t4 = f(t3);
36 // t5 = g(t4);
37 // t6 = op(t2, t5)
38 // t20 = t5
39 // cond_branch <Loop>
40 //
41 // SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
42 // Such a loop comes to this pass in the following form.
43 //
44 // LoopPreheader:
45 // X0 = a[0];
46 // Loop:
47 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
48 // t1 = f(X2) <-- I1
49 // t2 = g(t1)
50 // ...
51 // X1 = a[i+1]
52 // t4 = f(X1) <-- I2
53 // t5 = g(t4)
54 // t6 = op(t2, t5)
55 // cond_branch <Loop>
56 //
57 // In this pass, we look for PHIs such as X2 whose incoming values come only
58 // from the Loop Preheader and over the backedge and additionaly, both these
59 // values are the results of the same operation in terms of opcode. We call such
60 // a PHI node a dependence chain or DepChain. In this case, the dependence of X2
61 // over X1 is carried over only one iteration and so the DepChain is only one
62 // PHI node long.
63 //
64 // Then, we traverse the uses of the PHI (X2) and the uses of the value of the
65 // PHI coming over the backedge (X1). We stop at the first pair of such users
66 // I1 (of X2) and I2 (of X1) that meet the following conditions.
67 // 1. I1 and I2 are the same operation, but with different operands.
68 // 2. X2 and X1 are used at the same operand number in the two instructions.
69 // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a
70 // a DepChain from Op1 to Op2 of the same length as that between X2 and X1.
71 //
72 // We then make the following transformation
73 // LoopPreheader:
74 // X0 = a[0];
75 // Y0 = f(X0);
76 // Loop:
77 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
78 // Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
79 // t1 = f(X2) <-- Will be removed by DCE.
80 // t2 = g(Y2)
81 // ...
82 // X1 = a[i+1]
83 // t4 = f(X1)
84 // t5 = g(t4)
85 // t6 = op(t2, t5)
86 // cond_branch <Loop>
87 //
88 // We proceed until we cannot find any more such instructions I1 and I2.
89 //
90 // --- DepChains & Loop carried dependences ---
91 // Consider a single basic block loop such as
92 //
93 // LoopPreheader:
94 // X0 = ...
95 // Y0 = ...
96 // Loop:
97 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
98 // Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
99 // ...
100 // X1 = ...
101 // ...
102 // cond_branch <Loop>
103 //
104 // Then there is a dependence between X2 and X1 that goes back one iteration,
105 // i.e. X1 is used as X2 in the very next iteration. We represent this as a
106 // DepChain from X2 to X1 (X2->X1).
107 // Similarly, there is a dependence between Y2 and X1 that goes back two
108 // iterations. X1 is used as Y2 two iterations after it is computed. This is
109 // represented by a DepChain as (Y2->X2->X1).
110 //
111 // A DepChain has the following properties.
112 // 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of
113 // iterations of carried dependence + 1.
114 // 2. All instructions in the DepChain except the last are PHIs.
115 //
116 //===----------------------------------------------------------------------===//
117 
118 #include "llvm/ADT/SetVector.h"
119 #include "llvm/ADT/SmallVector.h"
120 #include "llvm/ADT/Statistic.h"
121 #include "llvm/Analysis/LoopInfo.h"
122 #include "llvm/Analysis/LoopPass.h"
123 #include "llvm/IR/BasicBlock.h"
124 #include "llvm/IR/DerivedTypes.h"
125 #include "llvm/IR/IRBuilder.h"
126 #include "llvm/IR/Instruction.h"
127 #include "llvm/IR/Instructions.h"
128 #include "llvm/IR/IntrinsicInst.h"
129 #include "llvm/IR/Intrinsics.h"
130 #include "llvm/IR/IntrinsicsHexagon.h"
131 #include "llvm/IR/Use.h"
132 #include "llvm/IR/User.h"
133 #include "llvm/IR/Value.h"
134 #include "llvm/InitializePasses.h"
135 #include "llvm/Pass.h"
136 #include "llvm/Support/Casting.h"
138 #include "llvm/Support/Compiler.h"
139 #include "llvm/Support/Debug.h"
141 #include "llvm/Transforms/Scalar.h"
142 #include "llvm/Transforms/Utils.h"
143 #include <algorithm>
144 #include <cassert>
145 #include <cstddef>
146 #include <map>
147 #include <memory>
148 #include <set>
149 
150 using namespace llvm;
151 
152 #define DEBUG_TYPE "hexagon-vlcr"
153 
154 STATISTIC(HexagonNumVectorLoopCarriedReuse,
155  "Number of values that were reused from a previous iteration.");
156 
157 static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
158  cl::Hidden,
159  cl::desc("Maximum distance of loop carried dependences that are handled"),
161 
162 namespace llvm {
163 
166 
167 } // end namespace llvm
168 
169 namespace {
170 
171  // See info about DepChain in the comments at the top of this file.
172  using ChainOfDependences = SmallVector<Instruction *, 4>;
173 
174  class DepChain {
175  ChainOfDependences Chain;
176 
177  public:
178  bool isIdentical(DepChain &Other) const {
179  if (Other.size() != size())
180  return false;
181  ChainOfDependences &OtherChain = Other.getChain();
182  for (int i = 0; i < size(); ++i) {
183  if (Chain[i] != OtherChain[i])
184  return false;
185  }
186  return true;
187  }
188 
189  ChainOfDependences &getChain() {
190  return Chain;
191  }
192 
193  int size() const {
194  return Chain.size();
195  }
196 
197  void clear() {
198  Chain.clear();
199  }
200 
201  void push_back(Instruction *I) {
202  Chain.push_back(I);
203  }
204 
205  int iterations() const {
206  return size() - 1;
207  }
208 
209  Instruction *front() const {
210  return Chain.front();
211  }
212 
213  Instruction *back() const {
214  return Chain.back();
215  }
216 
217  Instruction *&operator[](const int index) {
218  return Chain[index];
219  }
220 
221  friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
222  };
223 
225  raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
226  const ChainOfDependences &CD = D.Chain;
227  int ChainSize = CD.size();
228  OS << "**DepChain Start::**\n";
229  for (int i = 0; i < ChainSize -1; ++i) {
230  OS << *(CD[i]) << " -->\n";
231  }
232  OS << *CD[ChainSize-1] << "\n";
233  return OS;
234  }
235 
236  struct ReuseValue {
237  Instruction *Inst2Replace = nullptr;
238 
239  // In the new PHI node that we'll construct this is the value that'll be
240  // used over the backedge. This is the value that gets reused from a
241  // previous iteration.
242  Instruction *BackedgeInst = nullptr;
243  std::map<Instruction *, DepChain *> DepChains;
244  int Iterations = -1;
245 
246  ReuseValue() = default;
247 
248  void reset() {
249  Inst2Replace = nullptr;
250  BackedgeInst = nullptr;
251  DepChains.clear();
252  Iterations = -1;
253  }
254  bool isDefined() { return Inst2Replace != nullptr; }
255  };
256 
258  raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
259  OS << "** ReuseValue ***\n";
260  OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
261  OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
262  return OS;
263  }
264 
265  class HexagonVectorLoopCarriedReuse : public LoopPass {
266  public:
267  static char ID;
268 
269  explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
272  }
273 
274  StringRef getPassName() const override {
275  return "Hexagon-specific loop carried reuse for HVX vectors";
276  }
277 
278  void getAnalysisUsage(AnalysisUsage &AU) const override {
283  AU.setPreservesCFG();
284  }
285 
286  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
287 
288  private:
289  SetVector<DepChain *> Dependences;
290  std::set<Instruction *> ReplacedInsts;
291  Loop *CurLoop;
292  ReuseValue ReuseCandidate;
293 
294  bool doVLCR();
295  void findLoopCarriedDeps();
296  void findValueToReuse();
297  void findDepChainFromPHI(Instruction *I, DepChain &D);
298  void reuseValue();
299  Value *findValueInBlock(Value *Op, BasicBlock *BB);
300  DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
301  bool isEquivalentOperation(Instruction *I1, Instruction *I2);
302  bool canReplace(Instruction *I);
303  bool isCallInstCommutative(CallInst *C);
304  };
305 
306 } // end anonymous namespace
307 
309 
310 INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
311  "Hexagon-specific predictive commoning for HVX vectors", false, false)
313 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
314 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
315 INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
316  "Hexagon-specific predictive commoning for HVX vectors", false, false)
317 
318 bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
319  if (skipLoop(L))
320  return false;
321 
322  if (!L->getLoopPreheader())
323  return false;
324 
325  // Work only on innermost loops.
326  if (!L->getSubLoops().empty())
327  return false;
328 
329  // Work only on single basic blocks loops.
330  if (L->getNumBlocks() != 1)
331  return false;
332 
333  CurLoop = L;
334 
335  return doVLCR();
336 }
337 
338 bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
339  switch (C->getCalledFunction()->getIntrinsicID()) {
340  case Intrinsic::hexagon_V6_vaddb:
341  case Intrinsic::hexagon_V6_vaddb_128B:
342  case Intrinsic::hexagon_V6_vaddh:
343  case Intrinsic::hexagon_V6_vaddh_128B:
344  case Intrinsic::hexagon_V6_vaddw:
345  case Intrinsic::hexagon_V6_vaddw_128B:
346  case Intrinsic::hexagon_V6_vaddubh:
347  case Intrinsic::hexagon_V6_vaddubh_128B:
348  case Intrinsic::hexagon_V6_vadduhw:
349  case Intrinsic::hexagon_V6_vadduhw_128B:
350  case Intrinsic::hexagon_V6_vaddhw:
351  case Intrinsic::hexagon_V6_vaddhw_128B:
352  case Intrinsic::hexagon_V6_vmaxb:
353  case Intrinsic::hexagon_V6_vmaxb_128B:
354  case Intrinsic::hexagon_V6_vmaxh:
355  case Intrinsic::hexagon_V6_vmaxh_128B:
356  case Intrinsic::hexagon_V6_vmaxw:
357  case Intrinsic::hexagon_V6_vmaxw_128B:
358  case Intrinsic::hexagon_V6_vmaxub:
359  case Intrinsic::hexagon_V6_vmaxub_128B:
360  case Intrinsic::hexagon_V6_vmaxuh:
361  case Intrinsic::hexagon_V6_vmaxuh_128B:
362  case Intrinsic::hexagon_V6_vminub:
363  case Intrinsic::hexagon_V6_vminub_128B:
364  case Intrinsic::hexagon_V6_vminuh:
365  case Intrinsic::hexagon_V6_vminuh_128B:
366  case Intrinsic::hexagon_V6_vminb:
367  case Intrinsic::hexagon_V6_vminb_128B:
368  case Intrinsic::hexagon_V6_vminh:
369  case Intrinsic::hexagon_V6_vminh_128B:
370  case Intrinsic::hexagon_V6_vminw:
371  case Intrinsic::hexagon_V6_vminw_128B:
372  case Intrinsic::hexagon_V6_vmpyub:
373  case Intrinsic::hexagon_V6_vmpyub_128B:
374  case Intrinsic::hexagon_V6_vmpyuh:
375  case Intrinsic::hexagon_V6_vmpyuh_128B:
376  case Intrinsic::hexagon_V6_vavgub:
377  case Intrinsic::hexagon_V6_vavgub_128B:
378  case Intrinsic::hexagon_V6_vavgh:
379  case Intrinsic::hexagon_V6_vavgh_128B:
380  case Intrinsic::hexagon_V6_vavguh:
381  case Intrinsic::hexagon_V6_vavguh_128B:
382  case Intrinsic::hexagon_V6_vavgw:
383  case Intrinsic::hexagon_V6_vavgw_128B:
384  case Intrinsic::hexagon_V6_vavgb:
385  case Intrinsic::hexagon_V6_vavgb_128B:
386  case Intrinsic::hexagon_V6_vavguw:
387  case Intrinsic::hexagon_V6_vavguw_128B:
388  case Intrinsic::hexagon_V6_vabsdiffh:
389  case Intrinsic::hexagon_V6_vabsdiffh_128B:
390  case Intrinsic::hexagon_V6_vabsdiffub:
391  case Intrinsic::hexagon_V6_vabsdiffub_128B:
392  case Intrinsic::hexagon_V6_vabsdiffuh:
393  case Intrinsic::hexagon_V6_vabsdiffuh_128B:
394  case Intrinsic::hexagon_V6_vabsdiffw:
395  case Intrinsic::hexagon_V6_vabsdiffw_128B:
396  return true;
397  default:
398  return false;
399  }
400 }
401 
402 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
403  Instruction *I2) {
404  if (!I1->isSameOperationAs(I2))
405  return false;
406  // This check is in place specifically for intrinsics. isSameOperationAs will
407  // return two for any two hexagon intrinsics because they are essentially the
408  // same instruciton (CallInst). We need to scratch the surface to see if they
409  // are calls to the same function.
410  if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
411  if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
412  if (C1->getCalledFunction() != C2->getCalledFunction())
413  return false;
414  }
415  }
416 
417  // If both the Instructions are of Vector Type and any of the element
418  // is integer constant, check their values too for equivalence.
419  if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
420  unsigned NumOperands = I1->getNumOperands();
421  for (unsigned i = 0; i < NumOperands; ++i) {
424  if(!C1) continue;
425  assert(C2);
426  if (C1->getSExtValue() != C2->getSExtValue())
427  return false;
428  }
429  }
430 
431  return true;
432 }
433 
434 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
435  const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
436  if (!II)
437  return true;
438 
439  switch (II->getIntrinsicID()) {
440  case Intrinsic::hexagon_V6_hi:
441  case Intrinsic::hexagon_V6_lo:
442  case Intrinsic::hexagon_V6_hi_128B:
443  case Intrinsic::hexagon_V6_lo_128B:
444  LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
445  return false;
446  default:
447  return true;
448  }
449 }
450 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
451  for (auto *D : Dependences) {
452  LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
453  if (D->iterations() > HexagonVLCRIterationLim) {
454  LLVM_DEBUG(
455  dbgs()
456  << ".. Skipping because number of iterations > than the limit\n");
457  continue;
458  }
459 
460  PHINode *PN = cast<PHINode>(D->front());
461  Instruction *BEInst = D->back();
462  int Iters = D->iterations();
463  BasicBlock *BB = PN->getParent();
464  LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
465  << " can be reused\n");
466 
468  for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
469  Use &U = *UI;
470  Instruction *User = cast<Instruction>(U.getUser());
471 
472  if (User->getParent() != BB)
473  continue;
474  if (ReplacedInsts.count(User)) {
475  LLVM_DEBUG(dbgs() << *User
476  << " has already been replaced. Skipping...\n");
477  continue;
478  }
479  if (isa<PHINode>(User))
480  continue;
481  if (User->mayHaveSideEffects())
482  continue;
483  if (!canReplace(User))
484  continue;
485 
486  PNUsers.push_back(User);
487  }
488  LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
489 
490  // For each interesting use I of PN, find an Instruction BEUser that
491  // performs the same operation as I on BEInst and whose other operands,
492  // if any, can also be rematerialized in OtherBB. We stop when we find the
493  // first such Instruction BEUser. This is because once BEUser is
494  // rematerialized in OtherBB, we may find more such "fixup" opportunities
495  // in this block. So, we'll start over again.
496  for (Instruction *I : PNUsers) {
497  for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
498  ++UI) {
499  Use &U = *UI;
500  Instruction *BEUser = cast<Instruction>(U.getUser());
501 
502  if (BEUser->getParent() != BB)
503  continue;
504  if (!isEquivalentOperation(I, BEUser))
505  continue;
506 
507  int NumOperands = I->getNumOperands();
508 
509  // Take operands of each PNUser one by one and try to find DepChain
510  // with every operand of the BEUser. If any of the operands of BEUser
511  // has DepChain with current operand of the PNUser, break the matcher
512  // loop. Keep doing this for Every PNUser operand. If PNUser operand
513  // does not have DepChain with any of the BEUser operand, break the
514  // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
515  // This ensures that DepChain exist for all the PNUser operand with
516  // BEUser operand. This also ensures that DepChains are independent of
517  // the positions in PNUser and BEUser.
518  std::map<Instruction *, DepChain *> DepChains;
519  CallInst *C1 = dyn_cast<CallInst>(I);
520  if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
521  bool Found = false;
522  for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
523  Value *Op = I->getOperand(OpNo);
524  Instruction *OpInst = dyn_cast<Instruction>(Op);
525  Found = false;
526  for (int T = 0; T < NumOperands; ++T) {
527  Value *BEOp = BEUser->getOperand(T);
528  Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
529  if (!OpInst && !BEOpInst) {
530  if (Op == BEOp) {
531  Found = true;
532  break;
533  }
534  }
535 
536  if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
537  continue;
538 
539  DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
540 
541  if (D) {
542  Found = true;
543  DepChains[OpInst] = D;
544  break;
545  }
546  }
547  if (!Found) {
548  BEUser = nullptr;
549  break;
550  }
551  }
552  } else {
553 
554  for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
555  Value *Op = I->getOperand(OpNo);
556  Value *BEOp = BEUser->getOperand(OpNo);
557 
558  Instruction *OpInst = dyn_cast<Instruction>(Op);
559  if (!OpInst) {
560  if (Op == BEOp)
561  continue;
562  // Do not allow reuse to occur when the operands may be different
563  // values.
564  BEUser = nullptr;
565  break;
566  }
567 
568  Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
569  DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
570 
571  if (D) {
572  DepChains[OpInst] = D;
573  } else {
574  BEUser = nullptr;
575  break;
576  }
577  }
578  }
579  if (BEUser) {
580  LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
581  ReuseCandidate.Inst2Replace = I;
582  ReuseCandidate.BackedgeInst = BEUser;
583  ReuseCandidate.DepChains = DepChains;
584  ReuseCandidate.Iterations = Iters;
585  return;
586  }
587  ReuseCandidate.reset();
588  }
589  }
590  }
591  ReuseCandidate.reset();
592 }
593 
594 Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
595  BasicBlock *BB) {
596  PHINode *PN = dyn_cast<PHINode>(Op);
597  assert(PN);
598  Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
599  return ValueInBlock;
600 }
601 
602 void HexagonVectorLoopCarriedReuse::reuseValue() {
603  LLVM_DEBUG(dbgs() << ReuseCandidate);
604  Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
605  Instruction *BEInst = ReuseCandidate.BackedgeInst;
606  int NumOperands = Inst2Replace->getNumOperands();
607  std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
608  int Iterations = ReuseCandidate.Iterations;
609  BasicBlock *LoopPH = CurLoop->getLoopPreheader();
610  assert(!DepChains.empty() && "No DepChains");
611  LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
612 
613  SmallVector<Instruction *, 4> InstsInPreheader;
614  for (int i = 0; i < Iterations; ++i) {
615  Instruction *InstInPreheader = Inst2Replace->clone();
617  for (int j = 0; j < NumOperands; ++j) {
618  Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
619  if (!I)
620  continue;
621  // Get the DepChain corresponding to this operand.
622  DepChain &D = *DepChains[I];
623  // Get the PHI for the iteration number and find
624  // the incoming value from the Loop Preheader for
625  // that PHI.
626  Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
627  InstInPreheader->setOperand(j, ValInPreheader);
628  }
629  InstsInPreheader.push_back(InstInPreheader);
630  InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
631  InstInPreheader->insertBefore(LoopPH->getTerminator());
632  LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
633  << LoopPH->getName() << "\n");
634  }
635  BasicBlock *BB = BEInst->getParent();
636  IRBuilder<> IRB(BB);
637  IRB.SetInsertPoint(BB->getFirstNonPHI());
638  Value *BEVal = BEInst;
639  PHINode *NewPhi;
640  for (int i = Iterations-1; i >=0 ; --i) {
641  Instruction *InstInPreheader = InstsInPreheader[i];
642  NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
643  NewPhi->addIncoming(InstInPreheader, LoopPH);
644  NewPhi->addIncoming(BEVal, BB);
645  LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
646  << "\n");
647  BEVal = NewPhi;
648  }
649  // We are in LCSSA form. So, a value defined inside the Loop is used only
650  // inside the loop. So, the following is safe.
651  Inst2Replace->replaceAllUsesWith(NewPhi);
652  ReplacedInsts.insert(Inst2Replace);
653  ++HexagonNumVectorLoopCarriedReuse;
654 }
655 
656 bool HexagonVectorLoopCarriedReuse::doVLCR() {
657  assert(CurLoop->getSubLoops().empty() &&
658  "Can do VLCR on the innermost loop only");
659  assert((CurLoop->getNumBlocks() == 1) &&
660  "Can do VLCR only on single block loops");
661 
662  bool Changed = false;
663  bool Continue;
664 
665  LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
666  do {
667  // Reset datastructures.
668  Dependences.clear();
669  Continue = false;
670 
671  findLoopCarriedDeps();
672  findValueToReuse();
673  if (ReuseCandidate.isDefined()) {
674  reuseValue();
675  Changed = true;
676  Continue = true;
677  }
678  llvm::for_each(Dependences, std::default_delete<DepChain>());
679  } while (Continue);
680  return Changed;
681 }
682 
683 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
684  DepChain &D) {
685  PHINode *PN = dyn_cast<PHINode>(I);
686  if (!PN) {
687  D.push_back(I);
688  return;
689  } else {
690  auto NumIncomingValues = PN->getNumIncomingValues();
691  if (NumIncomingValues != 2) {
692  D.clear();
693  return;
694  }
695 
696  BasicBlock *BB = PN->getParent();
697  if (BB != CurLoop->getHeader()) {
698  D.clear();
699  return;
700  }
701 
702  Value *BEVal = PN->getIncomingValueForBlock(BB);
703  Instruction *BEInst = dyn_cast<Instruction>(BEVal);
704  // This is a single block loop with a preheader, so at least
705  // one value should come over the backedge.
706  assert(BEInst && "There should be a value over the backedge");
707 
708  Value *PreHdrVal =
709  PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
710  if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
711  D.clear();
712  return;
713  }
714  D.push_back(PN);
715  findDepChainFromPHI(BEInst, D);
716  }
717 }
718 
719 DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
720  Instruction *I2,
721  int Iters) {
722  for (auto *D : Dependences) {
723  if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
724  return D;
725  }
726  return nullptr;
727 }
728 
729 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
730  BasicBlock *BB = CurLoop->getHeader();
731  for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
732  auto *PN = cast<PHINode>(I);
733  if (!isa<VectorType>(PN->getType()))
734  continue;
735 
736  DepChain *D = new DepChain();
737  findDepChainFromPHI(PN, *D);
738  if (D->size() != 0)
739  Dependences.insert(D);
740  else
741  delete D;
742  }
743  LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
744  LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
745  ++i) { dbgs() << *Dependences[i] << "\n"; });
746 }
747 
749  return new HexagonVectorLoopCarriedReuse();
750 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:77
uint64_t CallInst * C
use_iterator use_end()
Definition: Value.h:365
LLVM_NODISCARD std::enable_if_t< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > dyn_cast(const Y &Val)
Definition: Casting.h:334
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This class represents a function call, abstracting a target machine&#39;s calling convention.
static cl::opt< int > HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2), cl::ZeroOrMore)
STATISTIC(NumFunctions, "Total number of functions")
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:150
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:231
This defines the Use class.
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:289
AnalysisUsage & addRequired()
hexagon Hexagon specific predictive commoning for HVX vectors
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr", "Hexagon-specific predictive commoning for HVX vectors", false, false) INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:342
Instruction * clone() const
Create a copy of &#39;this&#39; instruction that is identical in all ways except the following: ...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
AnalysisUsage & addPreservedID(const void *ID)
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:486
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:161
Value * getOperand(unsigned i) const
Definition: User.h:169
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2320
void initializeHexagonVectorLoopCarriedReusePass(PassRegistry &)
for(auto &MBB :MF)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:434
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
Definition: Instruction.cpp:86
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
char & LCSSAID
Definition: LCSSA.cpp:468
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:619
Represent the analysis usage information of a pass.
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:187
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
char & LoopSimplifyID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
unsigned getNumOperands() const
Definition: User.h:191
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
iterator end()
Definition: BasicBlock.h:291
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:266
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:883
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
unsigned getNumIncomingValues() const
Return the number of incoming edges.
bool isCommutative() const
Return true if the instruction is commutative:
Definition: Instruction.h:535
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:195
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
auto size(R &&Range, std::enable_if_t< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1473
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:225
use_iterator use_begin()
Definition: Value.h:357
Pass * createHexagonVectorLoopCarriedReusePass()
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:516
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:270
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1314
#define I(x, y, z)
Definition: MD5.cpp:59
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2099
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:74
A vector that has set insertion semantics.
Definition: SetVector.h:40
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:1233
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:150
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1484
#define LLVM_DEBUG(X)
Definition: Debug.h:122
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:94
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)