LLVM  15.0.0git
PPCLoopInstrFormPrep.cpp
Go to the documentation of this file.
1 //===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a pass to prepare loops for ppc preferred addressing
10 // modes, leveraging different instruction form. (eg: DS/DQ form, D/DS form with
11 // update)
12 // Additional PHIs are created for loop induction variables used by load/store
13 // instructions so that preferred addressing modes can be used.
14 //
15 // 1: DS/DQ form preparation, prepare the load/store instructions so that they
16 // can satisfy the DS/DQ form displacement requirements.
17 // Generically, this means transforming loops like this:
18 // for (int i = 0; i < n; ++i) {
19 // unsigned long x1 = *(unsigned long *)(p + i + 5);
20 // unsigned long x2 = *(unsigned long *)(p + i + 9);
21 // }
22 //
23 // to look like this:
24 //
25 // unsigned NewP = p + 5;
26 // for (int i = 0; i < n; ++i) {
27 // unsigned long x1 = *(unsigned long *)(i + NewP);
28 // unsigned long x2 = *(unsigned long *)(i + NewP + 4);
29 // }
30 //
31 // 2: D/DS form with update preparation, prepare the load/store instructions so
32 // that we can use update form to do pre-increment.
33 // Generically, this means transforming loops like this:
34 // for (int i = 0; i < n; ++i)
35 // array[i] = c;
36 //
37 // to look like this:
38 //
39 // T *p = array[-1];
40 // for (int i = 0; i < n; ++i)
41 // *++p = c;
42 //
43 // 3: common multiple chains for the load/stores with same offsets in the loop,
44 // so that we can reuse the offsets and reduce the register pressure in the
45 // loop. This transformation can also increase the loop ILP as now each chain
46 // uses its own loop induction add/addi. But this will increase the number of
47 // add/addi in the loop.
48 //
49 // Generically, this means transforming loops like this:
50 //
51 // char *p;
52 // A1 = p + base1
53 // A2 = p + base1 + offset
54 // B1 = p + base2
55 // B2 = p + base2 + offset
56 //
57 // for (int i = 0; i < n; i++)
58 // unsigned long x1 = *(unsigned long *)(A1 + i);
59 // unsigned long x2 = *(unsigned long *)(A2 + i)
60 // unsigned long x3 = *(unsigned long *)(B1 + i);
61 // unsigned long x4 = *(unsigned long *)(B2 + i);
62 // }
63 //
64 // to look like this:
65 //
66 // A1_new = p + base1 // chain 1
67 // B1_new = p + base2 // chain 2, now inside the loop, common offset is
68 // // reused.
69 //
70 // for (long long i = 0; i < n; i+=count) {
71 // unsigned long x1 = *(unsigned long *)(A1_new + i);
72 // unsigned long x2 = *(unsigned long *)((A1_new + i) + offset);
73 // unsigned long x3 = *(unsigned long *)(B1_new + i);
74 // unsigned long x4 = *(unsigned long *)((B1_new + i) + offset);
75 // }
76 //===----------------------------------------------------------------------===//
77 
78 #include "PPC.h"
79 #include "PPCSubtarget.h"
80 #include "PPCTargetMachine.h"
82 #include "llvm/ADT/SmallPtrSet.h"
83 #include "llvm/ADT/SmallSet.h"
84 #include "llvm/ADT/SmallVector.h"
85 #include "llvm/ADT/Statistic.h"
86 #include "llvm/Analysis/LoopInfo.h"
89 #include "llvm/IR/BasicBlock.h"
90 #include "llvm/IR/CFG.h"
91 #include "llvm/IR/Dominators.h"
92 #include "llvm/IR/Instruction.h"
93 #include "llvm/IR/Instructions.h"
94 #include "llvm/IR/IntrinsicInst.h"
95 #include "llvm/IR/IntrinsicsPowerPC.h"
96 #include "llvm/IR/Module.h"
97 #include "llvm/IR/Type.h"
98 #include "llvm/IR/Value.h"
99 #include "llvm/InitializePasses.h"
100 #include "llvm/Pass.h"
101 #include "llvm/Support/Casting.h"
103 #include "llvm/Support/Debug.h"
104 #include "llvm/Transforms/Scalar.h"
105 #include "llvm/Transforms/Utils.h"
110 #include <cassert>
111 #include <iterator>
112 #include <utility>
113 
114 #define DEBUG_TYPE "ppc-loop-instr-form-prep"
115 
116 using namespace llvm;
117 
118 static cl::opt<unsigned>
119  MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24),
120  cl::desc("Potential common base number threshold per function "
121  "for PPC loop prep"));
122 
123 static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
124  cl::init(true), cl::Hidden,
125  cl::desc("prefer update form when ds form is also a update form"));
126 
128  "ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden,
129  cl::desc("prepare update form when the load/store increment is a loop "
130  "invariant non-const value."));
131 
133  "ppc-formprep-chain-commoning", cl::init(false), cl::Hidden,
134  cl::desc("Enable chain commoning in PPC loop prepare pass."));
135 
136 // Sum of following 3 per loop thresholds for all loops can not be larger
137 // than MaxVarsPrep.
138 // now the thresholds for each kind prep are exterimental values on Power9.
139 static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars",
140  cl::Hidden, cl::init(3),
141  cl::desc("Potential PHI threshold per loop for PPC loop prep of update "
142  "form"));
143 
144 static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars",
145  cl::Hidden, cl::init(3),
146  cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"));
147 
148 static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",
149  cl::Hidden, cl::init(8),
150  cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));
151 
152 // Commoning chain will reduce the register pressure, so we don't consider about
153 // the PHI nodes number.
154 // But commoning chain will increase the addi/add number in the loop and also
155 // increase loop ILP. Maximum chain number should be same with hardware
156 // IssueWidth, because we won't benefit from ILP if the parallel chains number
157 // is bigger than IssueWidth. We assume there are 2 chains in one bucket, so
158 // there would be 4 buckets at most on P9(IssueWidth is 8).
160  "ppc-chaincommon-max-vars", cl::Hidden, cl::init(4),
161  cl::desc("Bucket number per loop for PPC loop chain common"));
162 
163 // If would not be profitable if the common base has only one load/store, ISEL
164 // should already be able to choose best load/store form based on offset for
165 // single load/store. Set minimal profitable value default to 2 and make it as
166 // an option.
167 static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold",
168  cl::Hidden, cl::init(2),
169  cl::desc("Minimal common base load/store instructions triggering DS/DQ form "
170  "preparation"));
171 
173  "ppc-chaincommon-min-threshold", cl::Hidden, cl::init(4),
174  cl::desc("Minimal common base load/store instructions triggering chain "
175  "commoning preparation. Must be not smaller than 4"));
176 
177 STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form");
178 STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form");
179 STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form");
180 STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten");
181 STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten");
182 STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten");
183 STATISTIC(ChainCommoningRewritten, "Num of commoning chains");
184 
185 namespace {
186  struct BucketElement {
187  BucketElement(const SCEV *O, Instruction *I) : Offset(O), Instr(I) {}
188  BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
189 
190  const SCEV *Offset;
191  Instruction *Instr;
192  };
193 
194  struct Bucket {
195  Bucket(const SCEV *B, Instruction *I)
196  : BaseSCEV(B), Elements(1, BucketElement(I)) {
197  ChainSize = 0;
198  }
199 
200  // The base of the whole bucket.
201  const SCEV *BaseSCEV;
202 
203  // All elements in the bucket. In the bucket, the element with the BaseSCEV
204  // has no offset and all other elements are stored as offsets to the
205  // BaseSCEV.
207 
208  // The potential chains size. This is used for chain commoning only.
209  unsigned ChainSize;
210 
211  // The base for each potential chain. This is used for chain commoning only.
213  };
214 
215  // "UpdateForm" is not a real PPC instruction form, it stands for dform
216  // load/store with update like ldu/stdu, or Prefetch intrinsic.
217  // For DS form instructions, their displacements must be multiple of 4.
218  // For DQ form instructions, their displacements must be multiple of 16.
219  enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning };
220 
221  class PPCLoopInstrFormPrep : public FunctionPass {
222  public:
223  static char ID; // Pass ID, replacement for typeid
224 
225  PPCLoopInstrFormPrep() : FunctionPass(ID) {
227  }
228 
229  PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
231  }
232 
233  void getAnalysisUsage(AnalysisUsage &AU) const override {
238  }
239 
240  bool runOnFunction(Function &F) override;
241 
242  private:
243  PPCTargetMachine *TM = nullptr;
244  const PPCSubtarget *ST;
245  DominatorTree *DT;
246  LoopInfo *LI;
247  ScalarEvolution *SE;
248  bool PreserveLCSSA;
249  bool HasCandidateForPrepare;
250 
251  /// Successful preparation number for Update/DS/DQ form in all inner most
252  /// loops. One successful preparation will put one common base out of loop,
253  /// this may leads to register presure like LICM does.
254  /// Make sure total preparation number can be controlled by option.
255  unsigned SuccPrepCount;
256 
257  bool runOnLoop(Loop *L);
258 
259  /// Check if required PHI node is already exist in Loop \p L.
260  bool alreadyPrepared(Loop *L, Instruction *MemI,
261  const SCEV *BasePtrStartSCEV,
262  const SCEV *BasePtrIncSCEV, PrepForm Form);
263 
264  /// Get the value which defines the increment SCEV \p BasePtrIncSCEV.
265  Value *getNodeForInc(Loop *L, Instruction *MemI,
266  const SCEV *BasePtrIncSCEV);
267 
268  /// Common chains to reuse offsets for a loop to reduce register pressure.
269  bool chainCommoning(Loop *L, SmallVector<Bucket, 16> &Buckets);
270 
271  /// Find out the potential commoning chains and their bases.
272  bool prepareBasesForCommoningChains(Bucket &BucketChain);
273 
274  /// Rewrite load/store according to the common chains.
275  bool
276  rewriteLoadStoresForCommoningChains(Loop *L, Bucket &Bucket,
277  SmallSet<BasicBlock *, 16> &BBChanged);
278 
279  /// Collect condition matched(\p isValidCandidate() returns true)
280  /// candidates in Loop \p L.
281  SmallVector<Bucket, 16> collectCandidates(
282  Loop *L,
283  std::function<bool(const Instruction *, Value *, const Type *)>
284  isValidCandidate,
285  std::function<bool(const SCEV *)> isValidDiff,
286  unsigned MaxCandidateNum);
287 
288  /// Add a candidate to candidates \p Buckets if diff between candidate and
289  /// one base in \p Buckets matches \p isValidDiff.
290  void addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
291  SmallVector<Bucket, 16> &Buckets,
292  std::function<bool(const SCEV *)> isValidDiff,
293  unsigned MaxCandidateNum);
294 
295  /// Prepare all candidates in \p Buckets for update form.
296  bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets);
297 
298  /// Prepare all candidates in \p Buckets for displacement form, now for
299  /// ds/dq.
300  bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, PrepForm Form);
301 
302  /// Prepare for one chain \p BucketChain, find the best base element and
303  /// update all other elements in \p BucketChain accordingly.
304  /// \p Form is used to find the best base element.
305  /// If success, best base element must be stored as the first element of
306  /// \p BucketChain.
307  /// Return false if no base element found, otherwise return true.
308  bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm Form);
309 
310  /// Prepare for one chain \p BucketChain, find the best base element and
311  /// update all other elements in \p BucketChain accordingly.
312  /// If success, best base element must be stored as the first element of
313  /// \p BucketChain.
314  /// Return false if no base element found, otherwise return true.
315  bool prepareBaseForUpdateFormChain(Bucket &BucketChain);
316 
317  /// Rewrite load/store instructions in \p BucketChain according to
318  /// preparation.
319  bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
320  SmallSet<BasicBlock *, 16> &BBChanged,
321  PrepForm Form);
322 
323  /// Rewrite for the base load/store of a chain.
324  std::pair<Instruction *, Instruction *>
325  rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
326  Instruction *BaseMemI, bool CanPreInc, PrepForm Form,
327  SCEVExpander &SCEVE, SmallPtrSet<Value *, 16> &DeletedPtrs);
328 
329  /// Rewrite for the other load/stores of a chain according to the new \p
330  /// Base.
331  Instruction *
332  rewriteForBucketElement(std::pair<Instruction *, Instruction *> Base,
333  const BucketElement &Element, Value *OffToBase,
334  SmallPtrSet<Value *, 16> &DeletedPtrs);
335  };
336 
337 } // end anonymous namespace
338 
339 char PPCLoopInstrFormPrep::ID = 0;
340 static const char *name = "Prepare loop for ppc preferred instruction forms";
341 INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
344 INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
345 
346 static constexpr StringRef PHINodeNameSuffix = ".phi";
347 static constexpr StringRef CastNodeNameSuffix = ".cast";
348 static constexpr StringRef GEPNodeIncNameSuffix = ".inc";
349 static constexpr StringRef GEPNodeOffNameSuffix = ".off";
350 
352  return new PPCLoopInstrFormPrep(TM);
353 }
354 
355 static bool IsPtrInBounds(Value *BasePtr) {
356  Value *StrippedBasePtr = BasePtr;
357  while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
358  StrippedBasePtr = BC->getOperand(0);
359  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr))
360  return GEP->isInBounds();
361 
362  return false;
363 }
364 
365 static std::string getInstrName(const Value *I, StringRef Suffix) {
366  assert(I && "Invalid paramater!");
367  if (I->hasName())
368  return (I->getName() + Suffix).str();
369  else
370  return "";
371 }
372 
374  Type **PtrElementType = nullptr) {
375 
376  Value *PtrValue = nullptr;
377  Type *PointerElementType = nullptr;
378 
379  if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
380  PtrValue = LMemI->getPointerOperand();
381  PointerElementType = LMemI->getType();
382  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
383  PtrValue = SMemI->getPointerOperand();
384  PointerElementType = SMemI->getValueOperand()->getType();
385  } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
386  PointerElementType = Type::getInt8Ty(MemI->getContext());
387  if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
388  IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
389  PtrValue = IMemI->getArgOperand(0);
390  } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
391  PtrValue = IMemI->getArgOperand(1);
392  }
393  }
394  /*Get ElementType if PtrElementType is not null.*/
395  if (PtrElementType)
396  *PtrElementType = PointerElementType;
397 
398  return PtrValue;
399 }
400 
402  if (skipFunction(F))
403  return false;
404 
405  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
406  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
407  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
408  DT = DTWP ? &DTWP->getDomTree() : nullptr;
409  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
410  ST = TM ? TM->getSubtargetImpl(F) : nullptr;
411  SuccPrepCount = 0;
412 
413  bool MadeChange = false;
414 
415  for (Loop *I : *LI)
416  for (Loop *L : depth_first(I))
417  MadeChange |= runOnLoop(L);
418 
419  return MadeChange;
420 }
421 
422 // Finding the minimal(chain_number + reusable_offset_number) is a complicated
423 // algorithmic problem.
424 // For now, the algorithm used here is simply adjusted to handle the case for
425 // manually unrolling cases.
426 // FIXME: use a more powerful algorithm to find minimal sum of chain_number and
427 // reusable_offset_number for one base with multiple offsets.
428 bool PPCLoopInstrFormPrep::prepareBasesForCommoningChains(Bucket &CBucket) {
429  // The minimal size for profitable chain commoning:
430  // A1 = base + offset1
431  // A2 = base + offset2 (offset2 - offset1 = X)
432  // A3 = base + offset3
433  // A4 = base + offset4 (offset4 - offset3 = X)
434  // ======>
435  // base1 = base + offset1
436  // base2 = base + offset3
437  // A1 = base1
438  // A2 = base1 + X
439  // A3 = base2
440  // A4 = base2 + X
441  //
442  // There is benefit because of reuse of offest 'X'.
443 
445  "Thredhold can not be smaller than 4!\n");
446  if (CBucket.Elements.size() < ChainCommonPrepMinThreshold)
447  return false;
448 
449  // We simply select the FirstOffset as the first reusable offset between each
450  // chain element 1 and element 0.
451  const SCEV *FirstOffset = CBucket.Elements[1].Offset;
452 
453  // Figure out how many times above FirstOffset is used in the chain.
454  // For a success commoning chain candidate, offset difference between each
455  // chain element 1 and element 0 must be also FirstOffset.
456  unsigned FirstOffsetReusedCount = 1;
457 
458  // Figure out how many times above FirstOffset is used in the first chain.
459  // Chain number is FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain
460  unsigned FirstOffsetReusedCountInFirstChain = 1;
461 
462  unsigned EleNum = CBucket.Elements.size();
463  bool SawChainSeparater = false;
464  for (unsigned j = 2; j != EleNum; ++j) {
465  if (SE->getMinusSCEV(CBucket.Elements[j].Offset,
466  CBucket.Elements[j - 1].Offset) == FirstOffset) {
467  if (!SawChainSeparater)
468  FirstOffsetReusedCountInFirstChain++;
469  FirstOffsetReusedCount++;
470  } else
471  // For now, if we meet any offset which is not FirstOffset, we assume we
472  // find a new Chain.
473  // This makes us miss some opportunities.
474  // For example, we can common:
475  //
476  // {OffsetA, Offset A, OffsetB, OffsetA, OffsetA, OffsetB}
477  //
478  // as two chains:
479  // {{OffsetA, Offset A, OffsetB}, {OffsetA, OffsetA, OffsetB}}
480  // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 2
481  //
482  // But we fail to common:
483  //
484  // {OffsetA, OffsetB, OffsetA, OffsetA, OffsetB, OffsetA}
485  // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 1
486 
487  SawChainSeparater = true;
488  }
489 
490  // FirstOffset is not reused, skip this bucket.
491  if (FirstOffsetReusedCount == 1)
492  return false;
493 
494  unsigned ChainNum =
495  FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain;
496 
497  // All elements are increased by FirstOffset.
498  // The number of chains should be sqrt(EleNum).
499  if (!SawChainSeparater)
500  ChainNum = (unsigned)sqrt((double)EleNum);
501 
502  CBucket.ChainSize = (unsigned)(EleNum / ChainNum);
503 
504  // If this is not a perfect chain(eg: not all elements can be put inside
505  // commoning chains.), skip now.
506  if (CBucket.ChainSize * ChainNum != EleNum)
507  return false;
508 
509  if (SawChainSeparater) {
510  // Check that the offset seqs are the same for all chains.
511  for (unsigned i = 1; i < CBucket.ChainSize; i++)
512  for (unsigned j = 1; j < ChainNum; j++)
513  if (CBucket.Elements[i].Offset !=
514  SE->getMinusSCEV(CBucket.Elements[i + j * CBucket.ChainSize].Offset,
515  CBucket.Elements[j * CBucket.ChainSize].Offset))
516  return false;
517  }
518 
519  for (unsigned i = 0; i < ChainNum; i++)
520  CBucket.ChainBases.push_back(CBucket.Elements[i * CBucket.ChainSize]);
521 
522  LLVM_DEBUG(dbgs() << "Bucket has " << ChainNum << " chains.\n");
523 
524  return true;
525 }
526 
527 bool PPCLoopInstrFormPrep::chainCommoning(Loop *L,
528  SmallVector<Bucket, 16> &Buckets) {
529  bool MadeChange = false;
530 
531  if (Buckets.empty())
532  return MadeChange;
533 
534  SmallSet<BasicBlock *, 16> BBChanged;
535 
536  for (auto &Bucket : Buckets) {
537  if (prepareBasesForCommoningChains(Bucket))
538  MadeChange |= rewriteLoadStoresForCommoningChains(L, Bucket, BBChanged);
539  }
540 
541  if (MadeChange)
542  for (auto *BB : BBChanged)
544  return MadeChange;
545 }
546 
547 bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
548  Loop *L, Bucket &Bucket, SmallSet<BasicBlock *, 16> &BBChanged) {
549  bool MadeChange = false;
550 
551  assert(Bucket.Elements.size() ==
552  Bucket.ChainBases.size() * Bucket.ChainSize &&
553  "invalid bucket for chain commoning!\n");
554  SmallPtrSet<Value *, 16> DeletedPtrs;
555 
556  BasicBlock *Header = L->getHeader();
557  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
558 
559  SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(),
560  "loopprepare-chaincommon");
561 
562  for (unsigned ChainIdx = 0; ChainIdx < Bucket.ChainBases.size(); ++ChainIdx) {
563  unsigned BaseElemIdx = Bucket.ChainSize * ChainIdx;
564  const SCEV *BaseSCEV =
565  ChainIdx ? SE->getAddExpr(Bucket.BaseSCEV,
566  Bucket.Elements[BaseElemIdx].Offset)
567  : Bucket.BaseSCEV;
568  const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(BaseSCEV);
569 
570  // Make sure the base is able to expand.
571  if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE))
572  return MadeChange;
573 
574  assert(BasePtrSCEV->isAffine() &&
575  "Invalid SCEV type for the base ptr for a candidate chain!\n");
576 
577  std::pair<Instruction *, Instruction *> Base = rewriteForBase(
578  L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,
579  false /* CanPreInc */, ChainCommoning, SCEVE, DeletedPtrs);
580 
581  if (!Base.first || !Base.second)
582  return MadeChange;
583 
584  // Keep track of the replacement pointer values we've inserted so that we
585  // don't generate more pointer values than necessary.
586  SmallPtrSet<Value *, 16> NewPtrs;
587  NewPtrs.insert(Base.first);
588 
589  for (unsigned Idx = BaseElemIdx + 1; Idx < BaseElemIdx + Bucket.ChainSize;
590  ++Idx) {
591  BucketElement &I = Bucket.Elements[Idx];
592  Value *Ptr = getPointerOperandAndType(I.Instr);
593  assert(Ptr && "No pointer operand");
594  if (NewPtrs.count(Ptr))
595  continue;
596 
597  const SCEV *OffsetSCEV =
598  BaseElemIdx ? SE->getMinusSCEV(Bucket.Elements[Idx].Offset,
599  Bucket.Elements[BaseElemIdx].Offset)
600  : Bucket.Elements[Idx].Offset;
601 
602  // Make sure offset is able to expand. Only need to check one time as the
603  // offsets are reused between different chains.
604  if (!BaseElemIdx)
605  if (!isSafeToExpand(OffsetSCEV, *SE))
606  return false;
607 
608  Value *OffsetValue = SCEVE.expandCodeFor(
609  OffsetSCEV, OffsetSCEV->getType(), LoopPredecessor->getTerminator());
610 
611  Instruction *NewPtr = rewriteForBucketElement(Base, Bucket.Elements[Idx],
612  OffsetValue, DeletedPtrs);
613 
614  assert(NewPtr && "Wrong rewrite!\n");
615  NewPtrs.insert(NewPtr);
616  }
617 
618  ++ChainCommoningRewritten;
619  }
620 
621  // Clear the rewriter cache, because values that are in the rewriter's cache
622  // can be deleted below, causing the AssertingVH in the cache to trigger.
623  SCEVE.clear();
624 
625  for (auto *Ptr : DeletedPtrs) {
626  if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
627  BBChanged.insert(IDel->getParent());
629  }
630 
631  MadeChange = true;
632  return MadeChange;
633 }
634 
635 // Rewrite the new base according to BasePtrSCEV.
636 // bb.loop.preheader:
637 // %newstart = ...
638 // bb.loop.body:
639 // %phinode = phi [ %newstart, %bb.loop.preheader ], [ %add, %bb.loop.body ]
640 // ...
641 // %add = getelementptr %phinode, %inc
642 //
643 // First returned instruciton is %phinode (or a type cast to %phinode), caller
644 // needs this value to rewrite other load/stores in the same chain.
645 // Second returned instruction is %add, caller needs this value to rewrite other
646 // load/stores in the same chain.
647 std::pair<Instruction *, Instruction *>
648 PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
649  Instruction *BaseMemI, bool CanPreInc,
650  PrepForm Form, SCEVExpander &SCEVE,
651  SmallPtrSet<Value *, 16> &DeletedPtrs) {
652 
653  LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
654 
655  assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?");
656 
658  assert(BasePtr && "No pointer operand");
659 
660  Type *I8Ty = Type::getInt8Ty(BaseMemI->getParent()->getContext());
661  Type *I8PtrTy =
662  Type::getInt8PtrTy(BaseMemI->getParent()->getContext(),
663  BasePtr->getType()->getPointerAddressSpace());
664 
665  bool IsConstantInc = false;
666  const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE);
667  Value *IncNode = getNodeForInc(L, BaseMemI, BasePtrIncSCEV);
668 
669  const SCEVConstant *BasePtrIncConstantSCEV =
670  dyn_cast<SCEVConstant>(BasePtrIncSCEV);
671  if (BasePtrIncConstantSCEV)
672  IsConstantInc = true;
673 
674  // No valid representation for the increment.
675  if (!IncNode) {
676  LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n");
677  return std::make_pair(nullptr, nullptr);
678  }
679 
680  if (Form == UpdateForm && !IsConstantInc && !EnableUpdateFormForNonConstInc) {
681  LLVM_DEBUG(
682  dbgs()
683  << "Update form prepare for non-const increment is not enabled!\n");
684  return std::make_pair(nullptr, nullptr);
685  }
686 
687  const SCEV *BasePtrStartSCEV = nullptr;
688  if (CanPreInc) {
689  assert(SE->isLoopInvariant(BasePtrIncSCEV, L) &&
690  "Increment is not loop invariant!\n");
691  BasePtrStartSCEV = SE->getMinusSCEV(BasePtrSCEV->getStart(),
692  IsConstantInc ? BasePtrIncConstantSCEV
693  : BasePtrIncSCEV);
694  } else
695  BasePtrStartSCEV = BasePtrSCEV->getStart();
696 
697  if (alreadyPrepared(L, BaseMemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) {
698  LLVM_DEBUG(dbgs() << "Instruction form is already prepared!\n");
699  return std::make_pair(nullptr, nullptr);
700  }
701 
702  LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
703 
704  BasicBlock *Header = L->getHeader();
705  unsigned HeaderLoopPredCount = pred_size(Header);
706  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
707 
708  PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
709  getInstrName(BaseMemI, PHINodeNameSuffix),
710  Header->getFirstNonPHI());
711 
712  Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
713  LoopPredecessor->getTerminator());
714 
715  // Note that LoopPredecessor might occur in the predecessor list multiple
716  // times, and we need to add it the right number of times.
717  for (auto PI : predecessors(Header)) {
718  if (PI != LoopPredecessor)
719  continue;
720 
721  NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
722  }
723 
724  Instruction *PtrInc = nullptr;
725  Instruction *NewBasePtr = nullptr;
726  if (CanPreInc) {
727  Instruction *InsPoint = &*Header->getFirstInsertionPt();
728  PtrInc = GetElementPtrInst::Create(
729  I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),
730  InsPoint);
731  cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
732  for (auto PI : predecessors(Header)) {
733  if (PI == LoopPredecessor)
734  continue;
735 
736  NewPHI->addIncoming(PtrInc, PI);
737  }
738  if (PtrInc->getType() != BasePtr->getType())
739  NewBasePtr =
740  new BitCastInst(PtrInc, BasePtr->getType(),
741  getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
742  else
743  NewBasePtr = PtrInc;
744  } else {
745  // Note that LoopPredecessor might occur in the predecessor list multiple
746  // times, and we need to make sure no more incoming value for them in PHI.
747  for (auto PI : predecessors(Header)) {
748  if (PI == LoopPredecessor)
749  continue;
750 
751  // For the latch predecessor, we need to insert a GEP just before the
752  // terminator to increase the address.
753  BasicBlock *BB = PI;
754  Instruction *InsPoint = BB->getTerminator();
755  PtrInc = GetElementPtrInst::Create(
756  I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),
757  InsPoint);
758  cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
759 
760  NewPHI->addIncoming(PtrInc, PI);
761  }
762  PtrInc = NewPHI;
763  if (NewPHI->getType() != BasePtr->getType())
764  NewBasePtr = new BitCastInst(NewPHI, BasePtr->getType(),
766  &*Header->getFirstInsertionPt());
767  else
768  NewBasePtr = NewPHI;
769  }
770 
771  BasePtr->replaceAllUsesWith(NewBasePtr);
772 
773  DeletedPtrs.insert(BasePtr);
774 
775  return std::make_pair(NewBasePtr, PtrInc);
776 }
777 
778 Instruction *PPCLoopInstrFormPrep::rewriteForBucketElement(
779  std::pair<Instruction *, Instruction *> Base, const BucketElement &Element,
780  Value *OffToBase, SmallPtrSet<Value *, 16> &DeletedPtrs) {
781  Instruction *NewBasePtr = Base.first;
782  Instruction *PtrInc = Base.second;
783  assert((NewBasePtr && PtrInc) && "base does not exist!\n");
784 
785  Type *I8Ty = Type::getInt8Ty(PtrInc->getParent()->getContext());
786 
787  Value *Ptr = getPointerOperandAndType(Element.Instr);
788  assert(Ptr && "No pointer operand");
789 
790  Instruction *RealNewPtr;
791  if (!Element.Offset ||
792  (isa<SCEVConstant>(Element.Offset) &&
793  cast<SCEVConstant>(Element.Offset)->getValue()->isZero())) {
794  RealNewPtr = NewBasePtr;
795  } else {
796  Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
797  if (PtrIP && isa<Instruction>(NewBasePtr) &&
798  cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
799  PtrIP = nullptr;
800  else if (PtrIP && isa<PHINode>(PtrIP))
801  PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
802  else if (!PtrIP)
803  PtrIP = Element.Instr;
804 
805  assert(OffToBase && "There should be an offset for non base element!\n");
807  I8Ty, PtrInc, OffToBase,
808  getInstrName(Element.Instr, GEPNodeOffNameSuffix), PtrIP);
809  if (!PtrIP)
810  NewPtr->insertAfter(cast<Instruction>(PtrInc));
811  NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
812  RealNewPtr = NewPtr;
813  }
814 
815  Instruction *ReplNewPtr;
816  if (Ptr->getType() != RealNewPtr->getType()) {
817  ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
819  ReplNewPtr->insertAfter(RealNewPtr);
820  } else
821  ReplNewPtr = RealNewPtr;
822 
823  Ptr->replaceAllUsesWith(ReplNewPtr);
824  DeletedPtrs.insert(Ptr);
825 
826  return ReplNewPtr;
827 }
828 
829 void PPCLoopInstrFormPrep::addOneCandidate(
830  Instruction *MemI, const SCEV *LSCEV, SmallVector<Bucket, 16> &Buckets,
831  std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {
832  assert((MemI && getPointerOperandAndType(MemI)) &&
833  "Candidate should be a memory instruction.");
834  assert(LSCEV && "Invalid SCEV for Ptr value.");
835 
836  bool FoundBucket = false;
837  for (auto &B : Buckets) {
838  if (cast<SCEVAddRecExpr>(B.BaseSCEV)->getStepRecurrence(*SE) !=
839  cast<SCEVAddRecExpr>(LSCEV)->getStepRecurrence(*SE))
840  continue;
841  const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
842  if (isValidDiff(Diff)) {
843  B.Elements.push_back(BucketElement(Diff, MemI));
844  FoundBucket = true;
845  break;
846  }
847  }
848 
849  if (!FoundBucket) {
850  if (Buckets.size() == MaxCandidateNum) {
851  LLVM_DEBUG(dbgs() << "Can not prepare more chains, reach maximum limit "
852  << MaxCandidateNum << "\n");
853  return;
854  }
855  Buckets.push_back(Bucket(LSCEV, MemI));
856  }
857 }
858 
859 SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
860  Loop *L,
861  std::function<bool(const Instruction *, Value *, const Type *)>
862  isValidCandidate,
863  std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {
864  SmallVector<Bucket, 16> Buckets;
865 
866  for (const auto &BB : L->blocks())
867  for (auto &J : *BB) {
868  Value *PtrValue = nullptr;
869  Type *PointerElementType = nullptr;
870  PtrValue = getPointerOperandAndType(&J, &PointerElementType);
871 
872  if (!PtrValue)
873  continue;
874 
875  if (PtrValue->getType()->getPointerAddressSpace())
876  continue;
877 
878  if (L->isLoopInvariant(PtrValue))
879  continue;
880 
881  const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
882  const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
883  if (!LARSCEV || LARSCEV->getLoop() != L)
884  continue;
885 
886  // Mark that we have candidates for preparing.
887  HasCandidateForPrepare = true;
888 
889  if (isValidCandidate(&J, PtrValue, PointerElementType))
890  addOneCandidate(&J, LSCEV, Buckets, isValidDiff, MaxCandidateNum);
891  }
892  return Buckets;
893 }
894 
895 bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
896  PrepForm Form) {
897  // RemainderOffsetInfo details:
898  // key: value of (Offset urem DispConstraint). For DSForm, it can
899  // be [0, 4).
900  // first of pair: the index of first BucketElement whose remainder is equal
901  // to key. For key 0, this value must be 0.
902  // second of pair: number of load/stores with the same remainder.
904 
905  for (unsigned j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
906  if (!BucketChain.Elements[j].Offset)
907  RemainderOffsetInfo[0] = std::make_pair(0, 1);
908  else {
909  unsigned Remainder = cast<SCEVConstant>(BucketChain.Elements[j].Offset)
910  ->getAPInt()
911  .urem(Form);
912  if (RemainderOffsetInfo.find(Remainder) == RemainderOffsetInfo.end())
913  RemainderOffsetInfo[Remainder] = std::make_pair(j, 1);
914  else
915  RemainderOffsetInfo[Remainder].second++;
916  }
917  }
918  // Currently we choose the most profitable base as the one which has the max
919  // number of load/store with same remainder.
920  // FIXME: adjust the base selection strategy according to load/store offset
921  // distribution.
922  // For example, if we have one candidate chain for DS form preparation, which
923  // contains following load/stores with different remainders:
924  // 1: 10 load/store whose remainder is 1;
925  // 2: 9 load/store whose remainder is 2;
926  // 3: 1 for remainder 3 and 0 for remainder 0;
927  // Now we will choose the first load/store whose remainder is 1 as base and
928  // adjust all other load/stores according to new base, so we will get 10 DS
929  // form and 10 X form.
930  // But we should be more clever, for this case we could use two bases, one for
931  // remainder 1 and the other for remainder 2, thus we could get 19 DS form and
932  // 1 X form.
933  unsigned MaxCountRemainder = 0;
934  for (unsigned j = 0; j < (unsigned)Form; j++)
935  if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) &&
936  RemainderOffsetInfo[j].second >
937  RemainderOffsetInfo[MaxCountRemainder].second)
938  MaxCountRemainder = j;
939 
940  // Abort when there are too few insts with common base.
941  if (RemainderOffsetInfo[MaxCountRemainder].second < DispFormPrepMinThreshold)
942  return false;
943 
944  // If the first value is most profitable, no needed to adjust BucketChain
945  // elements as they are substracted the first value when collecting.
946  if (MaxCountRemainder == 0)
947  return true;
948 
949  // Adjust load/store to the new chosen base.
950  const SCEV *Offset =
951  BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset;
952  BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
953  for (auto &E : BucketChain.Elements) {
954  if (E.Offset)
955  E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
956  else
957  E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
958  }
959 
960  std::swap(BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first],
961  BucketChain.Elements[0]);
962  return true;
963 }
964 
965 // FIXME: implement a more clever base choosing policy.
966 // Currently we always choose an exist load/store offset. This maybe lead to
967 // suboptimal code sequences. For example, for one DS chain with offsets
968 // {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp
969 // for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a
970 // multipler of 4, it cannot be represented by sint16.
971 bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
972  // We have a choice now of which instruction's memory operand we use as the
973  // base for the generated PHI. Always picking the first instruction in each
974  // bucket does not work well, specifically because that instruction might
975  // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
976  // the choice is somewhat arbitrary, because the backend will happily
977  // generate direct offsets from both the pre-incremented and
978  // post-incremented pointer values. Thus, we'll pick the first non-prefetch
979  // instruction in each bucket, and adjust the recurrence and other offsets
980  // accordingly.
981  for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
982  if (auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[j].Instr))
983  if (II->getIntrinsicID() == Intrinsic::prefetch)
984  continue;
985 
986  // If we'd otherwise pick the first element anyway, there's nothing to do.
987  if (j == 0)
988  break;
989 
990  // If our chosen element has no offset from the base pointer, there's
991  // nothing to do.
992  if (!BucketChain.Elements[j].Offset ||
993  cast<SCEVConstant>(BucketChain.Elements[j].Offset)->isZero())
994  break;
995 
996  const SCEV *Offset = BucketChain.Elements[j].Offset;
997  BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
998  for (auto &E : BucketChain.Elements) {
999  if (E.Offset)
1000  E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
1001  else
1002  E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
1003  }
1004 
1005  std::swap(BucketChain.Elements[j], BucketChain.Elements[0]);
1006  break;
1007  }
1008  return true;
1009 }
1010 
1011 bool PPCLoopInstrFormPrep::rewriteLoadStores(
1012  Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged,
1013  PrepForm Form) {
1014  bool MadeChange = false;
1015 
1016  const SCEVAddRecExpr *BasePtrSCEV =
1017  cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
1018  if (!BasePtrSCEV->isAffine())
1019  return MadeChange;
1020 
1021  if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE))
1022  return MadeChange;
1023 
1024  SmallPtrSet<Value *, 16> DeletedPtrs;
1025 
1026  BasicBlock *Header = L->getHeader();
1027  SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(),
1028  "loopprepare-formrewrite");
1029 
1030  // For some DS form load/store instructions, it can also be an update form,
1031  // if the stride is constant and is a multipler of 4. Use update form if
1032  // prefer it.
1033  bool CanPreInc = (Form == UpdateForm ||
1034  ((Form == DSForm) &&
1035  isa<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)) &&
1036  !cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE))
1037  ->getAPInt()
1038  .urem(4) &&
1039  PreferUpdateForm));
1040 
1041  std::pair<Instruction *, Instruction *> Base =
1042  rewriteForBase(L, BasePtrSCEV, BucketChain.Elements.begin()->Instr,
1043  CanPreInc, Form, SCEVE, DeletedPtrs);
1044 
1045  if (!Base.first || !Base.second)
1046  return MadeChange;
1047 
1048  // Keep track of the replacement pointer values we've inserted so that we
1049  // don't generate more pointer values than necessary.
1050  SmallPtrSet<Value *, 16> NewPtrs;
1051  NewPtrs.insert(Base.first);
1052 
1053  for (auto I = std::next(BucketChain.Elements.begin()),
1054  IE = BucketChain.Elements.end(); I != IE; ++I) {
1055  Value *Ptr = getPointerOperandAndType(I->Instr);
1056  assert(Ptr && "No pointer operand");
1057  if (NewPtrs.count(Ptr))
1058  continue;
1059 
1060  Instruction *NewPtr = rewriteForBucketElement(
1061  Base, *I,
1062  I->Offset ? cast<SCEVConstant>(I->Offset)->getValue() : nullptr,
1063  DeletedPtrs);
1064  assert(NewPtr && "wrong rewrite!\n");
1065  NewPtrs.insert(NewPtr);
1066  }
1067 
1068  // Clear the rewriter cache, because values that are in the rewriter's cache
1069  // can be deleted below, causing the AssertingVH in the cache to trigger.
1070  SCEVE.clear();
1071 
1072  for (auto *Ptr : DeletedPtrs) {
1073  if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
1074  BBChanged.insert(IDel->getParent());
1076  }
1077 
1078  MadeChange = true;
1079 
1080  SuccPrepCount++;
1081 
1082  if (Form == DSForm && !CanPreInc)
1083  DSFormChainRewritten++;
1084  else if (Form == DQForm)
1085  DQFormChainRewritten++;
1086  else if (Form == UpdateForm || (Form == DSForm && CanPreInc))
1087  UpdFormChainRewritten++;
1088 
1089  return MadeChange;
1090 }
1091 
1092 bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L,
1093  SmallVector<Bucket, 16> &Buckets) {
1094  bool MadeChange = false;
1095  if (Buckets.empty())
1096  return MadeChange;
1097  SmallSet<BasicBlock *, 16> BBChanged;
1098  for (auto &Bucket : Buckets)
1099  // The base address of each bucket is transformed into a phi and the others
1100  // are rewritten based on new base.
1101  if (prepareBaseForUpdateFormChain(Bucket))
1102  MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm);
1103 
1104  if (MadeChange)
1105  for (auto *BB : BBChanged)
1106  DeleteDeadPHIs(BB);
1107  return MadeChange;
1108 }
1109 
1110 bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L,
1111  SmallVector<Bucket, 16> &Buckets,
1112  PrepForm Form) {
1113  bool MadeChange = false;
1114 
1115  if (Buckets.empty())
1116  return MadeChange;
1117 
1118  SmallSet<BasicBlock *, 16> BBChanged;
1119  for (auto &Bucket : Buckets) {
1120  if (Bucket.Elements.size() < DispFormPrepMinThreshold)
1121  continue;
1122  if (prepareBaseForDispFormChain(Bucket, Form))
1123  MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, Form);
1124  }
1125 
1126  if (MadeChange)
1127  for (auto *BB : BBChanged)
1128  DeleteDeadPHIs(BB);
1129  return MadeChange;
1130 }
1131 
1132 // Find the loop invariant increment node for SCEV BasePtrIncSCEV.
1133 // bb.loop.preheader:
1134 // %start = ...
1135 // bb.loop.body:
1136 // %phinode = phi [ %start, %bb.loop.preheader ], [ %add, %bb.loop.body ]
1137 // ...
1138 // %add = add %phinode, %inc ; %inc is what we want to get.
1139 //
1140 Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI,
1141  const SCEV *BasePtrIncSCEV) {
1142  // If the increment is a constant, no definition is needed.
1143  // Return the value directly.
1144  if (isa<SCEVConstant>(BasePtrIncSCEV))
1145  return cast<SCEVConstant>(BasePtrIncSCEV)->getValue();
1146 
1147  if (!SE->isLoopInvariant(BasePtrIncSCEV, L))
1148  return nullptr;
1149 
1150  BasicBlock *BB = MemI->getParent();
1151  if (!BB)
1152  return nullptr;
1153 
1154  BasicBlock *LatchBB = L->getLoopLatch();
1155 
1156  if (!LatchBB)
1157  return nullptr;
1158 
1159  // Run through the PHIs and check their operands to find valid representation
1160  // for the increment SCEV.
1161  iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
1162  for (auto &CurrentPHI : PHIIter) {
1163  PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
1164  if (!CurrentPHINode)
1165  continue;
1166 
1167  if (!SE->isSCEVable(CurrentPHINode->getType()))
1168  continue;
1169 
1170  const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
1171 
1172  const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
1173  if (!PHIBasePtrSCEV)
1174  continue;
1175 
1176  const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(*SE);
1177 
1178  if (!PHIBasePtrIncSCEV || (PHIBasePtrIncSCEV != BasePtrIncSCEV))
1179  continue;
1180 
1181  // Get the incoming value from the loop latch and check if the value has
1182  // the add form with the required increment.
1183  if (Instruction *I = dyn_cast<Instruction>(
1184  CurrentPHINode->getIncomingValueForBlock(LatchBB))) {
1185  Value *StrippedBaseI = I;
1186  while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBaseI))
1187  StrippedBaseI = BC->getOperand(0);
1188 
1189  Instruction *StrippedI = dyn_cast<Instruction>(StrippedBaseI);
1190  if (!StrippedI)
1191  continue;
1192 
1193  // LSR pass may add a getelementptr instruction to do the loop increment,
1194  // also search in that getelementptr instruction.
1195  if (StrippedI->getOpcode() == Instruction::Add ||
1196  (StrippedI->getOpcode() == Instruction::GetElementPtr &&
1197  StrippedI->getNumOperands() == 2)) {
1198  if (SE->getSCEVAtScope(StrippedI->getOperand(0), L) == BasePtrIncSCEV)
1199  return StrippedI->getOperand(0);
1200  if (SE->getSCEVAtScope(StrippedI->getOperand(1), L) == BasePtrIncSCEV)
1201  return StrippedI->getOperand(1);
1202  }
1203  }
1204  }
1205  return nullptr;
1206 }
1207 
1208 // In order to prepare for the preferred instruction form, a PHI is added.
1209 // This function will check to see if that PHI already exists and will return
1210 // true if it found an existing PHI with the matched start and increment as the
1211 // one we wanted to create.
1212 bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
1213  const SCEV *BasePtrStartSCEV,
1214  const SCEV *BasePtrIncSCEV,
1215  PrepForm Form) {
1216  BasicBlock *BB = MemI->getParent();
1217  if (!BB)
1218  return false;
1219 
1220  BasicBlock *PredBB = L->getLoopPredecessor();
1221  BasicBlock *LatchBB = L->getLoopLatch();
1222 
1223  if (!PredBB || !LatchBB)
1224  return false;
1225 
1226  // Run through the PHIs and see if we have some that looks like a preparation
1227  iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
1228  for (auto & CurrentPHI : PHIIter) {
1229  PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
1230  if (!CurrentPHINode)
1231  continue;
1232 
1233  if (!SE->isSCEVable(CurrentPHINode->getType()))
1234  continue;
1235 
1236  const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
1237 
1238  const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
1239  if (!PHIBasePtrSCEV)
1240  continue;
1241 
1242  const SCEVConstant *PHIBasePtrIncSCEV =
1243  dyn_cast<SCEVConstant>(PHIBasePtrSCEV->getStepRecurrence(*SE));
1244  if (!PHIBasePtrIncSCEV)
1245  continue;
1246 
1247  if (CurrentPHINode->getNumIncomingValues() == 2) {
1248  if ((CurrentPHINode->getIncomingBlock(0) == LatchBB &&
1249  CurrentPHINode->getIncomingBlock(1) == PredBB) ||
1250  (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
1251  CurrentPHINode->getIncomingBlock(0) == PredBB)) {
1252  if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
1253  // The existing PHI (CurrentPHINode) has the same start and increment
1254  // as the PHI that we wanted to create.
1255  if ((Form == UpdateForm || Form == ChainCommoning ) &&
1256  PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
1257  ++PHINodeAlreadyExistsUpdate;
1258  return true;
1259  }
1260  if (Form == DSForm || Form == DQForm) {
1261  const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
1262  SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV));
1263  if (Diff && !Diff->getAPInt().urem(Form)) {
1264  if (Form == DSForm)
1265  ++PHINodeAlreadyExistsDS;
1266  else
1267  ++PHINodeAlreadyExistsDQ;
1268  return true;
1269  }
1270  }
1271  }
1272  }
1273  }
1274  }
1275  return false;
1276 }
1277 
1278 bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
1279  bool MadeChange = false;
1280 
1281  // Only prep. the inner-most loop
1282  if (!L->isInnermost())
1283  return MadeChange;
1284 
1285  // Return if already done enough preparation.
1286  if (SuccPrepCount >= MaxVarsPrep)
1287  return MadeChange;
1288 
1289  LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
1290 
1291  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
1292  // If there is no loop predecessor, or the loop predecessor's terminator
1293  // returns a value (which might contribute to determining the loop's
1294  // iteration space), insert a new preheader for the loop.
1295  if (!LoopPredecessor ||
1296  !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
1297  LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
1298  if (LoopPredecessor)
1299  MadeChange = true;
1300  }
1301  if (!LoopPredecessor) {
1302  LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n");
1303  return MadeChange;
1304  }
1305  // Check if a load/store has update form. This lambda is used by function
1306  // collectCandidates which can collect candidates for types defined by lambda.
1307  auto isUpdateFormCandidate = [&](const Instruction *I, Value *PtrValue,
1308  const Type *PointerElementType) {
1309  assert((PtrValue && I) && "Invalid parameter!");
1310  // There are no update forms for Altivec vector load/stores.
1311  if (ST && ST->hasAltivec() && PointerElementType->isVectorTy())
1312  return false;
1313  // There are no update forms for P10 lxvp/stxvp intrinsic.
1314  auto *II = dyn_cast<IntrinsicInst>(I);
1315  if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) ||
1316  II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp))
1317  return false;
1318  // See getPreIndexedAddressParts, the displacement for LDU/STDU has to
1319  // be 4's multiple (DS-form). For i64 loads/stores when the displacement
1320  // fits in a 16-bit signed field but isn't a multiple of 4, it will be
1321  // useless and possible to break some original well-form addressing mode
1322  // to make this pre-inc prep for it.
1323  if (PointerElementType->isIntegerTy(64)) {
1324  const SCEV *LSCEV = SE->getSCEVAtScope(const_cast<Value *>(PtrValue), L);
1325  const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
1326  if (!LARSCEV || LARSCEV->getLoop() != L)
1327  return false;
1328  if (const SCEVConstant *StepConst =
1329  dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {
1330  const APInt &ConstInt = StepConst->getValue()->getValue();
1331  if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)
1332  return false;
1333  }
1334  }
1335  return true;
1336  };
1337 
1338  // Check if a load/store has DS form.
1339  auto isDSFormCandidate = [](const Instruction *I, Value *PtrValue,
1340  const Type *PointerElementType) {
1341  assert((PtrValue && I) && "Invalid parameter!");
1342  if (isa<IntrinsicInst>(I))
1343  return false;
1344  return (PointerElementType->isIntegerTy(64)) ||
1345  (PointerElementType->isFloatTy()) ||
1346  (PointerElementType->isDoubleTy()) ||
1347  (PointerElementType->isIntegerTy(32) &&
1348  llvm::any_of(I->users(),
1349  [](const User *U) { return isa<SExtInst>(U); }));
1350  };
1351 
1352  // Check if a load/store has DQ form.
1353  auto isDQFormCandidate = [&](const Instruction *I, Value *PtrValue,
1354  const Type *PointerElementType) {
1355  assert((PtrValue && I) && "Invalid parameter!");
1356  // Check if it is a P10 lxvp/stxvp intrinsic.
1357  auto *II = dyn_cast<IntrinsicInst>(I);
1358  if (II)
1359  return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp ||
1360  II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp;
1361  // Check if it is a P9 vector load/store.
1362  return ST && ST->hasP9Vector() && (PointerElementType->isVectorTy());
1363  };
1364 
1365  // Check if a load/store is candidate for chain commoning.
1366  // If the SCEV is only with one ptr operand in its start, we can use that
1367  // start as a chain separator. Mark this load/store as a candidate.
1368  auto isChainCommoningCandidate = [&](const Instruction *I, Value *PtrValue,
1369  const Type *PointerElementType) {
1370  const SCEVAddRecExpr *ARSCEV =
1371  cast<SCEVAddRecExpr>(SE->getSCEVAtScope(PtrValue, L));
1372  if (!ARSCEV)
1373  return false;
1374 
1375  if (!ARSCEV->isAffine())
1376  return false;
1377 
1378  const SCEV *Start = ARSCEV->getStart();
1379 
1380  // A single pointer. We can treat it as offset 0.
1381  if (isa<SCEVUnknown>(Start) && Start->getType()->isPointerTy())
1382  return true;
1383 
1384  const SCEVAddExpr *ASCEV = dyn_cast<SCEVAddExpr>(Start);
1385 
1386  // We need a SCEVAddExpr to include both base and offset.
1387  if (!ASCEV)
1388  return false;
1389 
1390  // Make sure there is only one pointer operand(base) and all other operands
1391  // are integer type.
1392  bool SawPointer = false;
1393  for (const SCEV *Op : ASCEV->operands()) {
1394  if (Op->getType()->isPointerTy()) {
1395  if (SawPointer)
1396  return false;
1397  SawPointer = true;
1398  } else if (!Op->getType()->isIntegerTy())
1399  return false;
1400  }
1401 
1402  return SawPointer;
1403  };
1404 
1405  // Check if the diff is a constant type. This is used for update/DS/DQ form
1406  // preparation.
1407  auto isValidConstantDiff = [](const SCEV *Diff) {
1408  return dyn_cast<SCEVConstant>(Diff) != nullptr;
1409  };
1410 
1411  // Make sure the diff between the base and new candidate is required type.
1412  // This is used for chain commoning preparation.
1413  auto isValidChainCommoningDiff = [](const SCEV *Diff) {
1414  assert(Diff && "Invalid Diff!\n");
1415 
1416  // Don't mess up previous dform prepare.
1417  if (isa<SCEVConstant>(Diff))
1418  return false;
1419 
1420  // A single integer type offset.
1421  if (isa<SCEVUnknown>(Diff) && Diff->getType()->isIntegerTy())
1422  return true;
1423 
1424  const SCEVNAryExpr *ADiff = dyn_cast<SCEVNAryExpr>(Diff);
1425  if (!ADiff)
1426  return false;
1427 
1428  for (const SCEV *Op : ADiff->operands())
1429  if (!Op->getType()->isIntegerTy())
1430  return false;
1431 
1432  return true;
1433  };
1434 
1435  HasCandidateForPrepare = false;
1436 
1437  LLVM_DEBUG(dbgs() << "Start to prepare for update form.\n");
1438  // Collect buckets of comparable addresses used by loads and stores for update
1439  // form.
1440  SmallVector<Bucket, 16> UpdateFormBuckets = collectCandidates(
1441  L, isUpdateFormCandidate, isValidConstantDiff, MaxVarsUpdateForm);
1442 
1443  // Prepare for update form.
1444  if (!UpdateFormBuckets.empty())
1445  MadeChange |= updateFormPrep(L, UpdateFormBuckets);
1446  else if (!HasCandidateForPrepare) {
1447  LLVM_DEBUG(
1448  dbgs()
1449  << "No prepare candidates found, stop praparation for current loop!\n");
1450  // If no candidate for preparing, return early.
1451  return MadeChange;
1452  }
1453 
1454  LLVM_DEBUG(dbgs() << "Start to prepare for DS form.\n");
1455  // Collect buckets of comparable addresses used by loads and stores for DS
1456  // form.
1457  SmallVector<Bucket, 16> DSFormBuckets = collectCandidates(
1458  L, isDSFormCandidate, isValidConstantDiff, MaxVarsDSForm);
1459 
1460  // Prepare for DS form.
1461  if (!DSFormBuckets.empty())
1462  MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm);
1463 
1464  LLVM_DEBUG(dbgs() << "Start to prepare for DQ form.\n");
1465  // Collect buckets of comparable addresses used by loads and stores for DQ
1466  // form.
1467  SmallVector<Bucket, 16> DQFormBuckets = collectCandidates(
1468  L, isDQFormCandidate, isValidConstantDiff, MaxVarsDQForm);
1469 
1470  // Prepare for DQ form.
1471  if (!DQFormBuckets.empty())
1472  MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm);
1473 
1474  // Collect buckets of comparable addresses used by loads and stores for chain
1475  // commoning. With chain commoning, we reuse offsets between the chains, so
1476  // the register pressure will be reduced.
1477  if (!EnableChainCommoning) {
1478  LLVM_DEBUG(dbgs() << "Chain commoning is not enabled.\n");
1479  return MadeChange;
1480  }
1481 
1482  LLVM_DEBUG(dbgs() << "Start to prepare for chain commoning.\n");
1483  SmallVector<Bucket, 16> Buckets =
1484  collectCandidates(L, isChainCommoningCandidate, isValidChainCommoningDiff,
1486 
1487  // Prepare for chain commoning.
1488  if (!Buckets.empty())
1489  MadeChange |= chainCommoning(L, Buckets);
1490 
1491  return MadeChange;
1492 }
i
i
Definition: README.txt:29
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:517
llvm::GetElementPtrInst::setIsInBounds
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Definition: Instructions.cpp:1828
MaxVarsChainCommon
static cl::opt< unsigned > MaxVarsChainCommon("ppc-chaincommon-max-vars", cl::Hidden, cl::init(4), cl::desc("Bucket number per loop for PPC loop chain common"))
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:4424
GEPNodeIncNameSuffix
static constexpr StringRef GEPNodeIncNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:348
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::isSafeToExpand
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode=true)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
Definition: ScalarEvolutionExpander.cpp:2618
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:370
IntrinsicInst.h
ScalarEvolutionExpander.h
llvm::APInt::isSignedIntN
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:420
MaxVarsDQForm
static cl::opt< unsigned > MaxVarsDQForm("ppc-dqprep-max-vars", cl::Hidden, cl::init(8), cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"))
Scalar.h
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:353
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
Pass.h
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
CastNodeNameSuffix
static constexpr StringRef CastNodeNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:347
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5212
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::pred_size
unsigned pred_size(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:32
llvm::LoopBase::getLoopPredecessor
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
Definition: LoopInfoImpl.h:194
llvm::cast
decltype(auto) LLVM_NODISCARD cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
llvm::dwarf::Form
Form
Definition: Dwarf.h:132
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
Local.h
llvm::createPPCLoopInstrFormPrepPass
FunctionPass * createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM)
Definition: PPCLoopInstrFormPrep.cpp:351
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::SCEVExpander::expandCodeFor
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
Definition: ScalarEvolutionExpander.h:275
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
getPointerOperandAndType
static Value * getPointerOperandAndType(Value *MemI, Type **PtrElementType=nullptr)
Definition: PPCLoopInstrFormPrep.cpp:373
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1287
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
PHINodeNameSuffix
static constexpr StringRef PHINodeNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:346
llvm::SmallPtrSet< Value *, 16 >
EnableChainCommoning
static cl::opt< bool > EnableChainCommoning("ppc-formprep-chain-commoning", cl::init(false), cl::Hidden, cl::desc("Enable chain commoning in PPC loop prepare pass."))
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::SCEVNAryExpr::operands
op_range operands() const
Definition: ScalarEvolutionExpressions.h:211
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::initializePPCLoopInstrFormPrepPass
void initializePPCLoopInstrFormPrepPass(PassRegistry &)
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
PPCSubtarget.h
CommandLine.h
llvm::Instruction::insertAfter
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Definition: Instruction.cpp:89
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:157
EnableUpdateFormForNonConstInc
static cl::opt< bool > EnableUpdateFormForNonConstInc("ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden, cl::desc("prepare update form when the load/store increment is a loop " "invariant non-const value."))
MaxVarsPrep
static cl::opt< unsigned > MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24), cl::desc("Potential common base number threshold per function " "for PPC loop prep"))
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PPCSubtarget
Definition: PPCSubtarget.h:71
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
MaxVarsDSForm
static cl::opt< unsigned > MaxVarsDSForm("ppc-dsprep-max-vars", cl::Hidden, cl::init(3), cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"))
llvm::User
Definition: User.h:44
name
static const char * name
Definition: PPCLoopInstrFormPrep.cpp:340
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:194
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2836
llvm::Instruction
Definition: Instruction.h:42
MaxVarsUpdateForm
static cl::opt< unsigned > MaxVarsUpdateForm("ppc-preinc-prep-max-vars", cl::Hidden, cl::init(3), cl::desc("Potential PHI threshold per loop for PPC loop prep of update " "form"))
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
PPC.h
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
LoopUtils.h
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2153
SmallPtrSet.h
llvm::SCEVNAryExpr
This node is a base class providing common functionality for n'ary operators.
Definition: ScalarEvolutionExpressions.h:184
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:147
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:209
Utils.h
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2743
ChainCommonPrepMinThreshold
static cl::opt< unsigned > ChainCommonPrepMinThreshold("ppc-chaincommon-min-threshold", cl::Hidden, cl::init(4), cl::desc("Minimal common base load/store instructions triggering chain " "commoning preparation. Must be not smaller than 4"))
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
llvm::APInt::srem
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1734
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:118
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1392
DispFormPrepMinThreshold
static cl::opt< unsigned > DispFormPrepMinThreshold("ppc-dispprep-min-threshold", cl::Hidden, cl::init(2), cl::desc("Minimal common base load/store instructions triggering DS/DQ form " "preparation"))
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:240
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:297
inc
Current eax eax eax ret Ideal eax eax ret Re implement atomic builtins x86 does not have to use add to implement these it can use inc
Definition: README.txt:1367
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:416
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2801
llvm::DenseMap
Definition: DenseMap.h:716
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:916
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
GEPNodeOffNameSuffix
static constexpr StringRef GEPNodeOffNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:349
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:215
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::SCEVExpander::clear
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
Definition: ScalarEvolutionExpander.h:199
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::APInt::urem
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1664
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
getInstrName
static std::string getInstrName(const Value *I, StringRef Suffix)
Definition: PPCLoopInstrFormPrep.cpp:365
prefetch
loop data prefetch
Definition: LoopDataPrefetch.cpp:149
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:942
llvm::LoopInfo
Definition: LoopInfo.h:1102
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1624
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PPCLoopInstrFormPrep.cpp:114
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:845
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
PreferUpdateForm
static cl::opt< bool > PreferUpdateForm("ppc-formprep-prefer-update", cl::init(true), cl::Hidden, cl::desc("prefer update form when ds form is also a update form"))
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:173
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13400
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:230
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
j
return j(j<< 16)
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:181
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:486
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4510
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:148
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2693
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4288
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:151
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition: PPCTargetMachine.h:25
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:162
llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition: ScalarEvolutionExpressions.h:70
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
ScalarEvolutionExpressions.h
IsPtrInBounds
static bool IsPtrInBounds(Value *BasePtr)
Definition: PPCLoopInstrFormPrep.cpp:355
Instructions.h
llvm::numbers::phi
constexpr double phi
Definition: MathExtras.h:71
llvm::SCEVAddExpr
This node represents an addition of some number of SCEVs.
Definition: ScalarEvolutionExpressions.h:257
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
SmallVector.h
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2767
llvm::iterator_range
A range adaptor for a pair of iterators.
Definition: iterator_range.h:30
llvm::PHINode
Definition: Instructions.h:2651
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:392
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2453
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:405
BasicBlockUtils.h
Value.h
InitializePasses.h
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:9421
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
PPCTargetMachine.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38