LLVM  7.0.0svn
PPCCTRLoops.cpp
Go to the documentation of this file.
1 //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass identifies loops where we can generate the PPC branch instructions
11 // that decrement and test the count register (CTR) (bdnz and friends).
12 //
13 // The pattern that defines the induction variable can changed depending on
14 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
15 // normalizes induction variables, and the Loop Strength Reduction pass
16 // run by 'llc' may also make changes to the induction variable.
17 //
18 // Criteria for CTR loops:
19 // - Countable loops (w/ ind. var for a trip count)
20 // - Try inner-most loops first
21 // - No nested CTR loops.
22 // - No function calls in loops.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "PPC.h"
27 #include "PPCSubtarget.h"
28 #include "PPCTargetMachine.h"
29 #include "PPCTargetTransformInfo.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/CFG.h"
35 #include "llvm/Analysis/LoopInfo.h"
43 #include "llvm/IR/Constants.h"
44 #include "llvm/IR/DerivedTypes.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/InlineAsm.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/IntrinsicInst.h"
49 #include "llvm/IR/Module.h"
50 #include "llvm/IR/ValueHandle.h"
51 #include "llvm/PassSupport.h"
53 #include "llvm/Support/Debug.h"
55 #include "llvm/Transforms/Scalar.h"
56 #include "llvm/Transforms/Utils.h"
59 
60 #ifndef NDEBUG
65 #endif
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "ctrloops"
70 
71 #ifndef NDEBUG
72 static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
73 #endif
74 
75 // The latency of mtctr is only justified if there are more than 4
76 // comparisons that will be removed as a result.
77 static cl::opt<unsigned>
78 SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
79  cl::desc("Loops with a constant trip count smaller than "
80  "this value will not use the count register."));
81 
82 STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
83 
84 namespace llvm {
86 #ifndef NDEBUG
88 #endif
89 }
90 
91 namespace {
92  struct PPCCTRLoops : public FunctionPass {
93 
94 #ifndef NDEBUG
95  static int Counter;
96 #endif
97 
98  public:
99  static char ID;
100 
101  PPCCTRLoops() : FunctionPass(ID) {
103  }
104 
105  bool runOnFunction(Function &F) override;
106 
107  void getAnalysisUsage(AnalysisUsage &AU) const override {
115  }
116 
117  private:
118  bool mightUseCTR(BasicBlock *BB);
119  bool convertToCTRLoop(Loop *L);
120 
121  private:
122  const PPCTargetMachine *TM;
123  const PPCSubtarget *STI;
124  const PPCTargetLowering *TLI;
125  const DataLayout *DL;
126  const TargetLibraryInfo *LibInfo;
127  const TargetTransformInfo *TTI;
128  LoopInfo *LI;
129  ScalarEvolution *SE;
130  DominatorTree *DT;
131  bool PreserveLCSSA;
132  TargetSchedModel SchedModel;
133  };
134 
135  char PPCCTRLoops::ID = 0;
136 #ifndef NDEBUG
137  int PPCCTRLoops::Counter = 0;
138 #endif
139 
140 #ifndef NDEBUG
141  struct PPCCTRLoopsVerify : public MachineFunctionPass {
142  public:
143  static char ID;
144 
145  PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
147  }
148 
149  void getAnalysisUsage(AnalysisUsage &AU) const override {
152  }
153 
154  bool runOnMachineFunction(MachineFunction &MF) override;
155 
156  private:
158  };
159 
160  char PPCCTRLoopsVerify::ID = 0;
161 #endif // NDEBUG
162 } // end anonymous namespace
163 
164 INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
165  false, false)
169 INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
170  false, false)
171 
172 FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
173 
174 #ifndef NDEBUG
175 INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
176  "PowerPC CTR Loops Verify", false, false)
178 INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
179  "PowerPC CTR Loops Verify", false, false)
180 
182  return new PPCCTRLoopsVerify();
183 }
184 #endif // NDEBUG
185 
187  if (skipFunction(F))
188  return false;
189 
190  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
191  if (!TPC)
192  return false;
193 
194  TM = &TPC->getTM<PPCTargetMachine>();
195  STI = TM->getSubtargetImpl(F);
196  TLI = STI->getTargetLowering();
197 
198  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
199  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
200  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
201  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
202  DL = &F.getParent()->getDataLayout();
203  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
204  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
205  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
206 
207  bool MadeChange = false;
208 
209  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
210  I != E; ++I) {
211  Loop *L = *I;
212  if (!L->getParentLoop())
213  MadeChange |= convertToCTRLoop(L);
214  }
215 
216  return MadeChange;
217 }
218 
219 static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
220  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
221  return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
222 
223  return false;
224 }
225 
226 // Determining the address of a TLS variable results in a function call in
227 // certain TLS models.
228 static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
229  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
230  if (!GV) {
231  // Recurse to check for constants that refer to TLS global variables.
232  if (const auto *CV = dyn_cast<Constant>(MemAddr))
233  for (const auto &CO : CV->operands())
234  if (memAddrUsesCTR(TM, CO))
235  return true;
236 
237  return false;
238  }
239 
240  if (!GV->isThreadLocal())
241  return false;
243  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
244 }
245 
246 // Loop through the inline asm constraints and look for something that clobbers
247 // ctr.
248 static bool asmClobbersCTR(InlineAsm *IA) {
250  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
251  InlineAsm::ConstraintInfo &C = CIV[i];
252  if (C.Type != InlineAsm::isInput)
253  for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
254  if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
255  return true;
256  }
257  return false;
258 }
259 
260 bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
261  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
262  J != JE; ++J) {
263  if (CallInst *CI = dyn_cast<CallInst>(J)) {
264  // Inline ASM is okay, unless it clobbers the ctr register.
265  if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
266  if (asmClobbersCTR(IA))
267  return true;
268  continue;
269  }
270 
271  if (Function *F = CI->getCalledFunction()) {
272  // Most intrinsics don't become function calls, but some might.
273  // sin, cos, exp and log are always calls.
274  unsigned Opcode = 0;
276  switch (F->getIntrinsicID()) {
277  default: continue;
278  // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
279  // we're definitely using CTR.
280  case Intrinsic::ppc_is_decremented_ctr_nonzero:
281  case Intrinsic::ppc_mtctr:
282  return true;
283 
284 // VisualStudio defines setjmp as _setjmp
285 #if defined(_MSC_VER) && defined(setjmp) && \
286  !defined(setjmp_undefined_for_msvc)
287 # pragma push_macro("setjmp")
288 # undef setjmp
289 # define setjmp_undefined_for_msvc
290 #endif
291 
292  case Intrinsic::setjmp:
293 
294 #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
295  // let's return it to _setjmp state
296 # pragma pop_macro("setjmp")
297 # undef setjmp_undefined_for_msvc
298 #endif
299 
300  case Intrinsic::longjmp:
301 
302  // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
303  // because, although it does clobber the counter register, the
304  // control can't then return to inside the loop unless there is also
305  // an eh_sjlj_setjmp.
306  case Intrinsic::eh_sjlj_setjmp:
307 
308  case Intrinsic::memcpy:
309  case Intrinsic::memmove:
310  case Intrinsic::memset:
311  case Intrinsic::powi:
312  case Intrinsic::log:
313  case Intrinsic::log2:
314  case Intrinsic::log10:
315  case Intrinsic::exp:
316  case Intrinsic::exp2:
317  case Intrinsic::pow:
318  case Intrinsic::sin:
319  case Intrinsic::cos:
320  return true;
321  case Intrinsic::copysign:
322  if (CI->getArgOperand(0)->getType()->getScalarType()->
323  isPPC_FP128Ty())
324  return true;
325  else
326  continue; // ISD::FCOPYSIGN is never a library call.
327  case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
328  case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
329  case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
330  case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
331  case Intrinsic::rint: Opcode = ISD::FRINT; break;
332  case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
333  case Intrinsic::round: Opcode = ISD::FROUND; break;
334  case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
335  case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
336  case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
337  case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
338  }
339  }
340 
341  // PowerPC does not use [US]DIVREM or other library calls for
342  // operations on regular types which are not otherwise library calls
343  // (i.e. soft float or atomics). If adapting for targets that do,
344  // additional care is required here.
345 
346  LibFunc Func;
347  if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
348  LibInfo->getLibFunc(F->getName(), Func) &&
349  LibInfo->hasOptimizedCodeGen(Func)) {
350  // Non-read-only functions are never treated as intrinsics.
351  if (!CI->onlyReadsMemory())
352  return true;
353 
354  // Conversion happens only for FP calls.
355  if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
356  return true;
357 
358  switch (Func) {
359  default: return true;
360  case LibFunc_copysign:
361  case LibFunc_copysignf:
362  continue; // ISD::FCOPYSIGN is never a library call.
363  case LibFunc_copysignl:
364  return true;
365  case LibFunc_fabs:
366  case LibFunc_fabsf:
367  case LibFunc_fabsl:
368  continue; // ISD::FABS is never a library call.
369  case LibFunc_sqrt:
370  case LibFunc_sqrtf:
371  case LibFunc_sqrtl:
372  Opcode = ISD::FSQRT; break;
373  case LibFunc_floor:
374  case LibFunc_floorf:
375  case LibFunc_floorl:
376  Opcode = ISD::FFLOOR; break;
377  case LibFunc_nearbyint:
378  case LibFunc_nearbyintf:
379  case LibFunc_nearbyintl:
380  Opcode = ISD::FNEARBYINT; break;
381  case LibFunc_ceil:
382  case LibFunc_ceilf:
383  case LibFunc_ceill:
384  Opcode = ISD::FCEIL; break;
385  case LibFunc_rint:
386  case LibFunc_rintf:
387  case LibFunc_rintl:
388  Opcode = ISD::FRINT; break;
389  case LibFunc_round:
390  case LibFunc_roundf:
391  case LibFunc_roundl:
392  Opcode = ISD::FROUND; break;
393  case LibFunc_trunc:
394  case LibFunc_truncf:
395  case LibFunc_truncl:
396  Opcode = ISD::FTRUNC; break;
397  case LibFunc_fmin:
398  case LibFunc_fminf:
399  case LibFunc_fminl:
400  Opcode = ISD::FMINNUM; break;
401  case LibFunc_fmax:
402  case LibFunc_fmaxf:
403  case LibFunc_fmaxl:
404  Opcode = ISD::FMAXNUM; break;
405  }
406  }
407 
408  if (Opcode) {
409  EVT EVTy =
410  TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);
411 
412  if (EVTy == MVT::Other)
413  return true;
414 
415  if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
416  continue;
417  else if (EVTy.isVector() &&
418  TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
419  continue;
420 
421  return true;
422  }
423  }
424 
425  return true;
426  } else if (isa<BinaryOperator>(J) &&
427  J->getType()->getScalarType()->isPPC_FP128Ty()) {
428  // Most operations on ppc_f128 values become calls.
429  return true;
430  } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
431  isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
432  CastInst *CI = cast<CastInst>(J);
433  if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
434  CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
435  isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
436  isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
437  return true;
438  } else if (isLargeIntegerTy(!TM->isPPC64(),
439  J->getType()->getScalarType()) &&
440  (J->getOpcode() == Instruction::UDiv ||
441  J->getOpcode() == Instruction::SDiv ||
442  J->getOpcode() == Instruction::URem ||
443  J->getOpcode() == Instruction::SRem)) {
444  return true;
445  } else if (!TM->isPPC64() &&
446  isLargeIntegerTy(false, J->getType()->getScalarType()) &&
447  (J->getOpcode() == Instruction::Shl ||
448  J->getOpcode() == Instruction::AShr ||
449  J->getOpcode() == Instruction::LShr)) {
450  // Only on PPC32, for 128-bit integers (specifically not 64-bit
451  // integers), these might be runtime calls.
452  return true;
453  } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
454  // On PowerPC, indirect jumps use the counter register.
455  return true;
456  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
457  if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
458  return true;
459  }
460 
461  // FREM is always a call.
462  if (J->getOpcode() == Instruction::FRem)
463  return true;
464 
465  if (STI->useSoftFloat()) {
466  switch(J->getOpcode()) {
467  case Instruction::FAdd:
468  case Instruction::FSub:
469  case Instruction::FMul:
470  case Instruction::FDiv:
471  case Instruction::FPTrunc:
472  case Instruction::FPExt:
473  case Instruction::FPToUI:
474  case Instruction::FPToSI:
475  case Instruction::UIToFP:
476  case Instruction::SIToFP:
477  case Instruction::FCmp:
478  return true;
479  }
480  }
481 
482  for (Value *Operand : J->operands())
483  if (memAddrUsesCTR(*TM, Operand))
484  return true;
485  }
486 
487  return false;
488 }
489 bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
490  bool MadeChange = false;
491 
492  // Do not convert small short loops to CTR loop.
493  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
494  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
496  auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
497  *L->getHeader()->getParent());
498  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
500  for (BasicBlock *BB : L->blocks())
501  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
502  // 6 is an approximate latency for the mtctr instruction.
503  if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
504  return false;
505  }
506 
507  // Process nested loops first.
508  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
509  MadeChange |= convertToCTRLoop(*I);
510  LLVM_DEBUG(dbgs() << "Nested loop converted\n");
511  }
512 
513  // If a nested loop has been converted, then we can't convert this loop.
514  if (MadeChange)
515  return MadeChange;
516 
517  // Bail out if the loop has irreducible control flow.
518  LoopBlocksRPO RPOT(L);
519  RPOT.perform(LI);
520  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
521  return false;
522 
523 #ifndef NDEBUG
524  // Stop trying after reaching the limit (if any).
525  int Limit = CTRLoopLimit;
526  if (Limit >= 0) {
527  if (Counter >= CTRLoopLimit)
528  return false;
529  Counter++;
530  }
531 #endif
532 
533  // We don't want to spill/restore the counter register, and so we don't
534  // want to use the counter register if the loop contains calls.
535  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
536  I != IE; ++I)
537  if (mightUseCTR(*I))
538  return MadeChange;
539 
540  SmallVector<BasicBlock*, 4> ExitingBlocks;
541  L->getExitingBlocks(ExitingBlocks);
542 
543  // If there is an exit edge known to be frequently taken,
544  // we should not transform this loop.
545  for (auto &BB : ExitingBlocks) {
546  Instruction *TI = BB->getTerminator();
547  if (!TI) continue;
548 
549  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
550  uint64_t TrueWeight = 0, FalseWeight = 0;
551  if (!BI->isConditional() ||
552  !BI->extractProfMetadata(TrueWeight, FalseWeight))
553  continue;
554 
555  // If the exit path is more frequent than the loop path,
556  // we return here without further analysis for this loop.
557  bool TrueIsExit = !L->contains(BI->getSuccessor(0));
558  if (( TrueIsExit && FalseWeight < TrueWeight) ||
559  (!TrueIsExit && FalseWeight > TrueWeight))
560  return MadeChange;
561  }
562  }
563 
564  BasicBlock *CountedExitBlock = nullptr;
565  const SCEV *ExitCount = nullptr;
566  BranchInst *CountedExitBranch = nullptr;
567  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
568  IE = ExitingBlocks.end(); I != IE; ++I) {
569  const SCEV *EC = SE->getExitCount(L, *I);
570  LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
571  << (*I)->getName() << ": " << *EC << "\n");
572  if (isa<SCEVCouldNotCompute>(EC))
573  continue;
574  if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
575  if (ConstEC->getValue()->isZero())
576  continue;
577  } else if (!SE->isLoopInvariant(EC, L))
578  continue;
579 
580  if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
581  continue;
582 
583  // If this exiting block is contained in a nested loop, it is not eligible
584  // for insertion of the branch-and-decrement since the inner loop would
585  // end up messing up the value in the CTR.
586  if (LI->getLoopFor(*I) != L)
587  continue;
588 
589  // We now have a loop-invariant count of loop iterations (which is not the
590  // constant zero) for which we know that this loop will not exit via this
591  // existing block.
592 
593  // We need to make sure that this block will run on every loop iteration.
594  // For this to be true, we must dominate all blocks with backedges. Such
595  // blocks are in-loop predecessors to the header block.
596  bool NotAlways = false;
597  for (pred_iterator PI = pred_begin(L->getHeader()),
598  PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
599  if (!L->contains(*PI))
600  continue;
601 
602  if (!DT->dominates(*I, *PI)) {
603  NotAlways = true;
604  break;
605  }
606  }
607 
608  if (NotAlways)
609  continue;
610 
611  // Make sure this blocks ends with a conditional branch.
612  Instruction *TI = (*I)->getTerminator();
613  if (!TI)
614  continue;
615 
616  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
617  if (!BI->isConditional())
618  continue;
619 
620  CountedExitBranch = BI;
621  } else
622  continue;
623 
624  // Note that this block may not be the loop latch block, even if the loop
625  // has a latch block.
626  CountedExitBlock = *I;
627  ExitCount = EC;
628  break;
629  }
630 
631  if (!CountedExitBlock)
632  return MadeChange;
633 
634  BasicBlock *Preheader = L->getLoopPreheader();
635 
636  // If we don't have a preheader, then insert one. If we already have a
637  // preheader, then we can use it (except if the preheader contains a use of
638  // the CTR register because some such uses might be reordered by the
639  // selection DAG after the mtctr instruction).
640  if (!Preheader || mightUseCTR(Preheader))
641  Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
642  if (!Preheader)
643  return MadeChange;
644 
645  LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
646  << "\n");
647 
648  // Insert the count into the preheader and replace the condition used by the
649  // selected branch.
650  MadeChange = true;
651 
652  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
653  LLVMContext &C = SE->getContext();
654  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
655  if (!ExitCount->getType()->isPointerTy() &&
656  ExitCount->getType() != CountType)
657  ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
658  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
659  Value *ECValue =
660  SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
661 
662  IRBuilder<> CountBuilder(Preheader->getTerminator());
663  Module *M = Preheader->getParent()->getParent();
664  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
665  CountType);
666  CountBuilder.CreateCall(MTCTRFunc, ECValue);
667 
668  IRBuilder<> CondBuilder(CountedExitBranch);
669  Value *DecFunc =
670  Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
671  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
672  Value *OldCond = CountedExitBranch->getCondition();
673  CountedExitBranch->setCondition(NewCond);
674 
675  // The false branch must exit the loop.
676  if (!L->contains(CountedExitBranch->getSuccessor(0)))
677  CountedExitBranch->swapSuccessors();
678 
679  // The old condition may be dead now, and may have even created a dead PHI
680  // (the original induction variable).
682  // Run through the basic blocks of the loop and see if any of them have dead
683  // PHIs that can be removed.
684  for (auto I : L->blocks())
685  DeleteDeadPHIs(I);
686 
687  ++NumCTRLoops;
688  return MadeChange;
689 }
690 
691 #ifndef NDEBUG
692 static bool clobbersCTR(const MachineInstr &MI) {
693  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
694  const MachineOperand &MO = MI.getOperand(i);
695  if (MO.isReg()) {
696  if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
697  return true;
698  } else if (MO.isRegMask()) {
699  if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
700  return true;
701  }
702  }
703 
704  return false;
705 }
706 
712  bool CheckPreds;
713 
714  if (I == MBB->begin()) {
715  Visited.insert(MBB);
716  goto queue_preds;
717  } else
718  --I;
719 
720 check_block:
721  Visited.insert(MBB);
722  if (I == MBB->end())
723  goto queue_preds;
724 
725  CheckPreds = true;
726  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
727  unsigned Opc = I->getOpcode();
728  if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
729  CheckPreds = false;
730  break;
731  }
732 
733  if (I != BI && clobbersCTR(*I)) {
734  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
735  << ") instruction " << *I
736  << " clobbers CTR, invalidating "
737  << printMBBReference(*BI->getParent()) << " ("
738  << BI->getParent()->getFullName() << ") instruction "
739  << *BI << "\n");
740  return false;
741  }
742 
743  if (I == IE)
744  break;
745  }
746 
747  if (!CheckPreds && Preds.empty())
748  return true;
749 
750  if (CheckPreds) {
751 queue_preds:
752  if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
753  LLVM_DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
754  << printMBBReference(*BI->getParent()) << " ("
755  << BI->getParent()->getFullName() << ") instruction "
756  << *BI << "\n");
757  return false;
758  }
759 
761  PIE = MBB->pred_end(); PI != PIE; ++PI)
762  Preds.push_back(*PI);
763  }
764 
765  do {
766  MBB = Preds.pop_back_val();
767  if (!Visited.count(MBB)) {
768  I = MBB->getLastNonDebugInstr();
769  goto check_block;
770  }
771  } while (!Preds.empty());
772 
773  return true;
774 }
775 
776 bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
777  MDT = &getAnalysis<MachineDominatorTree>();
778 
779  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
780  // any other instructions that might clobber the ctr register.
781  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
782  I != IE; ++I) {
783  MachineBasicBlock *MBB = &*I;
784  if (!MDT->isReachableFromEntry(MBB))
785  continue;
786 
788  MIIE = MBB->end(); MII != MIIE; ++MII) {
789  unsigned Opc = MII->getOpcode();
790  if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
791  Opc == PPC::BDZ8 || Opc == PPC::BDZ)
792  if (!verifyCTRBranch(MBB, MII))
793  llvm_unreachable("Invalid PPC CTR loop!");
794  }
795  }
796 
797  return false;
798 }
799 #endif // NDEBUG
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:72
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
uint64_t CallInst * C
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:563
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:850
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool hasLocalLinkage() const
Definition: GlobalValue.h:435
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void swapSuccessors()
Swap the successors of this branch instruction.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:260
void initializePPCCTRLoopsVerifyPass(PassRegistry &)
ArrayRef< BasicBlock *>::const_iterator block_iterator
Definition: LoopInfo.h:153
LLVM_NODISCARD bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:176
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
An immutable pass that tracks lazily created AssumptionCache objects.
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn&#39;t have a preheader, this method is called...
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:714
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
F(f)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Value * getCondition() const
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
Definition: Type.h:159
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number...
Definition: InlineAsm.h:149
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:264
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:592
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:314
This file a TargetTransformInfo::Concept conforming object specific to the PPC target machine...
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
std::vector< Loop *>::const_iterator iterator
iterator/begin/end - The interface to the top-level loops in the current function.
Definition: LoopInfo.h:656
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:731
FunctionPass * createPPCCTRLoops()
static cl::opt< int > CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1))
BlockT * getHeader() const
Definition: LoopInfo.h:100
std::vector< Loop *>::const_iterator iterator
Definition: LoopInfo.h:139
ppc ctr loops PowerPC CTR Loops Verify
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) INITIALIZE_PASS_END(PPCCTRLoops
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
FunctionPass * createPPCCTRLoopsVerify()
iterator getLastNonDebugInstr()
Returns an iterator to the last non-debug instruction in the basic block, or end().
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber.
Definition: InlineAsm.h:121
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:142
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1007
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:304
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
Wrapper pass for TargetTransformInfo.
bool hasName() const
Definition: Value.h:251
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Conditional or Unconditional Branch instruction.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:423
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:113
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:116
Class to represent integer types.
Definition: DerivedTypes.h:40
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
Common code between 32-bit and 64-bit PowerPC targets.
std::vector< MachineBasicBlock * >::iterator pred_iterator
static double log2(double V)
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
Extended Value Type.
Definition: ValueTypes.h:34
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:429
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
bool verify(const TargetRegisterInfo &TRI) const
Check that information hold by this instance make sense for the given TRI.
void initializePPCCTRLoopsPass(PassRegistry &)
std::vector< ConstraintInfo > ConstraintInfoVector
Definition: InlineAsm.h:116
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:266
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
iterator begin() const
Definition: LoopInfo.h:142
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:852
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:180
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
This class uses information about analyze scalars to rewrite expressions in canonical form...
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Representation of each machine instruction.
Definition: MachineInstr.h:60
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
static bool asmClobbersCTR(InlineAsm *IA)
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:62
StringRef getName() const
Definition: LoopInfo.h:583
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
iterator end() const
Definition: LoopInfo.h:143
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
block_iterator block_end() const
Definition: LoopInfo.h:155
Same for multiplication.
Definition: ISDOpcodes.h:257
ppc ctr loops
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:173
void setCondition(Value *V)
Multiway switch.
static bool clobbersCTR(const MachineInstr &MI)
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
static bool isLargeIntegerTy(bool Is32Bit, Type *Ty)
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:73
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:964
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:254
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:181
#define LLVM_DEBUG(X)
Definition: Debug.h:119
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:316
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
block_iterator block_begin() const
Definition: LoopInfo.h:154
static bool verifyCTRBranch(MachineBasicBlock *MBB, MachineBasicBlock::iterator I)
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1227
static ConstraintInfoVector ParseConstraints(StringRef ConstraintString)
ParseConstraints - Split up the constraint string into the specific constraints and their prefixes...
Definition: InlineAsm.cpp:208
ppc ctr PowerPC CTR Loops
This class represents a constant integer value.
CallInst * CreateCall(Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1871
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:65