LLVM  7.0.0svn
PPCCTRLoops.cpp
Go to the documentation of this file.
1 //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass identifies loops where we can generate the PPC branch instructions
11 // that decrement and test the count register (CTR) (bdnz and friends).
12 //
13 // The pattern that defines the induction variable can changed depending on
14 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
15 // normalizes induction variables, and the Loop Strength Reduction pass
16 // run by 'llc' may also make changes to the induction variable.
17 //
18 // Criteria for CTR loops:
19 // - Countable loops (w/ ind. var for a trip count)
20 // - Try inner-most loops first
21 // - No nested CTR loops.
22 // - No function calls in loops.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "PPC.h"
27 #include "PPCSubtarget.h"
28 #include "PPCTargetMachine.h"
29 #include "PPCTargetTransformInfo.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/Statistic.h"
34 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DerivedTypes.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/InlineAsm.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Module.h"
47 #include "llvm/IR/ValueHandle.h"
48 #include "llvm/PassSupport.h"
50 #include "llvm/Support/Debug.h"
52 #include "llvm/Transforms/Scalar.h"
56 
57 #ifndef NDEBUG
62 #endif
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "ctrloops"
67 
68 #ifndef NDEBUG
69 static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
70 #endif
71 
72 // The latency of mtctr is only justified if there are more than 4
73 // comparisons that will be removed as a result.
74 static cl::opt<unsigned>
75 SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
76  cl::desc("Loops with a constant trip count smaller than "
77  "this value will not use the count register."));
78 
79 STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
80 
81 namespace llvm {
83 #ifndef NDEBUG
85 #endif
86 }
87 
88 namespace {
89  struct PPCCTRLoops : public FunctionPass {
90 
91 #ifndef NDEBUG
92  static int Counter;
93 #endif
94 
95  public:
96  static char ID;
97 
98  PPCCTRLoops() : FunctionPass(ID) {
100  }
101 
102  bool runOnFunction(Function &F) override;
103 
104  void getAnalysisUsage(AnalysisUsage &AU) const override {
112  }
113 
114  private:
115  bool mightUseCTR(BasicBlock *BB);
116  bool convertToCTRLoop(Loop *L);
117 
118  private:
119  const PPCTargetMachine *TM;
120  const PPCSubtarget *STI;
121  const PPCTargetLowering *TLI;
122  const DataLayout *DL;
123  const TargetLibraryInfo *LibInfo;
124  const TargetTransformInfo *TTI;
125  LoopInfo *LI;
126  ScalarEvolution *SE;
127  DominatorTree *DT;
128  bool PreserveLCSSA;
129  TargetSchedModel SchedModel;
130  };
131 
132  char PPCCTRLoops::ID = 0;
133 #ifndef NDEBUG
134  int PPCCTRLoops::Counter = 0;
135 #endif
136 
137 #ifndef NDEBUG
138  struct PPCCTRLoopsVerify : public MachineFunctionPass {
139  public:
140  static char ID;
141 
142  PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
144  }
145 
146  void getAnalysisUsage(AnalysisUsage &AU) const override {
149  }
150 
151  bool runOnMachineFunction(MachineFunction &MF) override;
152 
153  private:
155  };
156 
157  char PPCCTRLoopsVerify::ID = 0;
158 #endif // NDEBUG
159 } // end anonymous namespace
160 
161 INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
162  false, false)
166 INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
167  false, false)
168 
169 FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
170 
171 #ifndef NDEBUG
172 INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
173  "PowerPC CTR Loops Verify", false, false)
175 INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
176  "PowerPC CTR Loops Verify", false, false)
177 
179  return new PPCCTRLoopsVerify();
180 }
181 #endif // NDEBUG
182 
184  if (skipFunction(F))
185  return false;
186 
187  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
188  if (!TPC)
189  return false;
190 
191  TM = &TPC->getTM<PPCTargetMachine>();
192  STI = TM->getSubtargetImpl(F);
193  TLI = STI->getTargetLowering();
194 
195  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
196  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
197  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
198  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
199  DL = &F.getParent()->getDataLayout();
200  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
201  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
202  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
203 
204  bool MadeChange = false;
205 
206  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
207  I != E; ++I) {
208  Loop *L = *I;
209  if (!L->getParentLoop())
210  MadeChange |= convertToCTRLoop(L);
211  }
212 
213  return MadeChange;
214 }
215 
216 static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
217  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
218  return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
219 
220  return false;
221 }
222 
223 // Determining the address of a TLS variable results in a function call in
224 // certain TLS models.
225 static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
226  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
227  if (!GV) {
228  // Recurse to check for constants that refer to TLS global variables.
229  if (const auto *CV = dyn_cast<Constant>(MemAddr))
230  for (const auto &CO : CV->operands())
231  if (memAddrUsesCTR(TM, CO))
232  return true;
233 
234  return false;
235  }
236 
237  if (!GV->isThreadLocal())
238  return false;
240  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
241 }
242 
243 // Loop through the inline asm constraints and look for something that clobbers
244 // ctr.
245 static bool asmClobbersCTR(InlineAsm *IA) {
247  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
248  InlineAsm::ConstraintInfo &C = CIV[i];
249  if (C.Type != InlineAsm::isInput)
250  for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
251  if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
252  return true;
253  }
254  return false;
255 }
256 
257 bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
258  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
259  J != JE; ++J) {
260  if (CallInst *CI = dyn_cast<CallInst>(J)) {
261  // Inline ASM is okay, unless it clobbers the ctr register.
262  if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
263  if (asmClobbersCTR(IA))
264  return true;
265  continue;
266  }
267 
268  if (Function *F = CI->getCalledFunction()) {
269  // Most intrinsics don't become function calls, but some might.
270  // sin, cos, exp and log are always calls.
271  unsigned Opcode = 0;
273  switch (F->getIntrinsicID()) {
274  default: continue;
275  // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
276  // we're definitely using CTR.
277  case Intrinsic::ppc_is_decremented_ctr_nonzero:
278  case Intrinsic::ppc_mtctr:
279  return true;
280 
281 // VisualStudio defines setjmp as _setjmp
282 #if defined(_MSC_VER) && defined(setjmp) && \
283  !defined(setjmp_undefined_for_msvc)
284 # pragma push_macro("setjmp")
285 # undef setjmp
286 # define setjmp_undefined_for_msvc
287 #endif
288 
289  case Intrinsic::setjmp:
290 
291 #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
292  // let's return it to _setjmp state
293 # pragma pop_macro("setjmp")
294 # undef setjmp_undefined_for_msvc
295 #endif
296 
297  case Intrinsic::longjmp:
298 
299  // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
300  // because, although it does clobber the counter register, the
301  // control can't then return to inside the loop unless there is also
302  // an eh_sjlj_setjmp.
303  case Intrinsic::eh_sjlj_setjmp:
304 
305  case Intrinsic::memcpy:
306  case Intrinsic::memmove:
307  case Intrinsic::memset:
308  case Intrinsic::powi:
309  case Intrinsic::log:
310  case Intrinsic::log2:
311  case Intrinsic::log10:
312  case Intrinsic::exp:
313  case Intrinsic::exp2:
314  case Intrinsic::pow:
315  case Intrinsic::sin:
316  case Intrinsic::cos:
317  return true;
318  case Intrinsic::copysign:
319  if (CI->getArgOperand(0)->getType()->getScalarType()->
320  isPPC_FP128Ty())
321  return true;
322  else
323  continue; // ISD::FCOPYSIGN is never a library call.
324  case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
325  case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
326  case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
327  case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
328  case Intrinsic::rint: Opcode = ISD::FRINT; break;
329  case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
330  case Intrinsic::round: Opcode = ISD::FROUND; break;
331  case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
332  case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
333  case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
334  case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
335  }
336  }
337 
338  // PowerPC does not use [US]DIVREM or other library calls for
339  // operations on regular types which are not otherwise library calls
340  // (i.e. soft float or atomics). If adapting for targets that do,
341  // additional care is required here.
342 
343  LibFunc Func;
344  if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
345  LibInfo->getLibFunc(F->getName(), Func) &&
346  LibInfo->hasOptimizedCodeGen(Func)) {
347  // Non-read-only functions are never treated as intrinsics.
348  if (!CI->onlyReadsMemory())
349  return true;
350 
351  // Conversion happens only for FP calls.
352  if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
353  return true;
354 
355  switch (Func) {
356  default: return true;
357  case LibFunc_copysign:
358  case LibFunc_copysignf:
359  continue; // ISD::FCOPYSIGN is never a library call.
360  case LibFunc_copysignl:
361  return true;
362  case LibFunc_fabs:
363  case LibFunc_fabsf:
364  case LibFunc_fabsl:
365  continue; // ISD::FABS is never a library call.
366  case LibFunc_sqrt:
367  case LibFunc_sqrtf:
368  case LibFunc_sqrtl:
369  Opcode = ISD::FSQRT; break;
370  case LibFunc_floor:
371  case LibFunc_floorf:
372  case LibFunc_floorl:
373  Opcode = ISD::FFLOOR; break;
374  case LibFunc_nearbyint:
375  case LibFunc_nearbyintf:
376  case LibFunc_nearbyintl:
377  Opcode = ISD::FNEARBYINT; break;
378  case LibFunc_ceil:
379  case LibFunc_ceilf:
380  case LibFunc_ceill:
381  Opcode = ISD::FCEIL; break;
382  case LibFunc_rint:
383  case LibFunc_rintf:
384  case LibFunc_rintl:
385  Opcode = ISD::FRINT; break;
386  case LibFunc_round:
387  case LibFunc_roundf:
388  case LibFunc_roundl:
389  Opcode = ISD::FROUND; break;
390  case LibFunc_trunc:
391  case LibFunc_truncf:
392  case LibFunc_truncl:
393  Opcode = ISD::FTRUNC; break;
394  case LibFunc_fmin:
395  case LibFunc_fminf:
396  case LibFunc_fminl:
397  Opcode = ISD::FMINNUM; break;
398  case LibFunc_fmax:
399  case LibFunc_fmaxf:
400  case LibFunc_fmaxl:
401  Opcode = ISD::FMAXNUM; break;
402  }
403  }
404 
405  if (Opcode) {
406  EVT EVTy =
407  TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);
408 
409  if (EVTy == MVT::Other)
410  return true;
411 
412  if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
413  continue;
414  else if (EVTy.isVector() &&
415  TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
416  continue;
417 
418  return true;
419  }
420  }
421 
422  return true;
423  } else if (isa<BinaryOperator>(J) &&
424  J->getType()->getScalarType()->isPPC_FP128Ty()) {
425  // Most operations on ppc_f128 values become calls.
426  return true;
427  } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
428  isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
429  CastInst *CI = cast<CastInst>(J);
430  if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
431  CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
432  isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
433  isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
434  return true;
435  } else if (isLargeIntegerTy(!TM->isPPC64(),
436  J->getType()->getScalarType()) &&
437  (J->getOpcode() == Instruction::UDiv ||
438  J->getOpcode() == Instruction::SDiv ||
439  J->getOpcode() == Instruction::URem ||
440  J->getOpcode() == Instruction::SRem)) {
441  return true;
442  } else if (!TM->isPPC64() &&
443  isLargeIntegerTy(false, J->getType()->getScalarType()) &&
444  (J->getOpcode() == Instruction::Shl ||
445  J->getOpcode() == Instruction::AShr ||
446  J->getOpcode() == Instruction::LShr)) {
447  // Only on PPC32, for 128-bit integers (specifically not 64-bit
448  // integers), these might be runtime calls.
449  return true;
450  } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
451  // On PowerPC, indirect jumps use the counter register.
452  return true;
453  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
454  if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
455  return true;
456  }
457 
458  if (STI->useSoftFloat()) {
459  switch(J->getOpcode()) {
460  case Instruction::FAdd:
461  case Instruction::FSub:
462  case Instruction::FMul:
463  case Instruction::FDiv:
464  case Instruction::FRem:
465  case Instruction::FPTrunc:
466  case Instruction::FPExt:
467  case Instruction::FPToUI:
468  case Instruction::FPToSI:
469  case Instruction::UIToFP:
470  case Instruction::SIToFP:
471  case Instruction::FCmp:
472  return true;
473  }
474  }
475 
476  for (Value *Operand : J->operands())
477  if (memAddrUsesCTR(*TM, Operand))
478  return true;
479  }
480 
481  return false;
482 }
483 bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
484  bool MadeChange = false;
485 
486  // Do not convert small short loops to CTR loop.
487  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
488  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
490  auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
491  *L->getHeader()->getParent());
492  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
494  for (BasicBlock *BB : L->blocks())
495  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
496  // 6 is an approximate latency for the mtctr instruction.
497  if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
498  return false;
499  }
500 
501  // Process nested loops first.
502  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
503  MadeChange |= convertToCTRLoop(*I);
504  DEBUG(dbgs() << "Nested loop converted\n");
505  }
506 
507  // If a nested loop has been converted, then we can't convert this loop.
508  if (MadeChange)
509  return MadeChange;
510 
511 #ifndef NDEBUG
512  // Stop trying after reaching the limit (if any).
513  int Limit = CTRLoopLimit;
514  if (Limit >= 0) {
515  if (Counter >= CTRLoopLimit)
516  return false;
517  Counter++;
518  }
519 #endif
520 
521  // We don't want to spill/restore the counter register, and so we don't
522  // want to use the counter register if the loop contains calls.
523  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
524  I != IE; ++I)
525  if (mightUseCTR(*I))
526  return MadeChange;
527 
528  SmallVector<BasicBlock*, 4> ExitingBlocks;
529  L->getExitingBlocks(ExitingBlocks);
530 
531  BasicBlock *CountedExitBlock = nullptr;
532  const SCEV *ExitCount = nullptr;
533  BranchInst *CountedExitBranch = nullptr;
534  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
535  IE = ExitingBlocks.end(); I != IE; ++I) {
536  const SCEV *EC = SE->getExitCount(L, *I);
537  DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
538  (*I)->getName() << ": " << *EC << "\n");
539  if (isa<SCEVCouldNotCompute>(EC))
540  continue;
541  if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
542  if (ConstEC->getValue()->isZero())
543  continue;
544  } else if (!SE->isLoopInvariant(EC, L))
545  continue;
546 
547  if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
548  continue;
549 
550  // We now have a loop-invariant count of loop iterations (which is not the
551  // constant zero) for which we know that this loop will not exit via this
552  // exisiting block.
553 
554  // We need to make sure that this block will run on every loop iteration.
555  // For this to be true, we must dominate all blocks with backedges. Such
556  // blocks are in-loop predecessors to the header block.
557  bool NotAlways = false;
558  for (pred_iterator PI = pred_begin(L->getHeader()),
559  PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
560  if (!L->contains(*PI))
561  continue;
562 
563  if (!DT->dominates(*I, *PI)) {
564  NotAlways = true;
565  break;
566  }
567  }
568 
569  if (NotAlways)
570  continue;
571 
572  // Make sure this blocks ends with a conditional branch.
573  Instruction *TI = (*I)->getTerminator();
574  if (!TI)
575  continue;
576 
577  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
578  if (!BI->isConditional())
579  continue;
580 
581  CountedExitBranch = BI;
582  } else
583  continue;
584 
585  // Note that this block may not be the loop latch block, even if the loop
586  // has a latch block.
587  CountedExitBlock = *I;
588  ExitCount = EC;
589  break;
590  }
591 
592  if (!CountedExitBlock)
593  return MadeChange;
594 
595  BasicBlock *Preheader = L->getLoopPreheader();
596 
597  // If we don't have a preheader, then insert one. If we already have a
598  // preheader, then we can use it (except if the preheader contains a use of
599  // the CTR register because some such uses might be reordered by the
600  // selection DAG after the mtctr instruction).
601  if (!Preheader || mightUseCTR(Preheader))
602  Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
603  if (!Preheader)
604  return MadeChange;
605 
606  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");
607 
608  // Insert the count into the preheader and replace the condition used by the
609  // selected branch.
610  MadeChange = true;
611 
612  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
613  LLVMContext &C = SE->getContext();
614  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
615  if (!ExitCount->getType()->isPointerTy() &&
616  ExitCount->getType() != CountType)
617  ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
618  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
619  Value *ECValue =
620  SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
621 
622  IRBuilder<> CountBuilder(Preheader->getTerminator());
623  Module *M = Preheader->getParent()->getParent();
624  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
625  CountType);
626  CountBuilder.CreateCall(MTCTRFunc, ECValue);
627 
628  IRBuilder<> CondBuilder(CountedExitBranch);
629  Value *DecFunc =
630  Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
631  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
632  Value *OldCond = CountedExitBranch->getCondition();
633  CountedExitBranch->setCondition(NewCond);
634 
635  // The false branch must exit the loop.
636  if (!L->contains(CountedExitBranch->getSuccessor(0)))
637  CountedExitBranch->swapSuccessors();
638 
639  // The old condition may be dead now, and may have even created a dead PHI
640  // (the original induction variable).
642  // Run through the basic blocks of the loop and see if any of them have dead
643  // PHIs that can be removed.
644  for (auto I : L->blocks())
645  DeleteDeadPHIs(I);
646 
647  ++NumCTRLoops;
648  return MadeChange;
649 }
650 
651 #ifndef NDEBUG
652 static bool clobbersCTR(const MachineInstr &MI) {
653  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
654  const MachineOperand &MO = MI.getOperand(i);
655  if (MO.isReg()) {
656  if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
657  return true;
658  } else if (MO.isRegMask()) {
659  if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
660  return true;
661  }
662  }
663 
664  return false;
665 }
666 
672  bool CheckPreds;
673 
674  if (I == MBB->begin()) {
675  Visited.insert(MBB);
676  goto queue_preds;
677  } else
678  --I;
679 
680 check_block:
681  Visited.insert(MBB);
682  if (I == MBB->end())
683  goto queue_preds;
684 
685  CheckPreds = true;
686  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
687  unsigned Opc = I->getOpcode();
688  if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
689  CheckPreds = false;
690  break;
691  }
692 
693  if (I != BI && clobbersCTR(*I)) {
694  DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
695  << ") instruction " << *I << " clobbers CTR, invalidating "
696  << printMBBReference(*BI->getParent()) << " ("
697  << BI->getParent()->getFullName() << ") instruction " << *BI
698  << "\n");
699  return false;
700  }
701 
702  if (I == IE)
703  break;
704  }
705 
706  if (!CheckPreds && Preds.empty())
707  return true;
708 
709  if (CheckPreds) {
710 queue_preds:
711  if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
712  DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
713  << printMBBReference(*BI->getParent()) << " ("
714  << BI->getParent()->getFullName() << ") instruction " << *BI
715  << "\n");
716  return false;
717  }
718 
720  PIE = MBB->pred_end(); PI != PIE; ++PI)
721  Preds.push_back(*PI);
722  }
723 
724  do {
725  MBB = Preds.pop_back_val();
726  if (!Visited.count(MBB)) {
727  I = MBB->getLastNonDebugInstr();
728  goto check_block;
729  }
730  } while (!Preds.empty());
731 
732  return true;
733 }
734 
735 bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
736  MDT = &getAnalysis<MachineDominatorTree>();
737 
738  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
739  // any other instructions that might clobber the ctr register.
740  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
741  I != IE; ++I) {
742  MachineBasicBlock *MBB = &*I;
743  if (!MDT->isReachableFromEntry(MBB))
744  continue;
745 
747  MIIE = MBB->end(); MII != MIIE; ++MII) {
748  unsigned Opc = MII->getOpcode();
749  if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
750  Opc == PPC::BDZ8 || Opc == PPC::BDZ)
751  if (!verifyCTRBranch(MBB, MII))
752  llvm_unreachable("Invalid PPC CTR loop!");
753  }
754  }
755 
756  return false;
757 }
758 #endif // NDEBUG
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:72
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
uint64_t CallInst * C
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:570
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:818
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool hasLocalLinkage() const
Definition: GlobalValue.h:428
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void swapSuccessors()
Swap the successors of this branch instruction.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:260
void initializePPCCTRLoopsVerifyPass(PassRegistry &)
ArrayRef< BasicBlock *>::const_iterator block_iterator
Definition: LoopInfo.h:153
LLVM_NODISCARD bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:176
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
An immutable pass that tracks lazily created AssumptionCache objects.
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn&#39;t have a preheader, this method is called...
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:738
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
F(f)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Value * getCondition() const
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
Definition: Type.h:159
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number...
Definition: InlineAsm.h:149
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:296
This file a TargetTransformInfo::Concept conforming object specific to the PPC target machine...
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
std::vector< Loop *>::const_iterator iterator
iterator/begin/end - The interface to the top-level loops in the current function.
Definition: LoopInfo.h:650
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
FunctionPass * createPPCCTRLoops()
static cl::opt< int > CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1))
BlockT * getHeader() const
Definition: LoopInfo.h:100
std::vector< Loop *>::const_iterator iterator
Definition: LoopInfo.h:139
ppc ctr loops PowerPC CTR Loops Verify
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) INITIALIZE_PASS_END(PPCCTRLoops
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
FunctionPass * createPPCCTRLoopsVerify()
iterator getLastNonDebugInstr()
Returns an iterator to the last non-debug instruction in the basic block, or end().
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber.
Definition: InlineAsm.h:121
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:979
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:301
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
Wrapper pass for TargetTransformInfo.
bool hasName() const
Definition: Value.h:251
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Conditional or Unconditional Branch instruction.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:413
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:113
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:116
Class to represent integer types.
Definition: DerivedTypes.h:40
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
Common code between 32-bit and 64-bit PowerPC targets.
std::vector< MachineBasicBlock * >::iterator pred_iterator
static double log2(double V)
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
Extended Value Type.
Definition: ValueTypes.h:34
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:422
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
bool verify(const TargetRegisterInfo &TRI) const
Check that information hold by this instance make sense for the given TRI.
void initializePPCCTRLoopsPass(PassRegistry &)
std::vector< ConstraintInfo > ConstraintInfoVector
Definition: InlineAsm.h:116
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:254
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
iterator begin() const
Definition: LoopInfo.h:142
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:383
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:820
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:175
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
This class uses information about analyze scalars to rewrite expressions in canonical form...
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Representation of each machine instruction.
Definition: MachineInstr.h:60
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
static bool asmClobbersCTR(InlineAsm *IA)
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getName() const
Definition: LoopInfo.h:577
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
iterator end() const
Definition: LoopInfo.h:143
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
block_iterator block_end() const
Definition: LoopInfo.h:155
Same for multiplication.
Definition: ISDOpcodes.h:257
ppc ctr loops
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setCondition(Value *V)
Multiway switch.
static bool clobbersCTR(const MachineInstr &MI)
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
static bool isLargeIntegerTy(bool Is32Bit, Type *Ty)
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:559
LLVM Value Representation.
Definition: Value.h:73
#define DEBUG(X)
Definition: Debug.h:118
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:958
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:298
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
block_iterator block_begin() const
Definition: LoopInfo.h:154
static bool verifyCTRBranch(MachineBasicBlock *MBB, MachineBasicBlock::iterator I)
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1227
static ConstraintInfoVector ParseConstraints(StringRef ConstraintString)
ParseConstraints - Split up the constraint string into the specific constraints and their prefixes...
Definition: InlineAsm.cpp:208
ppc ctr PowerPC CTR Loops
This class represents a constant integer value.
CallInst * CreateCall(Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1663
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:65