LLVM  6.0.0svn
PPCCTRLoops.cpp
Go to the documentation of this file.
1 //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass identifies loops where we can generate the PPC branch instructions
11 // that decrement and test the count register (CTR) (bdnz and friends).
12 //
13 // The pattern that defines the induction variable can changed depending on
14 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
15 // normalizes induction variables, and the Loop Strength Reduction pass
16 // run by 'llc' may also make changes to the induction variable.
17 //
18 // Criteria for CTR loops:
19 // - Countable loops (w/ ind. var for a trip count)
20 // - Try inner-most loops first
21 // - No nested CTR loops.
22 // - No function calls in loops.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "PPC.h"
27 #include "PPCSubtarget.h"
28 #include "PPCTargetMachine.h"
29 #include "PPCTargetTransformInfo.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/Statistic.h"
34 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DerivedTypes.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/InlineAsm.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Module.h"
47 #include "llvm/IR/ValueHandle.h"
48 #include "llvm/PassSupport.h"
50 #include "llvm/Support/Debug.h"
52 #include "llvm/Transforms/Scalar.h"
56 
57 #ifndef NDEBUG
62 #endif
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "ctrloops"
67 
68 #ifndef NDEBUG
69 static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
70 #endif
71 
72 // The latency of mtctr is only justified if there are more than 4
73 // comparisons that will be removed as a result.
74 static cl::opt<unsigned>
75 SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
76  cl::desc("Loops with a constant trip count smaller than "
77  "this value will not use the count register."));
78 
79 STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
80 
81 namespace llvm {
83 #ifndef NDEBUG
85 #endif
86 }
87 
88 namespace {
89  struct PPCCTRLoops : public FunctionPass {
90 
91 #ifndef NDEBUG
92  static int Counter;
93 #endif
94 
95  public:
96  static char ID;
97 
98  PPCCTRLoops() : FunctionPass(ID) {
100  }
101 
102  bool runOnFunction(Function &F) override;
103 
104  void getAnalysisUsage(AnalysisUsage &AU) const override {
112  }
113 
114  private:
115  bool mightUseCTR(BasicBlock *BB);
116  bool convertToCTRLoop(Loop *L);
117 
118  private:
119  const PPCTargetMachine *TM;
120  const PPCSubtarget *STI;
121  const PPCTargetLowering *TLI;
122  const DataLayout *DL;
123  const TargetLibraryInfo *LibInfo;
124  const TargetTransformInfo *TTI;
125  LoopInfo *LI;
126  ScalarEvolution *SE;
127  DominatorTree *DT;
128  bool PreserveLCSSA;
129  TargetSchedModel SchedModel;
130  };
131 
132  char PPCCTRLoops::ID = 0;
133 #ifndef NDEBUG
134  int PPCCTRLoops::Counter = 0;
135 #endif
136 
137 #ifndef NDEBUG
138  struct PPCCTRLoopsVerify : public MachineFunctionPass {
139  public:
140  static char ID;
141 
142  PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
144  }
145 
146  void getAnalysisUsage(AnalysisUsage &AU) const override {
149  }
150 
151  bool runOnMachineFunction(MachineFunction &MF) override;
152 
153  private:
155  };
156 
157  char PPCCTRLoopsVerify::ID = 0;
158 #endif // NDEBUG
159 } // end anonymous namespace
160 
161 INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
162  false, false)
166 INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
167  false, false)
168 
169 FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
170 
171 #ifndef NDEBUG
172 INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
173  "PowerPC CTR Loops Verify", false, false)
175 INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
176  "PowerPC CTR Loops Verify", false, false)
177 
179  return new PPCCTRLoopsVerify();
180 }
181 #endif // NDEBUG
182 
184  if (skipFunction(F))
185  return false;
186 
187  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
188  if (!TPC)
189  return false;
190 
191  TM = &TPC->getTM<PPCTargetMachine>();
192  STI = TM->getSubtargetImpl(F);
193  TLI = STI->getTargetLowering();
194 
195  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
196  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
197  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
198  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
199  DL = &F.getParent()->getDataLayout();
200  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
201  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
202  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
203 
204  bool MadeChange = false;
205 
206  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
207  I != E; ++I) {
208  Loop *L = *I;
209  if (!L->getParentLoop())
210  MadeChange |= convertToCTRLoop(L);
211  }
212 
213  return MadeChange;
214 }
215 
216 static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
217  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
218  return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
219 
220  return false;
221 }
222 
223 // Determining the address of a TLS variable results in a function call in
224 // certain TLS models.
225 static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
226  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
227  if (!GV) {
228  // Recurse to check for constants that refer to TLS global variables.
229  if (const auto *CV = dyn_cast<Constant>(MemAddr))
230  for (const auto &CO : CV->operands())
231  if (memAddrUsesCTR(TM, CO))
232  return true;
233 
234  return false;
235  }
236 
237  if (!GV->isThreadLocal())
238  return false;
240  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
241 }
242 
243 // Loop through the inline asm constraints and look for something that clobbers
244 // ctr.
245 static bool asmClobbersCTR(InlineAsm *IA) {
247  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
248  InlineAsm::ConstraintInfo &C = CIV[i];
249  if (C.Type != InlineAsm::isInput)
250  for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
251  if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
252  return true;
253  }
254  return false;
255 }
256 
257 bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
258  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
259  J != JE; ++J) {
260  if (CallInst *CI = dyn_cast<CallInst>(J)) {
261  // Inline ASM is okay, unless it clobbers the ctr register.
262  if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
263  if (asmClobbersCTR(IA))
264  return true;
265  continue;
266  }
267 
268  if (Function *F = CI->getCalledFunction()) {
269  // Most intrinsics don't become function calls, but some might.
270  // sin, cos, exp and log are always calls.
271  unsigned Opcode = 0;
273  switch (F->getIntrinsicID()) {
274  default: continue;
275  // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
276  // we're definitely using CTR.
277  case Intrinsic::ppc_is_decremented_ctr_nonzero:
278  case Intrinsic::ppc_mtctr:
279  return true;
280 
281 // VisualStudio defines setjmp as _setjmp
282 #if defined(_MSC_VER) && defined(setjmp) && \
283  !defined(setjmp_undefined_for_msvc)
284 # pragma push_macro("setjmp")
285 # undef setjmp
286 # define setjmp_undefined_for_msvc
287 #endif
288 
289  case Intrinsic::setjmp:
290 
291 #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
292  // let's return it to _setjmp state
293 # pragma pop_macro("setjmp")
294 # undef setjmp_undefined_for_msvc
295 #endif
296 
297  case Intrinsic::longjmp:
298 
299  // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
300  // because, although it does clobber the counter register, the
301  // control can't then return to inside the loop unless there is also
302  // an eh_sjlj_setjmp.
303  case Intrinsic::eh_sjlj_setjmp:
304 
305  case Intrinsic::memcpy:
306  case Intrinsic::memmove:
307  case Intrinsic::memset:
308  case Intrinsic::powi:
309  case Intrinsic::log:
310  case Intrinsic::log2:
311  case Intrinsic::log10:
312  case Intrinsic::exp:
313  case Intrinsic::exp2:
314  case Intrinsic::pow:
315  case Intrinsic::sin:
316  case Intrinsic::cos:
317  return true;
318  case Intrinsic::copysign:
319  if (CI->getArgOperand(0)->getType()->getScalarType()->
320  isPPC_FP128Ty())
321  return true;
322  else
323  continue; // ISD::FCOPYSIGN is never a library call.
324  case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
325  case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
326  case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
327  case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
328  case Intrinsic::rint: Opcode = ISD::FRINT; break;
329  case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
330  case Intrinsic::round: Opcode = ISD::FROUND; break;
331  case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
332  case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
333  case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
334  case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
335  }
336  }
337 
338  // PowerPC does not use [US]DIVREM or other library calls for
339  // operations on regular types which are not otherwise library calls
340  // (i.e. soft float or atomics). If adapting for targets that do,
341  // additional care is required here.
342 
343  LibFunc Func;
344  if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
345  LibInfo->getLibFunc(F->getName(), Func) &&
346  LibInfo->hasOptimizedCodeGen(Func)) {
347  // Non-read-only functions are never treated as intrinsics.
348  if (!CI->onlyReadsMemory())
349  return true;
350 
351  // Conversion happens only for FP calls.
352  if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
353  return true;
354 
355  switch (Func) {
356  default: return true;
357  case LibFunc_copysign:
358  case LibFunc_copysignf:
359  continue; // ISD::FCOPYSIGN is never a library call.
360  case LibFunc_copysignl:
361  return true;
362  case LibFunc_fabs:
363  case LibFunc_fabsf:
364  case LibFunc_fabsl:
365  continue; // ISD::FABS is never a library call.
366  case LibFunc_sqrt:
367  case LibFunc_sqrtf:
368  case LibFunc_sqrtl:
369  Opcode = ISD::FSQRT; break;
370  case LibFunc_floor:
371  case LibFunc_floorf:
372  case LibFunc_floorl:
373  Opcode = ISD::FFLOOR; break;
374  case LibFunc_nearbyint:
375  case LibFunc_nearbyintf:
376  case LibFunc_nearbyintl:
377  Opcode = ISD::FNEARBYINT; break;
378  case LibFunc_ceil:
379  case LibFunc_ceilf:
380  case LibFunc_ceill:
381  Opcode = ISD::FCEIL; break;
382  case LibFunc_rint:
383  case LibFunc_rintf:
384  case LibFunc_rintl:
385  Opcode = ISD::FRINT; break;
386  case LibFunc_round:
387  case LibFunc_roundf:
388  case LibFunc_roundl:
389  Opcode = ISD::FROUND; break;
390  case LibFunc_trunc:
391  case LibFunc_truncf:
392  case LibFunc_truncl:
393  Opcode = ISD::FTRUNC; break;
394  case LibFunc_fmin:
395  case LibFunc_fminf:
396  case LibFunc_fminl:
397  Opcode = ISD::FMINNUM; break;
398  case LibFunc_fmax:
399  case LibFunc_fmaxf:
400  case LibFunc_fmaxl:
401  Opcode = ISD::FMAXNUM; break;
402  }
403  }
404 
405  if (Opcode) {
406  MVT VTy = TLI->getSimpleValueType(
407  *DL, CI->getArgOperand(0)->getType(), true);
408  if (VTy == MVT::Other)
409  return true;
410 
411  if (TLI->isOperationLegalOrCustom(Opcode, VTy))
412  continue;
413  else if (VTy.isVector() &&
414  TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
415  continue;
416 
417  return true;
418  }
419  }
420 
421  return true;
422  } else if (isa<BinaryOperator>(J) &&
423  J->getType()->getScalarType()->isPPC_FP128Ty()) {
424  // Most operations on ppc_f128 values become calls.
425  return true;
426  } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
427  isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
428  CastInst *CI = cast<CastInst>(J);
429  if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
430  CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
431  isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
432  isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
433  return true;
434  } else if (isLargeIntegerTy(!TM->isPPC64(),
435  J->getType()->getScalarType()) &&
436  (J->getOpcode() == Instruction::UDiv ||
437  J->getOpcode() == Instruction::SDiv ||
438  J->getOpcode() == Instruction::URem ||
439  J->getOpcode() == Instruction::SRem)) {
440  return true;
441  } else if (!TM->isPPC64() &&
442  isLargeIntegerTy(false, J->getType()->getScalarType()) &&
443  (J->getOpcode() == Instruction::Shl ||
444  J->getOpcode() == Instruction::AShr ||
445  J->getOpcode() == Instruction::LShr)) {
446  // Only on PPC32, for 128-bit integers (specifically not 64-bit
447  // integers), these might be runtime calls.
448  return true;
449  } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
450  // On PowerPC, indirect jumps use the counter register.
451  return true;
452  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
453  if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
454  return true;
455  }
456 
457  if (STI->useSoftFloat()) {
458  switch(J->getOpcode()) {
459  case Instruction::FAdd:
460  case Instruction::FSub:
461  case Instruction::FMul:
462  case Instruction::FDiv:
463  case Instruction::FRem:
464  case Instruction::FPTrunc:
465  case Instruction::FPExt:
466  case Instruction::FPToUI:
467  case Instruction::FPToSI:
468  case Instruction::UIToFP:
469  case Instruction::SIToFP:
470  case Instruction::FCmp:
471  return true;
472  }
473  }
474 
475  for (Value *Operand : J->operands())
476  if (memAddrUsesCTR(*TM, Operand))
477  return true;
478  }
479 
480  return false;
481 }
482 bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
483  bool MadeChange = false;
484 
485  // Do not convert small short loops to CTR loop.
486  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
487  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
489  auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
490  *L->getHeader()->getParent());
491  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
493  for (BasicBlock *BB : L->blocks())
494  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
495  // 6 is an approximate latency for the mtctr instruction.
496  if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
497  return false;
498  }
499 
500  // Process nested loops first.
501  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
502  MadeChange |= convertToCTRLoop(*I);
503  DEBUG(dbgs() << "Nested loop converted\n");
504  }
505 
506  // If a nested loop has been converted, then we can't convert this loop.
507  if (MadeChange)
508  return MadeChange;
509 
510 #ifndef NDEBUG
511  // Stop trying after reaching the limit (if any).
512  int Limit = CTRLoopLimit;
513  if (Limit >= 0) {
514  if (Counter >= CTRLoopLimit)
515  return false;
516  Counter++;
517  }
518 #endif
519 
520  // We don't want to spill/restore the counter register, and so we don't
521  // want to use the counter register if the loop contains calls.
522  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
523  I != IE; ++I)
524  if (mightUseCTR(*I))
525  return MadeChange;
526 
527  SmallVector<BasicBlock*, 4> ExitingBlocks;
528  L->getExitingBlocks(ExitingBlocks);
529 
530  BasicBlock *CountedExitBlock = nullptr;
531  const SCEV *ExitCount = nullptr;
532  BranchInst *CountedExitBranch = nullptr;
533  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
534  IE = ExitingBlocks.end(); I != IE; ++I) {
535  const SCEV *EC = SE->getExitCount(L, *I);
536  DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
537  (*I)->getName() << ": " << *EC << "\n");
538  if (isa<SCEVCouldNotCompute>(EC))
539  continue;
540  if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
541  if (ConstEC->getValue()->isZero())
542  continue;
543  } else if (!SE->isLoopInvariant(EC, L))
544  continue;
545 
546  if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
547  continue;
548 
549  // We now have a loop-invariant count of loop iterations (which is not the
550  // constant zero) for which we know that this loop will not exit via this
551  // exisiting block.
552 
553  // We need to make sure that this block will run on every loop iteration.
554  // For this to be true, we must dominate all blocks with backedges. Such
555  // blocks are in-loop predecessors to the header block.
556  bool NotAlways = false;
557  for (pred_iterator PI = pred_begin(L->getHeader()),
558  PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
559  if (!L->contains(*PI))
560  continue;
561 
562  if (!DT->dominates(*I, *PI)) {
563  NotAlways = true;
564  break;
565  }
566  }
567 
568  if (NotAlways)
569  continue;
570 
571  // Make sure this blocks ends with a conditional branch.
572  Instruction *TI = (*I)->getTerminator();
573  if (!TI)
574  continue;
575 
576  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
577  if (!BI->isConditional())
578  continue;
579 
580  CountedExitBranch = BI;
581  } else
582  continue;
583 
584  // Note that this block may not be the loop latch block, even if the loop
585  // has a latch block.
586  CountedExitBlock = *I;
587  ExitCount = EC;
588  break;
589  }
590 
591  if (!CountedExitBlock)
592  return MadeChange;
593 
594  BasicBlock *Preheader = L->getLoopPreheader();
595 
596  // If we don't have a preheader, then insert one. If we already have a
597  // preheader, then we can use it (except if the preheader contains a use of
598  // the CTR register because some such uses might be reordered by the
599  // selection DAG after the mtctr instruction).
600  if (!Preheader || mightUseCTR(Preheader))
601  Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
602  if (!Preheader)
603  return MadeChange;
604 
605  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");
606 
607  // Insert the count into the preheader and replace the condition used by the
608  // selected branch.
609  MadeChange = true;
610 
611  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
612  LLVMContext &C = SE->getContext();
613  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
614  if (!ExitCount->getType()->isPointerTy() &&
615  ExitCount->getType() != CountType)
616  ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
617  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
618  Value *ECValue =
619  SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
620 
621  IRBuilder<> CountBuilder(Preheader->getTerminator());
622  Module *M = Preheader->getParent()->getParent();
623  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
624  CountType);
625  CountBuilder.CreateCall(MTCTRFunc, ECValue);
626 
627  IRBuilder<> CondBuilder(CountedExitBranch);
628  Value *DecFunc =
629  Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
630  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
631  Value *OldCond = CountedExitBranch->getCondition();
632  CountedExitBranch->setCondition(NewCond);
633 
634  // The false branch must exit the loop.
635  if (!L->contains(CountedExitBranch->getSuccessor(0)))
636  CountedExitBranch->swapSuccessors();
637 
638  // The old condition may be dead now, and may have even created a dead PHI
639  // (the original induction variable).
641  // Run through the basic blocks of the loop and see if any of them have dead
642  // PHIs that can be removed.
643  for (auto I : L->blocks())
644  DeleteDeadPHIs(I);
645 
646  ++NumCTRLoops;
647  return MadeChange;
648 }
649 
650 #ifndef NDEBUG
651 static bool clobbersCTR(const MachineInstr &MI) {
652  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
653  const MachineOperand &MO = MI.getOperand(i);
654  if (MO.isReg()) {
655  if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
656  return true;
657  } else if (MO.isRegMask()) {
658  if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
659  return true;
660  }
661  }
662 
663  return false;
664 }
665 
671  bool CheckPreds;
672 
673  if (I == MBB->begin()) {
674  Visited.insert(MBB);
675  goto queue_preds;
676  } else
677  --I;
678 
679 check_block:
680  Visited.insert(MBB);
681  if (I == MBB->end())
682  goto queue_preds;
683 
684  CheckPreds = true;
685  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
686  unsigned Opc = I->getOpcode();
687  if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
688  CheckPreds = false;
689  break;
690  }
691 
692  if (I != BI && clobbersCTR(*I)) {
693  DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" <<
694  MBB->getFullName() << ") instruction " << *I <<
695  " clobbers CTR, invalidating " << "BB#" <<
696  BI->getParent()->getNumber() << " (" <<
697  BI->getParent()->getFullName() << ") instruction " <<
698  *BI << "\n");
699  return false;
700  }
701 
702  if (I == IE)
703  break;
704  }
705 
706  if (!CheckPreds && Preds.empty())
707  return true;
708 
709  if (CheckPreds) {
710 queue_preds:
711  if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
712  DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" <<
713  BI->getParent()->getNumber() << " (" <<
714  BI->getParent()->getFullName() << ") instruction " <<
715  *BI << "\n");
716  return false;
717  }
718 
720  PIE = MBB->pred_end(); PI != PIE; ++PI)
721  Preds.push_back(*PI);
722  }
723 
724  do {
725  MBB = Preds.pop_back_val();
726  if (!Visited.count(MBB)) {
727  I = MBB->getLastNonDebugInstr();
728  goto check_block;
729  }
730  } while (!Preds.empty());
731 
732  return true;
733 }
734 
735 bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
736  MDT = &getAnalysis<MachineDominatorTree>();
737 
738  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
739  // any other instructions that might clobber the ctr register.
740  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
741  I != IE; ++I) {
742  MachineBasicBlock *MBB = &*I;
743  if (!MDT->isReachableFromEntry(MBB))
744  continue;
745 
747  MIIE = MBB->end(); MII != MIIE; ++MII) {
748  unsigned Opc = MII->getOpcode();
749  if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
750  Opc == PPC::BDZ8 || Opc == PPC::BDZ)
751  if (!verifyCTRBranch(MBB, MII))
752  llvm_unreachable("Invalid PPC CTR loop!");
753  }
754  }
755 
756  return false;
757 }
758 #endif // NDEBUG
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:73
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
uint64_t CallInst * C
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:569
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:818
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool hasLocalLinkage() const
Definition: GlobalValue.h:427
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void swapSuccessors()
Swap the successors of this branch instruction.
void initializePPCCTRLoopsVerifyPass(PassRegistry &)
ArrayRef< BasicBlock *>::const_iterator block_iterator
Definition: LoopInfo.h:153
LLVM_NODISCARD bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:176
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr)
bool isVector() const
Return true if this is a vector value type.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
An immutable pass that tracks lazily created AssumptionCache objects.
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn&#39;t have a preheader, this method is called...
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:728
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
F(f)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Value * getCondition() const
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
Definition: Type.h:159
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number...
Definition: InlineAsm.h:149
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:293
This file a TargetTransformInfo::Concept conforming object specific to the PPC target machine...
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
std::vector< Loop *>::const_iterator iterator
iterator/begin/end - The interface to the top-level loops in the current function.
Definition: LoopInfo.h:650
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
FunctionPass * createPPCCTRLoops()
static cl::opt< int > CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1))
BlockT * getHeader() const
Definition: LoopInfo.h:100
std::vector< Loop *>::const_iterator iterator
Definition: LoopInfo.h:139
ppc ctr loops PowerPC CTR Loops Verify
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) INITIALIZE_PASS_END(PPCCTRLoops
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
FunctionPass * createPPCCTRLoopsVerify()
iterator getLastNonDebugInstr()
Returns an iterator to the last non-debug instruction in the basic block, or end().
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they&#39;re not in a MachineFuncti...
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber.
Definition: InlineAsm.h:121
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:980
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:301
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
Wrapper pass for TargetTransformInfo.
Machine Value Type.
bool hasName() const
Definition: Value.h:251
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Conditional or Unconditional Branch instruction.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:412
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:113
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:116
Class to represent integer types.
Definition: DerivedTypes.h:40
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
Common code between 32-bit and 64-bit PowerPC targets.
std::vector< MachineBasicBlock * >::iterator pred_iterator
static double log2(double V)
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:401
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
bool verify(const TargetRegisterInfo &TRI) const
Check that information hold by this instance make sense for the given TRI.
void initializePPCCTRLoopsPass(PassRegistry &)
std::vector< ConstraintInfo > ConstraintInfoVector
Definition: InlineAsm.h:116
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:254
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
iterator begin() const
Definition: LoopInfo.h:142
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:820
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:175
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
This class uses information about analyze scalars to rewrite expressions in canonical form...
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Representation of each machine instruction.
Definition: MachineInstr.h:59
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
static bool asmClobbersCTR(InlineAsm *IA)
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getName() const
Definition: LoopInfo.h:577
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
iterator end() const
Definition: LoopInfo.h:143
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
block_iterator block_end() const
Definition: LoopInfo.h:155
Same for multiplication.
Definition: ISDOpcodes.h:256
ppc ctr loops
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setCondition(Value *V)
Multiway switch.
static bool clobbersCTR(const MachineInstr &MI)
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
static bool isLargeIntegerTy(bool Is32Bit, Type *Ty)
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:556
LLVM Value Representation.
Definition: Value.h:73
#define DEBUG(X)
Definition: Debug.h:118
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:958
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
block_iterator block_begin() const
Definition: LoopInfo.h:154
static bool verifyCTRBranch(MachineBasicBlock *MBB, MachineBasicBlock::iterator I)
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1227
static ConstraintInfoVector ParseConstraints(StringRef ConstraintString)
ParseConstraints - Split up the constraint string into the specific constraints and their prefixes...
Definition: InlineAsm.cpp:208
ppc ctr PowerPC CTR Loops
This class represents a constant integer value.
CallInst * CreateCall(Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1663
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:65