LLVM  9.0.0svn
PPCCTRLoops.cpp
Go to the documentation of this file.
1 //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass identifies loops where we can generate the PPC branch instructions
10 // that decrement and test the count register (CTR) (bdnz and friends).
11 //
12 // The pattern that defines the induction variable can changed depending on
13 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
14 // normalizes induction variables, and the Loop Strength Reduction pass
15 // run by 'llc' may also make changes to the induction variable.
16 //
17 // Criteria for CTR loops:
18 // - Countable loops (w/ ind. var for a trip count)
19 // - Try inner-most loops first
20 // - No nested CTR loops.
21 // - No function calls in loops.
22 //
23 //===----------------------------------------------------------------------===//
24 
25 #include "PPC.h"
26 #include "PPCSubtarget.h"
27 #include "PPCTargetMachine.h"
28 #include "PPCTargetTransformInfo.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/CFG.h"
34 #include "llvm/Analysis/LoopInfo.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/Instructions.h"
47 #include "llvm/IR/IntrinsicInst.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/IR/ValueHandle.h"
50 #include "llvm/PassSupport.h"
52 #include "llvm/Support/Debug.h"
54 #include "llvm/Transforms/Scalar.h"
55 #include "llvm/Transforms/Utils.h"
58 
59 #ifndef NDEBUG
64 #endif
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "ctrloops"
69 
70 #ifndef NDEBUG
71 static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
72 #endif
73 
74 // The latency of mtctr is only justified if there are more than 4
75 // comparisons that will be removed as a result.
76 static cl::opt<unsigned>
77 SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
78  cl::desc("Loops with a constant trip count smaller than "
79  "this value will not use the count register."));
80 
81 STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
82 
83 namespace llvm {
85 #ifndef NDEBUG
87 #endif
88 }
89 
90 namespace {
91  struct PPCCTRLoops : public FunctionPass {
92 
93 #ifndef NDEBUG
94  static int Counter;
95 #endif
96 
97  public:
98  static char ID;
99 
100  PPCCTRLoops() : FunctionPass(ID) {
102  }
103 
104  bool runOnFunction(Function &F) override;
105 
106  void getAnalysisUsage(AnalysisUsage &AU) const override {
114  }
115 
116  private:
117  bool mightUseCTR(BasicBlock *BB);
118  bool convertToCTRLoop(Loop *L);
119 
120  private:
121  const PPCTargetMachine *TM;
122  const PPCSubtarget *STI;
123  const PPCTargetLowering *TLI;
124  const DataLayout *DL;
125  const TargetLibraryInfo *LibInfo;
126  const TargetTransformInfo *TTI;
127  LoopInfo *LI;
128  ScalarEvolution *SE;
129  DominatorTree *DT;
130  bool PreserveLCSSA;
131  TargetSchedModel SchedModel;
132  };
133 
134  char PPCCTRLoops::ID = 0;
135 #ifndef NDEBUG
136  int PPCCTRLoops::Counter = 0;
137 #endif
138 
139 #ifndef NDEBUG
140  struct PPCCTRLoopsVerify : public MachineFunctionPass {
141  public:
142  static char ID;
143 
144  PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
146  }
147 
148  void getAnalysisUsage(AnalysisUsage &AU) const override {
151  }
152 
153  bool runOnMachineFunction(MachineFunction &MF) override;
154 
155  private:
157  };
158 
159  char PPCCTRLoopsVerify::ID = 0;
160 #endif // NDEBUG
161 } // end anonymous namespace
162 
163 INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
164  false, false)
168 INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
169  false, false)
170 
171 FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
172 
173 #ifndef NDEBUG
174 INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
175  "PowerPC CTR Loops Verify", false, false)
177 INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
178  "PowerPC CTR Loops Verify", false, false)
179 
181  return new PPCCTRLoopsVerify();
182 }
183 #endif // NDEBUG
184 
186  if (skipFunction(F))
187  return false;
188 
189  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
190  if (!TPC)
191  return false;
192 
193  TM = &TPC->getTM<PPCTargetMachine>();
194  STI = TM->getSubtargetImpl(F);
195  TLI = STI->getTargetLowering();
196 
197  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
198  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
199  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
200  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
201  DL = &F.getParent()->getDataLayout();
202  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
203  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
204  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
205 
206  bool MadeChange = false;
207 
208  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
209  I != E; ++I) {
210  Loop *L = *I;
211  if (!L->getParentLoop())
212  MadeChange |= convertToCTRLoop(L);
213  }
214 
215  return MadeChange;
216 }
217 
218 static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
219  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
220  return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
221 
222  return false;
223 }
224 
225 // Determining the address of a TLS variable results in a function call in
226 // certain TLS models.
227 static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
228  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
229  if (!GV) {
230  // Recurse to check for constants that refer to TLS global variables.
231  if (const auto *CV = dyn_cast<Constant>(MemAddr))
232  for (const auto &CO : CV->operands())
233  if (memAddrUsesCTR(TM, CO))
234  return true;
235 
236  return false;
237  }
238 
239  if (!GV->isThreadLocal())
240  return false;
242  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
243 }
244 
245 // Loop through the inline asm constraints and look for something that clobbers
246 // ctr.
247 static bool asmClobbersCTR(InlineAsm *IA) {
249  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
250  InlineAsm::ConstraintInfo &C = CIV[i];
251  if (C.Type != InlineAsm::isInput)
252  for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
253  if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
254  return true;
255  }
256  return false;
257 }
258 
259 bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
260  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
261  J != JE; ++J) {
262  if (CallInst *CI = dyn_cast<CallInst>(J)) {
263  // Inline ASM is okay, unless it clobbers the ctr register.
264  if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
265  if (asmClobbersCTR(IA))
266  return true;
267  continue;
268  }
269 
270  if (Function *F = CI->getCalledFunction()) {
271  // Most intrinsics don't become function calls, but some might.
272  // sin, cos, exp and log are always calls.
273  unsigned Opcode = 0;
275  switch (F->getIntrinsicID()) {
276  default: continue;
277  // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
278  // we're definitely using CTR.
279  case Intrinsic::ppc_is_decremented_ctr_nonzero:
280  case Intrinsic::ppc_mtctr:
281  return true;
282 
283 // VisualStudio defines setjmp as _setjmp
284 #if defined(_MSC_VER) && defined(setjmp) && \
285  !defined(setjmp_undefined_for_msvc)
286 # pragma push_macro("setjmp")
287 # undef setjmp
288 # define setjmp_undefined_for_msvc
289 #endif
290 
291  case Intrinsic::setjmp:
292 
293 #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
294  // let's return it to _setjmp state
295 # pragma pop_macro("setjmp")
296 # undef setjmp_undefined_for_msvc
297 #endif
298 
299  case Intrinsic::longjmp:
300 
301  // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
302  // because, although it does clobber the counter register, the
303  // control can't then return to inside the loop unless there is also
304  // an eh_sjlj_setjmp.
305  case Intrinsic::eh_sjlj_setjmp:
306 
307  case Intrinsic::memcpy:
308  case Intrinsic::memmove:
309  case Intrinsic::memset:
310  case Intrinsic::powi:
311  case Intrinsic::log:
312  case Intrinsic::log2:
313  case Intrinsic::log10:
314  case Intrinsic::exp:
315  case Intrinsic::exp2:
316  case Intrinsic::pow:
317  case Intrinsic::sin:
318  case Intrinsic::cos:
319  return true;
320  case Intrinsic::copysign:
321  if (CI->getArgOperand(0)->getType()->getScalarType()->
322  isPPC_FP128Ty())
323  return true;
324  else
325  continue; // ISD::FCOPYSIGN is never a library call.
326  case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
327  case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
328  case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
329  case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
330  case Intrinsic::rint: Opcode = ISD::FRINT; break;
331  case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
332  case Intrinsic::round: Opcode = ISD::FROUND; break;
333  case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
334  case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
335  case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
336  case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
337  }
338  }
339 
340  // PowerPC does not use [US]DIVREM or other library calls for
341  // operations on regular types which are not otherwise library calls
342  // (i.e. soft float or atomics). If adapting for targets that do,
343  // additional care is required here.
344 
345  LibFunc Func;
346  if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
347  LibInfo->getLibFunc(F->getName(), Func) &&
348  LibInfo->hasOptimizedCodeGen(Func)) {
349  // Non-read-only functions are never treated as intrinsics.
350  if (!CI->onlyReadsMemory())
351  return true;
352 
353  // Conversion happens only for FP calls.
354  if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
355  return true;
356 
357  switch (Func) {
358  default: return true;
359  case LibFunc_copysign:
360  case LibFunc_copysignf:
361  continue; // ISD::FCOPYSIGN is never a library call.
362  case LibFunc_copysignl:
363  return true;
364  case LibFunc_fabs:
365  case LibFunc_fabsf:
366  case LibFunc_fabsl:
367  continue; // ISD::FABS is never a library call.
368  case LibFunc_sqrt:
369  case LibFunc_sqrtf:
370  case LibFunc_sqrtl:
371  Opcode = ISD::FSQRT; break;
372  case LibFunc_floor:
373  case LibFunc_floorf:
374  case LibFunc_floorl:
375  Opcode = ISD::FFLOOR; break;
376  case LibFunc_nearbyint:
377  case LibFunc_nearbyintf:
378  case LibFunc_nearbyintl:
379  Opcode = ISD::FNEARBYINT; break;
380  case LibFunc_ceil:
381  case LibFunc_ceilf:
382  case LibFunc_ceill:
383  Opcode = ISD::FCEIL; break;
384  case LibFunc_rint:
385  case LibFunc_rintf:
386  case LibFunc_rintl:
387  Opcode = ISD::FRINT; break;
388  case LibFunc_round:
389  case LibFunc_roundf:
390  case LibFunc_roundl:
391  Opcode = ISD::FROUND; break;
392  case LibFunc_trunc:
393  case LibFunc_truncf:
394  case LibFunc_truncl:
395  Opcode = ISD::FTRUNC; break;
396  case LibFunc_fmin:
397  case LibFunc_fminf:
398  case LibFunc_fminl:
399  Opcode = ISD::FMINNUM; break;
400  case LibFunc_fmax:
401  case LibFunc_fmaxf:
402  case LibFunc_fmaxl:
403  Opcode = ISD::FMAXNUM; break;
404  }
405  }
406 
407  if (Opcode) {
408  EVT EVTy =
409  TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);
410 
411  if (EVTy == MVT::Other)
412  return true;
413 
414  if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
415  continue;
416  else if (EVTy.isVector() &&
417  TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
418  continue;
419 
420  return true;
421  }
422  }
423 
424  return true;
425  } else if (isa<BinaryOperator>(J) &&
426  J->getType()->getScalarType()->isPPC_FP128Ty()) {
427  // Most operations on ppc_f128 values become calls.
428  return true;
429  } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
430  isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
431  CastInst *CI = cast<CastInst>(J);
432  if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
433  CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
434  isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
435  isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
436  return true;
437  } else if (isLargeIntegerTy(!TM->isPPC64(),
438  J->getType()->getScalarType()) &&
439  (J->getOpcode() == Instruction::UDiv ||
440  J->getOpcode() == Instruction::SDiv ||
441  J->getOpcode() == Instruction::URem ||
442  J->getOpcode() == Instruction::SRem)) {
443  return true;
444  } else if (!TM->isPPC64() &&
445  isLargeIntegerTy(false, J->getType()->getScalarType()) &&
446  (J->getOpcode() == Instruction::Shl ||
447  J->getOpcode() == Instruction::AShr ||
448  J->getOpcode() == Instruction::LShr)) {
449  // Only on PPC32, for 128-bit integers (specifically not 64-bit
450  // integers), these might be runtime calls.
451  return true;
452  } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
453  // On PowerPC, indirect jumps use the counter register.
454  return true;
455  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
456  if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
457  return true;
458  }
459 
460  // FREM is always a call.
461  if (J->getOpcode() == Instruction::FRem)
462  return true;
463 
464  if (STI->useSoftFloat()) {
465  switch(J->getOpcode()) {
466  case Instruction::FAdd:
467  case Instruction::FSub:
468  case Instruction::FMul:
469  case Instruction::FDiv:
470  case Instruction::FPTrunc:
471  case Instruction::FPExt:
472  case Instruction::FPToUI:
473  case Instruction::FPToSI:
474  case Instruction::UIToFP:
475  case Instruction::SIToFP:
476  case Instruction::FCmp:
477  return true;
478  }
479  }
480 
481  for (Value *Operand : J->operands())
482  if (memAddrUsesCTR(*TM, Operand))
483  return true;
484  }
485 
486  return false;
487 }
488 bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
489  bool MadeChange = false;
490 
491  // Do not convert small short loops to CTR loop.
492  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
493  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
495  auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
496  *L->getHeader()->getParent());
497  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
499  for (BasicBlock *BB : L->blocks())
500  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
501  // 6 is an approximate latency for the mtctr instruction.
502  if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
503  return false;
504  }
505 
506  // Process nested loops first.
507  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
508  MadeChange |= convertToCTRLoop(*I);
509  LLVM_DEBUG(dbgs() << "Nested loop converted\n");
510  }
511 
512  // If a nested loop has been converted, then we can't convert this loop.
513  if (MadeChange)
514  return MadeChange;
515 
516  // Bail out if the loop has irreducible control flow.
517  LoopBlocksRPO RPOT(L);
518  RPOT.perform(LI);
519  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
520  return false;
521 
522 #ifndef NDEBUG
523  // Stop trying after reaching the limit (if any).
524  int Limit = CTRLoopLimit;
525  if (Limit >= 0) {
526  if (Counter >= CTRLoopLimit)
527  return false;
528  Counter++;
529  }
530 #endif
531 
532  // We don't want to spill/restore the counter register, and so we don't
533  // want to use the counter register if the loop contains calls.
534  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
535  I != IE; ++I)
536  if (mightUseCTR(*I))
537  return MadeChange;
538 
539  SmallVector<BasicBlock*, 4> ExitingBlocks;
540  L->getExitingBlocks(ExitingBlocks);
541 
542  // If there is an exit edge known to be frequently taken,
543  // we should not transform this loop.
544  for (auto &BB : ExitingBlocks) {
545  Instruction *TI = BB->getTerminator();
546  if (!TI) continue;
547 
548  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
549  uint64_t TrueWeight = 0, FalseWeight = 0;
550  if (!BI->isConditional() ||
551  !BI->extractProfMetadata(TrueWeight, FalseWeight))
552  continue;
553 
554  // If the exit path is more frequent than the loop path,
555  // we return here without further analysis for this loop.
556  bool TrueIsExit = !L->contains(BI->getSuccessor(0));
557  if (( TrueIsExit && FalseWeight < TrueWeight) ||
558  (!TrueIsExit && FalseWeight > TrueWeight))
559  return MadeChange;
560  }
561  }
562 
563  BasicBlock *CountedExitBlock = nullptr;
564  const SCEV *ExitCount = nullptr;
565  BranchInst *CountedExitBranch = nullptr;
566  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
567  IE = ExitingBlocks.end(); I != IE; ++I) {
568  const SCEV *EC = SE->getExitCount(L, *I);
569  LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
570  << (*I)->getName() << ": " << *EC << "\n");
571  if (isa<SCEVCouldNotCompute>(EC))
572  continue;
573  if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
574  if (ConstEC->getValue()->isZero())
575  continue;
576  } else if (!SE->isLoopInvariant(EC, L))
577  continue;
578 
579  if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
580  continue;
581 
582  // If this exiting block is contained in a nested loop, it is not eligible
583  // for insertion of the branch-and-decrement since the inner loop would
584  // end up messing up the value in the CTR.
585  if (LI->getLoopFor(*I) != L)
586  continue;
587 
588  // We now have a loop-invariant count of loop iterations (which is not the
589  // constant zero) for which we know that this loop will not exit via this
590  // existing block.
591 
592  // We need to make sure that this block will run on every loop iteration.
593  // For this to be true, we must dominate all blocks with backedges. Such
594  // blocks are in-loop predecessors to the header block.
595  bool NotAlways = false;
596  for (pred_iterator PI = pred_begin(L->getHeader()),
597  PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
598  if (!L->contains(*PI))
599  continue;
600 
601  if (!DT->dominates(*I, *PI)) {
602  NotAlways = true;
603  break;
604  }
605  }
606 
607  if (NotAlways)
608  continue;
609 
610  // Make sure this blocks ends with a conditional branch.
611  Instruction *TI = (*I)->getTerminator();
612  if (!TI)
613  continue;
614 
615  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
616  if (!BI->isConditional())
617  continue;
618 
619  CountedExitBranch = BI;
620  } else
621  continue;
622 
623  // Note that this block may not be the loop latch block, even if the loop
624  // has a latch block.
625  CountedExitBlock = *I;
626  ExitCount = EC;
627  break;
628  }
629 
630  if (!CountedExitBlock)
631  return MadeChange;
632 
633  BasicBlock *Preheader = L->getLoopPreheader();
634 
635  // If we don't have a preheader, then insert one. If we already have a
636  // preheader, then we can use it (except if the preheader contains a use of
637  // the CTR register because some such uses might be reordered by the
638  // selection DAG after the mtctr instruction).
639  if (!Preheader || mightUseCTR(Preheader))
640  Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
641  if (!Preheader)
642  return MadeChange;
643 
644  LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
645  << "\n");
646 
647  // Insert the count into the preheader and replace the condition used by the
648  // selected branch.
649  MadeChange = true;
650 
651  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
652  LLVMContext &C = SE->getContext();
653  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
654  if (!ExitCount->getType()->isPointerTy() &&
655  ExitCount->getType() != CountType)
656  ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
657  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
658  Value *ECValue =
659  SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
660 
661  IRBuilder<> CountBuilder(Preheader->getTerminator());
662  Module *M = Preheader->getParent()->getParent();
663  Function *MTCTRFunc =
664  Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType);
665  CountBuilder.CreateCall(MTCTRFunc, ECValue);
666 
667  IRBuilder<> CondBuilder(CountedExitBranch);
668  Function *DecFunc =
669  Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
670  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
671  Value *OldCond = CountedExitBranch->getCondition();
672  CountedExitBranch->setCondition(NewCond);
673 
674  // The false branch must exit the loop.
675  if (!L->contains(CountedExitBranch->getSuccessor(0)))
676  CountedExitBranch->swapSuccessors();
677 
678  // The old condition may be dead now, and may have even created a dead PHI
679  // (the original induction variable).
681  // Run through the basic blocks of the loop and see if any of them have dead
682  // PHIs that can be removed.
683  for (auto I : L->blocks())
684  DeleteDeadPHIs(I);
685 
686  ++NumCTRLoops;
687  return MadeChange;
688 }
689 
690 #ifndef NDEBUG
691 static bool clobbersCTR(const MachineInstr &MI) {
692  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
693  const MachineOperand &MO = MI.getOperand(i);
694  if (MO.isReg()) {
695  if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
696  return true;
697  } else if (MO.isRegMask()) {
698  if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
699  return true;
700  }
701  }
702 
703  return false;
704 }
705 
711  bool CheckPreds;
712 
713  if (I == MBB->begin()) {
714  Visited.insert(MBB);
715  goto queue_preds;
716  } else
717  --I;
718 
719 check_block:
720  Visited.insert(MBB);
721  if (I == MBB->end())
722  goto queue_preds;
723 
724  CheckPreds = true;
725  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
726  unsigned Opc = I->getOpcode();
727  if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
728  CheckPreds = false;
729  break;
730  }
731 
732  if (I != BI && clobbersCTR(*I)) {
733  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
734  << ") instruction " << *I
735  << " clobbers CTR, invalidating "
736  << printMBBReference(*BI->getParent()) << " ("
737  << BI->getParent()->getFullName() << ") instruction "
738  << *BI << "\n");
739  return false;
740  }
741 
742  if (I == IE)
743  break;
744  }
745 
746  if (!CheckPreds && Preds.empty())
747  return true;
748 
749  if (CheckPreds) {
750 queue_preds:
751  if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
752  LLVM_DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
753  << printMBBReference(*BI->getParent()) << " ("
754  << BI->getParent()->getFullName() << ") instruction "
755  << *BI << "\n");
756  return false;
757  }
758 
760  PIE = MBB->pred_end(); PI != PIE; ++PI)
761  Preds.push_back(*PI);
762  }
763 
764  do {
765  MBB = Preds.pop_back_val();
766  if (!Visited.count(MBB)) {
767  I = MBB->getLastNonDebugInstr();
768  goto check_block;
769  }
770  } while (!Preds.empty());
771 
772  return true;
773 }
774 
775 bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
776  MDT = &getAnalysis<MachineDominatorTree>();
777 
778  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
779  // any other instructions that might clobber the ctr register.
780  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
781  I != IE; ++I) {
782  MachineBasicBlock *MBB = &*I;
783  if (!MDT->isReachableFromEntry(MBB))
784  continue;
785 
787  MIIE = MBB->end(); MII != MIIE; ++MII) {
788  unsigned Opc = MII->getOpcode();
789  if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
790  Opc == PPC::BDZ8 || Opc == PPC::BDZ)
791  if (!verifyCTRBranch(MBB, MII))
792  llvm_unreachable("Invalid PPC CTR loop!");
793  }
794  }
795 
796  return false;
797 }
798 #endif // NDEBUG
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:70
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
uint64_t CallInst * C
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:595
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:611
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool hasLocalLinkage() const
Definition: GlobalValue.h:435
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void swapSuccessors()
Swap the successors of this branch instruction.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:259
void initializePPCCTRLoopsVerifyPass(PassRegistry &)
ArrayRef< BasicBlock *>::const_iterator block_iterator
Definition: LoopInfo.h:152
LLVM_NODISCARD bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:167
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
An immutable pass that tracks lazily created AssumptionCache objects.
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn&#39;t have a preheader, this method is called...
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:709
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
F(f)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:176
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
Definition: Type.h:158
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number...
Definition: InlineAsm.h:148
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:353
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:411
This file a TargetTransformInfo::Concept conforming object specific to the PPC target machine...
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
std::vector< Loop *>::const_iterator iterator
iterator/begin/end - The interface to the top-level loops in the current function.
Definition: LoopInfo.h:661
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
FunctionPass * createPPCCTRLoops()
static cl::opt< int > CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1))
BlockT * getHeader() const
Definition: LoopInfo.h:99
std::vector< Loop *>::const_iterator iterator
Definition: LoopInfo.h:138
ppc ctr loops PowerPC CTR Loops Verify
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) INITIALIZE_PASS_END(PPCCTRLoops
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
FunctionPass * createPPCCTRLoopsVerify()
iterator getLastNonDebugInstr()
Returns an iterator to the last non-debug instruction in the basic block, or end().
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber.
Definition: InlineAsm.h:120
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1018
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:303
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
Wrapper pass for TargetTransformInfo.
bool hasName() const
Definition: Value.h:250
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:463
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
Class to represent integer types.
Definition: DerivedTypes.h:39
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
Common code between 32-bit and 64-bit PowerPC targets.
std::vector< MachineBasicBlock * >::iterator pred_iterator
static double log2(double V)
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:430
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
Extended Value Type.
Definition: ValueTypes.h:33
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1237
bool verify(const TargetRegisterInfo &TRI) const
Check that information hold by this instance make sense for the given TRI.
void initializePPCCTRLoopsPass(PassRegistry &)
std::vector< ConstraintInfo > ConstraintInfoVector
Definition: InlineAsm.h:115
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:109
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:270
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
iterator begin() const
Definition: LoopInfo.h:141
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:32
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:613
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
This class uses information about analyze scalars to rewrite expressions in canonical form...
LoopT * getParentLoop() const
Definition: LoopInfo.h:100
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:150
static bool asmClobbersCTR(InlineAsm *IA)
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Definition: LoopInfo.h:588
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
iterator end() const
Definition: LoopInfo.h:142
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
block_iterator block_end() const
Definition: LoopInfo.h:154
Same for multiplication.
Definition: ISDOpcodes.h:256
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2009
ppc ctr loops
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
void setCondition(Value *V)
Multiway switch.
static bool clobbersCTR(const MachineInstr &MI)
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
static bool isLargeIntegerTy(bool Is32Bit, Type *Ty)
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:969
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This pass exposes codegen information to IR-level passes.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:53
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:155
block_iterator block_begin() const
Definition: LoopInfo.h:153
static bool verifyCTRBranch(MachineBasicBlock *MBB, MachineBasicBlock::iterator I)
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1226
static ConstraintInfoVector ParseConstraints(StringRef ConstraintString)
ParseConstraints - Split up the constraint string into the specific constraints and their prefixes...
Definition: InlineAsm.cpp:207
ppc ctr PowerPC CTR Loops
This class represents a constant integer value.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164