LLVM  9.0.0svn
PPCCTRLoops.cpp
Go to the documentation of this file.
1 //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass identifies loops where we can generate the PPC branch instructions
10 // that decrement and test the count register (CTR) (bdnz and friends).
11 //
12 // The pattern that defines the induction variable can changed depending on
13 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
14 // normalizes induction variables, and the Loop Strength Reduction pass
15 // run by 'llc' may also make changes to the induction variable.
16 //
17 // Criteria for CTR loops:
18 // - Countable loops (w/ ind. var for a trip count)
19 // - Try inner-most loops first
20 // - No nested CTR loops.
21 // - No function calls in loops.
22 //
23 //===----------------------------------------------------------------------===//
24 
25 #include "PPC.h"
26 #include "PPCSubtarget.h"
27 #include "PPCTargetMachine.h"
28 #include "PPCTargetTransformInfo.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/CFG.h"
34 #include "llvm/Analysis/LoopInfo.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/Instructions.h"
47 #include "llvm/IR/IntrinsicInst.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/IR/ValueHandle.h"
50 #include "llvm/PassSupport.h"
52 #include "llvm/Support/Debug.h"
54 #include "llvm/Transforms/Scalar.h"
55 #include "llvm/Transforms/Utils.h"
58 
59 #ifndef NDEBUG
64 #endif
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "ctrloops"
69 
70 #ifndef NDEBUG
71 static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
72 #endif
73 
74 // The latency of mtctr is only justified if there are more than 4
75 // comparisons that will be removed as a result.
76 static cl::opt<unsigned>
77 SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
78  cl::desc("Loops with a constant trip count smaller than "
79  "this value will not use the count register."));
80 
81 STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
82 
83 namespace {
84  struct PPCCTRLoops : public FunctionPass {
85 
86 #ifndef NDEBUG
87  static int Counter;
88 #endif
89 
90  public:
91  static char ID;
92 
93  PPCCTRLoops() : FunctionPass(ID) {
95  }
96 
97  bool runOnFunction(Function &F) override;
98 
99  void getAnalysisUsage(AnalysisUsage &AU) const override {
107  }
108 
109  private:
110  bool mightUseCTR(BasicBlock *BB);
111  bool convertToCTRLoop(Loop *L);
112 
113  private:
114  const PPCTargetMachine *TM;
115  const PPCSubtarget *STI;
116  const PPCTargetLowering *TLI;
117  const DataLayout *DL;
118  const TargetLibraryInfo *LibInfo;
119  const TargetTransformInfo *TTI;
120  LoopInfo *LI;
121  ScalarEvolution *SE;
122  DominatorTree *DT;
123  bool PreserveLCSSA;
124  TargetSchedModel SchedModel;
125  };
126 
127  char PPCCTRLoops::ID = 0;
128 #ifndef NDEBUG
129  int PPCCTRLoops::Counter = 0;
130 #endif
131 
132 #ifndef NDEBUG
133  struct PPCCTRLoopsVerify : public MachineFunctionPass {
134  public:
135  static char ID;
136 
137  PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
139  }
140 
141  void getAnalysisUsage(AnalysisUsage &AU) const override {
144  }
145 
146  bool runOnMachineFunction(MachineFunction &MF) override;
147 
148  private:
150  };
151 
152  char PPCCTRLoopsVerify::ID = 0;
153 #endif // NDEBUG
154 } // end anonymous namespace
155 
156 INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
157  false, false)
161 INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
162  false, false)
163 
164 FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
165 
166 #ifndef NDEBUG
167 INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
168  "PowerPC CTR Loops Verify", false, false)
170 INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
171  "PowerPC CTR Loops Verify", false, false)
172 
174  return new PPCCTRLoopsVerify();
175 }
176 #endif // NDEBUG
177 
179  if (skipFunction(F))
180  return false;
181 
182  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
183  if (!TPC)
184  return false;
185 
186  TM = &TPC->getTM<PPCTargetMachine>();
187  STI = TM->getSubtargetImpl(F);
188  TLI = STI->getTargetLowering();
189 
190  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
191  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
192  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
193  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
194  DL = &F.getParent()->getDataLayout();
195  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
196  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
197  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
198  SchedModel.init(STI);
199 
200  bool MadeChange = false;
201 
202  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
203  I != E; ++I) {
204  Loop *L = *I;
205  if (!L->getParentLoop())
206  MadeChange |= convertToCTRLoop(L);
207  }
208 
209  return MadeChange;
210 }
211 
212 static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
213  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
214  return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
215 
216  return false;
217 }
218 
219 // Determining the address of a TLS variable results in a function call in
220 // certain TLS models.
221 static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
222  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
223  if (!GV) {
224  // Recurse to check for constants that refer to TLS global variables.
225  if (const auto *CV = dyn_cast<Constant>(MemAddr))
226  for (const auto &CO : CV->operands())
227  if (memAddrUsesCTR(TM, CO))
228  return true;
229 
230  return false;
231  }
232 
233  if (!GV->isThreadLocal())
234  return false;
236  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
237 }
238 
239 // Loop through the inline asm constraints and look for something that clobbers
240 // ctr.
241 static bool asmClobbersCTR(InlineAsm *IA) {
243  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
244  InlineAsm::ConstraintInfo &C = CIV[i];
245  if (C.Type != InlineAsm::isInput)
246  for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
247  if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
248  return true;
249  }
250  return false;
251 }
252 
253 bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
254  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
255  J != JE; ++J) {
256  if (CallInst *CI = dyn_cast<CallInst>(J)) {
257  // Inline ASM is okay, unless it clobbers the ctr register.
258  if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
259  if (asmClobbersCTR(IA))
260  return true;
261  continue;
262  }
263 
264  if (Function *F = CI->getCalledFunction()) {
265  // Most intrinsics don't become function calls, but some might.
266  // sin, cos, exp and log are always calls.
267  unsigned Opcode = 0;
269  switch (F->getIntrinsicID()) {
270  default: continue;
271  // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
272  // we're definitely using CTR.
273  case Intrinsic::ppc_is_decremented_ctr_nonzero:
274  case Intrinsic::ppc_mtctr:
275  return true;
276 
277 // VisualStudio defines setjmp as _setjmp
278 #if defined(_MSC_VER) && defined(setjmp) && \
279  !defined(setjmp_undefined_for_msvc)
280 # pragma push_macro("setjmp")
281 # undef setjmp
282 # define setjmp_undefined_for_msvc
283 #endif
284 
285  case Intrinsic::setjmp:
286 
287 #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
288  // let's return it to _setjmp state
289 # pragma pop_macro("setjmp")
290 # undef setjmp_undefined_for_msvc
291 #endif
292 
293  case Intrinsic::longjmp:
294 
295  // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
296  // because, although it does clobber the counter register, the
297  // control can't then return to inside the loop unless there is also
298  // an eh_sjlj_setjmp.
299  case Intrinsic::eh_sjlj_setjmp:
300 
301  case Intrinsic::memcpy:
302  case Intrinsic::memmove:
303  case Intrinsic::memset:
304  case Intrinsic::powi:
305  case Intrinsic::log:
306  case Intrinsic::log2:
307  case Intrinsic::log10:
308  case Intrinsic::exp:
309  case Intrinsic::exp2:
310  case Intrinsic::pow:
311  case Intrinsic::sin:
312  case Intrinsic::cos:
313  return true;
314  case Intrinsic::copysign:
315  if (CI->getArgOperand(0)->getType()->getScalarType()->
316  isPPC_FP128Ty())
317  return true;
318  else
319  continue; // ISD::FCOPYSIGN is never a library call.
320  case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
321  case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
322  case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
323  case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
324  case Intrinsic::rint: Opcode = ISD::FRINT; break;
325  case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
326  case Intrinsic::round: Opcode = ISD::FROUND; break;
327  case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
328  case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
329  case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
330  case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
331  }
332  }
333 
334  // PowerPC does not use [US]DIVREM or other library calls for
335  // operations on regular types which are not otherwise library calls
336  // (i.e. soft float or atomics). If adapting for targets that do,
337  // additional care is required here.
338 
339  LibFunc Func;
340  if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
341  LibInfo->getLibFunc(F->getName(), Func) &&
342  LibInfo->hasOptimizedCodeGen(Func)) {
343  // Non-read-only functions are never treated as intrinsics.
344  if (!CI->onlyReadsMemory())
345  return true;
346 
347  // Conversion happens only for FP calls.
348  if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
349  return true;
350 
351  switch (Func) {
352  default: return true;
353  case LibFunc_copysign:
354  case LibFunc_copysignf:
355  continue; // ISD::FCOPYSIGN is never a library call.
356  case LibFunc_copysignl:
357  return true;
358  case LibFunc_fabs:
359  case LibFunc_fabsf:
360  case LibFunc_fabsl:
361  continue; // ISD::FABS is never a library call.
362  case LibFunc_sqrt:
363  case LibFunc_sqrtf:
364  case LibFunc_sqrtl:
365  Opcode = ISD::FSQRT; break;
366  case LibFunc_floor:
367  case LibFunc_floorf:
368  case LibFunc_floorl:
369  Opcode = ISD::FFLOOR; break;
370  case LibFunc_nearbyint:
371  case LibFunc_nearbyintf:
372  case LibFunc_nearbyintl:
373  Opcode = ISD::FNEARBYINT; break;
374  case LibFunc_ceil:
375  case LibFunc_ceilf:
376  case LibFunc_ceill:
377  Opcode = ISD::FCEIL; break;
378  case LibFunc_rint:
379  case LibFunc_rintf:
380  case LibFunc_rintl:
381  Opcode = ISD::FRINT; break;
382  case LibFunc_round:
383  case LibFunc_roundf:
384  case LibFunc_roundl:
385  Opcode = ISD::FROUND; break;
386  case LibFunc_trunc:
387  case LibFunc_truncf:
388  case LibFunc_truncl:
389  Opcode = ISD::FTRUNC; break;
390  case LibFunc_fmin:
391  case LibFunc_fminf:
392  case LibFunc_fminl:
393  Opcode = ISD::FMINNUM; break;
394  case LibFunc_fmax:
395  case LibFunc_fmaxf:
396  case LibFunc_fmaxl:
397  Opcode = ISD::FMAXNUM; break;
398  }
399  }
400 
401  if (Opcode) {
402  EVT EVTy =
403  TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);
404 
405  if (EVTy == MVT::Other)
406  return true;
407 
408  if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
409  continue;
410  else if (EVTy.isVector() &&
411  TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
412  continue;
413 
414  return true;
415  }
416  }
417 
418  return true;
419  } else if (isa<BinaryOperator>(J) &&
420  J->getType()->getScalarType()->isPPC_FP128Ty()) {
421  // Most operations on ppc_f128 values become calls.
422  return true;
423  } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
424  isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
425  CastInst *CI = cast<CastInst>(J);
426  if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
427  CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
428  isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
429  isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
430  return true;
431  } else if (isLargeIntegerTy(!TM->isPPC64(),
432  J->getType()->getScalarType()) &&
433  (J->getOpcode() == Instruction::UDiv ||
434  J->getOpcode() == Instruction::SDiv ||
435  J->getOpcode() == Instruction::URem ||
436  J->getOpcode() == Instruction::SRem)) {
437  return true;
438  } else if (!TM->isPPC64() &&
439  isLargeIntegerTy(false, J->getType()->getScalarType()) &&
440  (J->getOpcode() == Instruction::Shl ||
441  J->getOpcode() == Instruction::AShr ||
442  J->getOpcode() == Instruction::LShr)) {
443  // Only on PPC32, for 128-bit integers (specifically not 64-bit
444  // integers), these might be runtime calls.
445  return true;
446  } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
447  // On PowerPC, indirect jumps use the counter register.
448  return true;
449  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
450  if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
451  return true;
452  }
453 
454  // FREM is always a call.
455  if (J->getOpcode() == Instruction::FRem)
456  return true;
457 
458  if (STI->useSoftFloat()) {
459  switch(J->getOpcode()) {
460  case Instruction::FAdd:
461  case Instruction::FSub:
462  case Instruction::FMul:
463  case Instruction::FDiv:
464  case Instruction::FPTrunc:
465  case Instruction::FPExt:
466  case Instruction::FPToUI:
467  case Instruction::FPToSI:
468  case Instruction::UIToFP:
469  case Instruction::SIToFP:
470  case Instruction::FCmp:
471  return true;
472  }
473  }
474 
475  for (Value *Operand : J->operands())
476  if (memAddrUsesCTR(*TM, Operand))
477  return true;
478  }
479 
480  return false;
481 }
482 bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
483  bool MadeChange = false;
484 
485  // Do not convert small short loops to CTR loop.
486  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
487  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
489  auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
490  *L->getHeader()->getParent());
491  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
493  for (BasicBlock *BB : L->blocks())
494  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
495  // 6 is an approximate latency for the mtctr instruction.
496  if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
497  return false;
498  }
499 
500  // Process nested loops first.
501  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
502  MadeChange |= convertToCTRLoop(*I);
503  LLVM_DEBUG(dbgs() << "Nested loop converted\n");
504  }
505 
506  // If a nested loop has been converted, then we can't convert this loop.
507  if (MadeChange)
508  return MadeChange;
509 
510  // Bail out if the loop has irreducible control flow.
511  LoopBlocksRPO RPOT(L);
512  RPOT.perform(LI);
513  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
514  return false;
515 
516 #ifndef NDEBUG
517  // Stop trying after reaching the limit (if any).
518  int Limit = CTRLoopLimit;
519  if (Limit >= 0) {
520  if (Counter >= CTRLoopLimit)
521  return false;
522  Counter++;
523  }
524 #endif
525 
526  // We don't want to spill/restore the counter register, and so we don't
527  // want to use the counter register if the loop contains calls.
528  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
529  I != IE; ++I)
530  if (mightUseCTR(*I))
531  return MadeChange;
532 
533  SmallVector<BasicBlock*, 4> ExitingBlocks;
534  L->getExitingBlocks(ExitingBlocks);
535 
536  // If there is an exit edge known to be frequently taken,
537  // we should not transform this loop.
538  for (auto &BB : ExitingBlocks) {
539  Instruction *TI = BB->getTerminator();
540  if (!TI) continue;
541 
542  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
543  uint64_t TrueWeight = 0, FalseWeight = 0;
544  if (!BI->isConditional() ||
545  !BI->extractProfMetadata(TrueWeight, FalseWeight))
546  continue;
547 
548  // If the exit path is more frequent than the loop path,
549  // we return here without further analysis for this loop.
550  bool TrueIsExit = !L->contains(BI->getSuccessor(0));
551  if (( TrueIsExit && FalseWeight < TrueWeight) ||
552  (!TrueIsExit && FalseWeight > TrueWeight))
553  return MadeChange;
554  }
555  }
556 
557  BasicBlock *CountedExitBlock = nullptr;
558  const SCEV *ExitCount = nullptr;
559  BranchInst *CountedExitBranch = nullptr;
560  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
561  IE = ExitingBlocks.end(); I != IE; ++I) {
562  const SCEV *EC = SE->getExitCount(L, *I);
563  LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
564  << (*I)->getName() << ": " << *EC << "\n");
565  if (isa<SCEVCouldNotCompute>(EC))
566  continue;
567  if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
568  if (ConstEC->getValue()->isZero())
569  continue;
570  } else if (!SE->isLoopInvariant(EC, L))
571  continue;
572 
573  if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
574  continue;
575 
576  // If this exiting block is contained in a nested loop, it is not eligible
577  // for insertion of the branch-and-decrement since the inner loop would
578  // end up messing up the value in the CTR.
579  if (LI->getLoopFor(*I) != L)
580  continue;
581 
582  // We now have a loop-invariant count of loop iterations (which is not the
583  // constant zero) for which we know that this loop will not exit via this
584  // existing block.
585 
586  // We need to make sure that this block will run on every loop iteration.
587  // For this to be true, we must dominate all blocks with backedges. Such
588  // blocks are in-loop predecessors to the header block.
589  bool NotAlways = false;
590  for (pred_iterator PI = pred_begin(L->getHeader()),
591  PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
592  if (!L->contains(*PI))
593  continue;
594 
595  if (!DT->dominates(*I, *PI)) {
596  NotAlways = true;
597  break;
598  }
599  }
600 
601  if (NotAlways)
602  continue;
603 
604  // Make sure this blocks ends with a conditional branch.
605  Instruction *TI = (*I)->getTerminator();
606  if (!TI)
607  continue;
608 
609  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
610  if (!BI->isConditional())
611  continue;
612 
613  CountedExitBranch = BI;
614  } else
615  continue;
616 
617  // Note that this block may not be the loop latch block, even if the loop
618  // has a latch block.
619  CountedExitBlock = *I;
620  ExitCount = EC;
621  break;
622  }
623 
624  if (!CountedExitBlock)
625  return MadeChange;
626 
627  BasicBlock *Preheader = L->getLoopPreheader();
628 
629  // If we don't have a preheader, then insert one. If we already have a
630  // preheader, then we can use it (except if the preheader contains a use of
631  // the CTR register because some such uses might be reordered by the
632  // selection DAG after the mtctr instruction).
633  if (!Preheader || mightUseCTR(Preheader))
634  Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
635  if (!Preheader)
636  return MadeChange;
637 
638  LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
639  << "\n");
640 
641  // Insert the count into the preheader and replace the condition used by the
642  // selected branch.
643  MadeChange = true;
644 
645  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
646  LLVMContext &C = SE->getContext();
647  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
648  if (!ExitCount->getType()->isPointerTy() &&
649  ExitCount->getType() != CountType)
650  ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
651  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
652  Value *ECValue =
653  SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
654 
655  IRBuilder<> CountBuilder(Preheader->getTerminator());
656  Module *M = Preheader->getParent()->getParent();
657  Function *MTCTRFunc =
658  Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType);
659  CountBuilder.CreateCall(MTCTRFunc, ECValue);
660 
661  IRBuilder<> CondBuilder(CountedExitBranch);
662  Function *DecFunc =
663  Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
664  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
665  Value *OldCond = CountedExitBranch->getCondition();
666  CountedExitBranch->setCondition(NewCond);
667 
668  // The false branch must exit the loop.
669  if (!L->contains(CountedExitBranch->getSuccessor(0)))
670  CountedExitBranch->swapSuccessors();
671 
672  // The old condition may be dead now, and may have even created a dead PHI
673  // (the original induction variable).
675  // Run through the basic blocks of the loop and see if any of them have dead
676  // PHIs that can be removed.
677  for (auto I : L->blocks())
678  DeleteDeadPHIs(I);
679 
680  ++NumCTRLoops;
681  return MadeChange;
682 }
683 
684 #ifndef NDEBUG
685 static bool clobbersCTR(const MachineInstr &MI) {
686  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
687  const MachineOperand &MO = MI.getOperand(i);
688  if (MO.isReg()) {
689  if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
690  return true;
691  } else if (MO.isRegMask()) {
692  if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
693  return true;
694  }
695  }
696 
697  return false;
698 }
699 
705  bool CheckPreds;
706 
707  if (I == MBB->begin()) {
708  Visited.insert(MBB);
709  goto queue_preds;
710  } else
711  --I;
712 
713 check_block:
714  Visited.insert(MBB);
715  if (I == MBB->end())
716  goto queue_preds;
717 
718  CheckPreds = true;
719  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
720  unsigned Opc = I->getOpcode();
721  if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
722  CheckPreds = false;
723  break;
724  }
725 
726  if (I != BI && clobbersCTR(*I)) {
727  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
728  << ") instruction " << *I
729  << " clobbers CTR, invalidating "
730  << printMBBReference(*BI->getParent()) << " ("
731  << BI->getParent()->getFullName() << ") instruction "
732  << *BI << "\n");
733  return false;
734  }
735 
736  if (I == IE)
737  break;
738  }
739 
740  if (!CheckPreds && Preds.empty())
741  return true;
742 
743  if (CheckPreds) {
744 queue_preds:
745  if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
746  LLVM_DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
747  << printMBBReference(*BI->getParent()) << " ("
748  << BI->getParent()->getFullName() << ") instruction "
749  << *BI << "\n");
750  return false;
751  }
752 
754  PIE = MBB->pred_end(); PI != PIE; ++PI)
755  Preds.push_back(*PI);
756  }
757 
758  do {
759  MBB = Preds.pop_back_val();
760  if (!Visited.count(MBB)) {
761  I = MBB->getLastNonDebugInstr();
762  goto check_block;
763  }
764  } while (!Preds.empty());
765 
766  return true;
767 }
768 
769 bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
770  MDT = &getAnalysis<MachineDominatorTree>();
771 
772  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
773  // any other instructions that might clobber the ctr register.
774  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
775  I != IE; ++I) {
776  MachineBasicBlock *MBB = &*I;
777  if (!MDT->isReachableFromEntry(MBB))
778  continue;
779 
781  MIIE = MBB->end(); MII != MIIE; ++MII) {
782  unsigned Opc = MII->getOpcode();
783  if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
784  Opc == PPC::BDZ8 || Opc == PPC::BDZ)
785  if (!verifyCTRBranch(MBB, MII))
786  llvm_unreachable("Invalid PPC CTR loop!");
787  }
788  }
789 
790  return false;
791 }
792 #endif // NDEBUG
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:70
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
uint64_t CallInst * C
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:622
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:674
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool hasLocalLinkage() const
Definition: GlobalValue.h:435
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void swapSuccessors()
Swap the successors of this branch instruction.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:259
void initializePPCCTRLoopsVerifyPass(PassRegistry &)
ArrayRef< BasicBlock *>::const_iterator block_iterator
Definition: LoopInfo.h:153
LLVM_NODISCARD bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:167
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
An immutable pass that tracks lazily created AssumptionCache objects.
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:709
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
F(f)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:176
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
Definition: Type.h:158
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number...
Definition: InlineAsm.h:148
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:416
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:411
This file a TargetTransformInfo::Concept conforming object specific to the PPC target machine...
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
std::vector< Loop *>::const_iterator iterator
iterator/begin/end - The interface to the top-level loops in the current function.
Definition: LoopInfo.h:669
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
FunctionPass * createPPCCTRLoops()
static cl::opt< int > CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1))
BlockT * getHeader() const
Definition: LoopInfo.h:100
std::vector< Loop *>::const_iterator iterator
Definition: LoopInfo.h:139
ppc ctr loops PowerPC CTR Loops Verify
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) INITIALIZE_PASS_END(PPCCTRLoops
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
FunctionPass * createPPCCTRLoopsVerify()
iterator getLastNonDebugInstr()
Returns an iterator to the last non-debug instruction in the basic block, or end().
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber.
Definition: InlineAsm.h:120
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1022
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:303
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Wrapper pass for TargetTransformInfo.
bool hasName() const
Definition: Value.h:250
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:467
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
Class to represent integer types.
Definition: DerivedTypes.h:39
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
Common code between 32-bit and 64-bit PowerPC targets.
std::vector< MachineBasicBlock * >::iterator pred_iterator
static double log2(double V)
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:434
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
Extended Value Type.
Definition: ValueTypes.h:33
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1237
bool verify(const TargetRegisterInfo &TRI) const
Check that information hold by this instance make sense for the given TRI.
void initializePPCCTRLoopsPass(PassRegistry &)
std::vector< ConstraintInfo > ConstraintInfoVector
Definition: InlineAsm.h:115
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:270
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
iterator begin() const
Definition: LoopInfo.h:142
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:32
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:676
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
This class uses information about analyze scalars to rewrite expressions in canonical form...
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:150
static bool asmClobbersCTR(InlineAsm *IA)
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Definition: LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
iterator end() const
Definition: LoopInfo.h:143
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
block_iterator block_end() const
Definition: LoopInfo.h:155
Same for multiplication.
Definition: ISDOpcodes.h:256
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2009
ppc ctr loops
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
void setCondition(Value *V)
Multiway switch.
static bool clobbersCTR(const MachineInstr &MI)
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
static bool isLargeIntegerTy(bool Is32Bit, Type *Ty)
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn&#39;t have a preheader, this method is called...
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:977
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This pass exposes codegen information to IR-level passes.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:53
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
block_iterator block_begin() const
Definition: LoopInfo.h:154
static bool verifyCTRBranch(MachineBasicBlock *MBB, MachineBasicBlock::iterator I)
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1226
static ConstraintInfoVector ParseConstraints(StringRef ConstraintString)
ParseConstraints - Split up the constraint string into the specific constraints and their prefixes...
Definition: InlineAsm.cpp:207
ppc ctr PowerPC CTR Loops
This class represents a constant integer value.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164