LLVM  6.0.0svn
JumpThreading.cpp
Go to the documentation of this file.
1 //===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Jump Threading pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/CFG.h"
30 #include "llvm/Analysis/Loads.h"
31 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/IR/BasicBlock.h"
35 #include "llvm/IR/CFG.h"
36 #include "llvm/IR/Constant.h"
37 #include "llvm/IR/ConstantRange.h"
38 #include "llvm/IR/Constants.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/IR/Dominators.h"
41 #include "llvm/IR/Function.h"
42 #include "llvm/IR/InstrTypes.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Intrinsics.h"
47 #include "llvm/IR/LLVMContext.h"
48 #include "llvm/IR/MDBuilder.h"
49 #include "llvm/IR/Metadata.h"
50 #include "llvm/IR/Module.h"
51 #include "llvm/IR/PassManager.h"
52 #include "llvm/IR/PatternMatch.h"
53 #include "llvm/IR/Type.h"
54 #include "llvm/IR/Use.h"
55 #include "llvm/IR/User.h"
56 #include "llvm/IR/Value.h"
57 #include "llvm/Pass.h"
60 #include "llvm/Support/Casting.h"
62 #include "llvm/Support/Debug.h"
64 #include "llvm/Transforms/Scalar.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstddef>
73 #include <cstdint>
74 #include <iterator>
75 #include <memory>
76 #include <utility>
77 
78 using namespace llvm;
79 using namespace jumpthreading;
80 
81 #define DEBUG_TYPE "jump-threading"
82 
83 STATISTIC(NumThreads, "Number of jumps threaded");
84 STATISTIC(NumFolds, "Number of terminators folded");
85 STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
86 
// Hidden command-line option "jump-threading-threshold": the maximum block
// size to duplicate for jump threading, initialised to 6. The assignment to
// BBDupThreshold later in this file falls back to this option when T == -1.
87 static cl::opt<unsigned>
88 BBDuplicateThreshold("jump-threading-threshold",
89  cl::desc("Max block size to duplicate for jump threading"),
90  cl::init(6), cl::Hidden);
91 
92 static cl::opt<unsigned>
94  "jump-threading-implication-search-threshold",
95  cl::desc("The number of predecessors to search for a stronger "
96  "condition to use to thread over a weaker condition"),
97  cl::init(3), cl::Hidden);
98 
100  "print-lvi-after-jump-threading",
101  cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
102  cl::Hidden);
103 
104 namespace {
105 
106  /// This pass performs 'jump threading', which looks at blocks that have
107  /// multiple predecessors and multiple successors. If one or more of the
108  /// predecessors of the block can be proven to always jump to one of the
109  /// successors, we forward the edge from the predecessor to the successor by
110  /// duplicating the contents of this block.
111  ///
112  /// An example of when this can occur is code like this:
113  ///
114  /// if () { ...
115  /// X = 4;
116  /// }
117  /// if (X < 3) {
118  ///
119  /// In this case, the unconditional branch at the end of the first if can be
120  /// revectored to the false side of the second if.
121  class JumpThreading : public FunctionPass {
  // Implementation object; runOnFunction and releaseMemory forward to it.
122  JumpThreadingPass Impl;
123 
124  public:
125  static char ID; // Pass identification
126 
  // T is passed unchanged to the JumpThreadingPass constructor; it defaults
  // to -1.
  // NOTE(review): listing line 128 is absent from this capture, so the
  // constructor body may contain a statement that is not shown here --
  // confirm against the real source file.
127  JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
129  }
130 
  // Defined out of line later in this file.
131  bool runOnFunction(Function &F) override;
132 
  // NOTE(review): listing lines 134-139 are absent, so the analysis
  // requirements declared here are not visible. runOnFunction calls
  // getAnalysis<> for TargetLibraryInfoWrapperPass, LazyValueInfoWrapperPass,
  // AAResultsWrapperPass and DominatorTreeWrapperPass, so presumably those
  // are registered here -- confirm against the real source file.
133  void getAnalysisUsage(AnalysisUsage &AU) const override {
140  }
141 
  // Delegates to the implementation object.
142  void releaseMemory() override { Impl.releaseMemory(); }
143  };
144 
145 } // end anonymous namespace
146 
147 char JumpThreading::ID = 0;
148 
149 INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
150  "Jump Threading", false, false)
154 INITIALIZE_PASS_END(JumpThreading, "jump-threading",
155  "Jump Threading", false, false)
156 
157 // Public interface to the Jump Threading pass
159  return new JumpThreading(Threshold);
160 }
161 
163  BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
164 }
165 
166 // Update branch probability information according to conditional
167 // branch probability. This is usually made possible for cloned branches
168 // in inline instances by the context specific profile in the caller.
169 // For instance,
170 //
171 // [Block PredBB]
172 // [Branch PredBr]
173 // if (t) {
174 // Block A;
175 // } else {
176 // Block B;
177 // }
178 //
179 // [Block BB]
180 // cond = PN([true, %A], [..., %B]); // PHI node
181 // [Branch CondBr]
182 // if (cond) {
183 // ... // P(cond == true) = 1%
184 // }
185 //
186 // Here we know that when block A is taken, cond must be true, which means
187 // P(cond == true | A) = 1
188 //
189 // Given that P(cond == true) = P(cond == true | A) * P(A) +
190 // P(cond == true | B) * P(B)
191 // we get
192 // P(cond == true ) = P(A) + P(cond == true | B) * P(B)
193 //
194 // which gives us:
195 // P(A) <= P(cond == true), i.e.
196 // P(t == true) <= P(cond == true)
197 //
198 // In other words, if we know P(cond == true), we know that P(t == true)
199 // can not be greater than 1%.
201  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
202  if (!CondBr)
203  return;
204 
206  uint64_t TrueWeight, FalseWeight;
207  if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
208  return;
209 
210  // Returns the outgoing edge of the dominating predecessor block
211  // that leads to the PhiNode's incoming block:
212  auto GetPredOutEdge =
213  [](BasicBlock *IncomingBB,
214  BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
215  auto *PredBB = IncomingBB;
216  auto *SuccBB = PhiBB;
217  while (true) {
218  BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
219  if (PredBr && PredBr->isConditional())
220  return {PredBB, SuccBB};
221  auto *SinglePredBB = PredBB->getSinglePredecessor();
222  if (!SinglePredBB)
223  return {nullptr, nullptr};
224  SuccBB = PredBB;
225  PredBB = SinglePredBB;
226  }
227  };
228 
229  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
230  Value *PhiOpnd = PN->getIncomingValue(i);
231  ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
232 
233  if (!CI || !CI->getType()->isIntegerTy(1))
234  continue;
235 
237  TrueWeight, TrueWeight + FalseWeight)
239  FalseWeight, TrueWeight + FalseWeight));
240 
241  auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
242  if (!PredOutEdge.first)
243  return;
244 
245  BasicBlock *PredBB = PredOutEdge.first;
246  BranchInst *PredBr = cast<BranchInst>(PredBB->getTerminator());
247 
248  uint64_t PredTrueWeight, PredFalseWeight;
249  // FIXME: We currently only set the profile data when it is missing.
250  // With PGO, this can be used to refine even existing profile data with
251  // context information. This needs to be done after more performance
252  // testing.
253  if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight))
254  continue;
255 
256  // We can not infer anything useful when BP >= 50%, because BP is the
257  // upper bound probability value.
258  if (BP >= BranchProbability(50, 100))
259  continue;
260 
261  SmallVector<uint32_t, 2> Weights;
262  if (PredBr->getSuccessor(0) == PredOutEdge.second) {
263  Weights.push_back(BP.getNumerator());
264  Weights.push_back(BP.getCompl().getNumerator());
265  } else {
266  Weights.push_back(BP.getCompl().getNumerator());
267  Weights.push_back(BP.getNumerator());
268  }
270  MDBuilder(PredBr->getParent()->getContext())
271  .createBranchWeights(Weights));
272  }
273 }
274 
275 /// runOnFunction - Toplevel algorithm.
///
/// Legacy pass-manager entry point: gathers the analyses, builds branch
/// probability / block frequency info only when F has an entry count, and
/// forwards everything to Impl.runImpl. Returns false when skipFunction(F)
/// is true, otherwise the value returned by runImpl.
276 bool JumpThreading::runOnFunction(Function &F) {
277  if (skipFunction(F))
278  return false;
279  auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
280  auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
281  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  // BFI and BPI stay null unless the function carries an entry count.
282  std::unique_ptr<BlockFrequencyInfo> BFI;
283  std::unique_ptr<BranchProbabilityInfo> BPI;
284  bool HasProfileData = F.getEntryCount().hasValue();
285  if (HasProfileData) {
  // LI is built from a freshly computed DominatorTree and is local to this
  // scope.
286  LoopInfo LI{DominatorTree(F)};
287  BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
288  BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
289  }
290 
  // Ownership of BFI and BPI is handed over to the implementation.
291  bool Changed = Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI),
292  std::move(BPI));
  // NOTE(review): listing line 293 is absent from this capture. The closing
  // brace on listing line 297 implies an opening `if (...) {` guarding this
  // LVI dump -- presumably the "print-lvi-after-jump-threading" option
  // declared near the top of the file. Confirm against the real source file.
294  dbgs() << "LVI for function '" << F.getName() << "':\n";
295  LVI->printLVI(F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
296  dbgs());
297  }
298  return Changed;
299 }
300 
303  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
304  auto &LVI = AM.getResult<LazyValueAnalysis>(F);
305  auto &AA = AM.getResult<AAManager>(F);
306 
307  std::unique_ptr<BlockFrequencyInfo> BFI;
308  std::unique_ptr<BranchProbabilityInfo> BPI;
309  bool HasProfileData = F.getEntryCount().hasValue();
310  if (HasProfileData) {
311  LoopInfo LI{DominatorTree(F)};
312  BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
313  BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
314  }
315 
316  bool Changed = runImpl(F, &TLI, &LVI, &AA, HasProfileData, std::move(BFI),
317  std::move(BPI));
318 
319  if (!Changed)
320  return PreservedAnalyses::all();
322  PA.preserve<GlobalsAA>();
323  return PA;
324 }
325 
327  LazyValueInfo *LVI_, AliasAnalysis *AA_,
328  bool HasProfileData_,
329  std::unique_ptr<BlockFrequencyInfo> BFI_,
330  std::unique_ptr<BranchProbabilityInfo> BPI_) {
331  DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
332  TLI = TLI_;
333  LVI = LVI_;
334  AA = AA_;
335  BFI.reset();
336  BPI.reset();
337  // When profile data is available, we need to update edge weights after
338  // successful jump threading, which requires both BPI and BFI being available.
339  HasProfileData = HasProfileData_;
340  auto *GuardDecl = F.getParent()->getFunction(
341  Intrinsic::getName(Intrinsic::experimental_guard));
342  HasGuards = GuardDecl && !GuardDecl->use_empty();
343  if (HasProfileData) {
344  BPI = std::move(BPI_);
345  BFI = std::move(BFI_);
346  }
347 
348  // Remove unreachable blocks from function as they may result in infinite
349  // loop. We do threading if we found something profitable. Jump threading a
350  // branch can create other opportunities. If these opportunities form a cycle
351  // i.e. if any jump threading is undoing previous threading in the path, then
352  // we will loop forever. We take care of this issue by not jump threading for
353  // back edges. This works for normal cases but not for unreachable blocks as
354  // they may have cycle with no back edge.
355  bool EverChanged = false;
356  EverChanged |= removeUnreachableBlocks(F, LVI);
357 
358  FindLoopHeaders(F);
359 
360  bool Changed;
361  do {
362  Changed = false;
363  for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
364  BasicBlock *BB = &*I;
365  // Thread all of the branches we can over this block.
366  while (ProcessBlock(BB))
367  Changed = true;
368 
369  ++I;
370 
371  // If the block is trivially dead, zap it. This eliminates the successor
372  // edges which simplifies the CFG.
373  if (pred_empty(BB) &&
374  BB != &BB->getParent()->getEntryBlock()) {
375  DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
376  << "' with terminator: " << *BB->getTerminator() << '\n');
377  LoopHeaders.erase(BB);
378  LVI->eraseBlock(BB);
379  DeleteDeadBlock(BB);
380  Changed = true;
381  continue;
382  }
383 
385 
386  // Can't thread an unconditional jump, but if the block is "almost
387  // empty", we can replace uses of it with uses of the successor and make
388  // this dead.
389  // We should not eliminate the loop header or latch either, because
390  // eliminating a loop header or latch might later prevent LoopSimplify
391  // from transforming nested loops into simplified form. We will rely on
392  // later passes in backend to clean up empty blocks.
393  if (BI && BI->isUnconditional() &&
394  BB != &BB->getParent()->getEntryBlock() &&
395  // If the terminator is the only non-phi instruction, try to nuke it.
396  BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) &&
397  !LoopHeaders.count(BI->getSuccessor(0))) {
398  // FIXME: It is always conservatively correct to drop the info
399  // for a block even if it doesn't get erased. This isn't totally
400  // awesome, but it allows us to use AssertingVH to prevent nasty
401  // dangling pointer issues within LazyValueInfo.
402  LVI->eraseBlock(BB);
404  Changed = true;
405  }
406  }
407  EverChanged |= Changed;
408  } while (Changed);
409 
410  LoopHeaders.clear();
411  return EverChanged;
412 }
413 
414 // Replace uses of Cond with ToVal when safe to do so. If all uses are
415 // replaced, we can remove Cond. We cannot blindly replace all uses of Cond
416 // because we may incorrectly replace uses when guards/assumes are uses of
417 // `Cond` and we used the guards/assume to reason about the `Cond` value
418 // at the end of block. RAUW unconditionally replaces all uses
419 // including the guards/assumes themselves and the uses before the
420 // guard/assume.
//
// Cond and ToVal must have the same type (asserted below). Cond is erased
// only when it ends up with no uses and has no side effects.
421 static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) {
422  assert(Cond->getType() == ToVal->getType());
423  auto *BB = Cond->getParent();
424  // We can unconditionally replace all uses in non-local blocks (i.e. uses
425  // strictly dominated by BB), since LVI information is true from the
426  // terminator of BB.
427  replaceNonLocalUsesWith(Cond, ToVal);
  // Walk Cond's own block from its last instruction upwards.
428  for (Instruction &I : reverse(*BB)) {
429  // Reached the Cond whose uses we are trying to replace, so there are no
430  // more uses.
431  if (&I == Cond)
432  break;
433  // We only replace uses in instructions that are guaranteed to reach the end
434  // of BB, where we know Cond is ToVal.
  // NOTE(review): listing line 435 is absent from this capture; the `break`
  // below is presumably guarded by a condition checking that execution is
  // guaranteed to continue past I -- confirm against the real source file.
436  break;
437  I.replaceUsesOfWith(Cond, ToVal);
438  }
  // Drop Cond itself once nothing refers to it any more.
439  if (Cond->use_empty() && !Cond->mayHaveSideEffects())
440  Cond->eraseFromParent();
441 }
442 
443 /// Return the cost of duplicating a piece of this block from first non-phi
444 /// and before StopAt instruction to thread across it. Stop scanning the block
445 /// when exceeding the threshold. If duplication is impossible, returns ~0U.
447  Instruction *StopAt,
448  unsigned Threshold) {
449  assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
450  /// Ignore PHI nodes, these will be flattened when duplication happens.
452 
453  // FIXME: THREADING will delete values that are just used to compute the
454  // branch, so they shouldn't count against the duplication cost.
455 
456  unsigned Bonus = 0;
457  if (BB->getTerminator() == StopAt) {
458  // Threading through a switch statement is particularly profitable. If this
459  // block ends in a switch, decrease its cost to make it more likely to
460  // happen.
461  if (isa<SwitchInst>(StopAt))
462  Bonus = 6;
463 
464  // The same holds for indirect branches, but slightly more so.
465  if (isa<IndirectBrInst>(StopAt))
466  Bonus = 8;
467  }
468 
469  // Bump the threshold up so the early exit from the loop doesn't skip the
470  // terminator-based Size adjustment at the end.
471  Threshold += Bonus;
472 
473  // Sum up the cost of each instruction until we get to the terminator. Don't
474  // include the terminator because the copy won't include it.
475  unsigned Size = 0;
476  for (; &*I != StopAt; ++I) {
477 
478  // Stop scanning the block if we've reached the threshold.
479  if (Size > Threshold)
480  return Size;
481 
482  // Debugger intrinsics don't incur code size.
483  if (isa<DbgInfoIntrinsic>(I)) continue;
484 
485  // If this is a pointer->pointer bitcast, it is free.
486  if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
487  continue;
488 
489  // Bail out if this instruction gives back a token type, it is not possible
490  // to duplicate it if it is used outside this BB.
491  if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
492  return ~0U;
493 
494  // All other instructions count for at least one unit.
495  ++Size;
496 
497  // Calls are more expensive. If they are non-intrinsic calls, we model them
498  // as having cost of 4. If they are a non-vector intrinsic, we model them
499  // as having cost of 2 total, and if they are a vector intrinsic, we model
500  // them as having cost 1.
501  if (const CallInst *CI = dyn_cast<CallInst>(I)) {
502  if (CI->cannotDuplicate() || CI->isConvergent())
503  // Blocks with NoDuplicate are modelled as having infinite cost, so they
504  // are never duplicated.
505  return ~0U;
506  else if (!isa<IntrinsicInst>(CI))
507  Size += 3;
508  else if (!CI->getType()->isVectorTy())
509  Size += 1;
510  }
511  }
512 
513  return Size > Bonus ? Size - Bonus : 0;
514 }
515 
516 /// FindLoopHeaders - We do not want jump threading to turn proper loop
517 /// structures into irreducible loops. Doing this breaks up the loop nesting
518 /// hierarchy and pessimizes later transformations. To prevent this from
519 /// happening, we first have to find the loop headers. Here we approximate this
520 /// by finding targets of backedges in the CFG.
521 ///
522 /// Note that there definitely are cases when we want to allow threading of
523 /// edges across a loop header. For example, threading a jump from outside the
524 /// loop (the preheader) to an exit block of the loop is definitely profitable.
525 /// It is also almost always profitable to thread backedges from within the loop
526 /// to exit blocks, and is often profitable to thread backedges to other blocks
527 /// within the loop (forming a nested loop). This simple analysis is not rich
528 /// enough to track all of these properties and keep it up-to-date as the CFG
529 /// mutates, so we don't allow any of these transformations.
532  FindFunctionBackedges(F, Edges);
533 
534  for (const auto &Edge : Edges)
535  LoopHeaders.insert(Edge.second);
536 }
537 
538 /// getKnownConstant - Helper method to determine if we can thread over a
539 /// terminator with the given value as its condition, and if so what value to
540 /// use for that. What kind of value this is depends on whether we want an
541 /// integer or a block address, but an undef is always accepted.
542 /// Returns null if Val is null or not an appropriate constant.
544  if (!Val)
545  return nullptr;
546 
547  // Undef is "known" enough.
548  if (UndefValue *U = dyn_cast<UndefValue>(Val))
549  return U;
550 
551  if (Preference == WantBlockAddress)
552  return dyn_cast<BlockAddress>(Val->stripPointerCasts());
553 
554  return dyn_cast<ConstantInt>(Val);
555 }
556 
557 /// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
558 /// if we can infer that the value is a known ConstantInt/BlockAddress or undef
559 /// in any of our predecessors. If so, return the known list of value and pred
560 /// BB in the result vector.
561 ///
562 /// This returns true if there were any known values.
564  Value *V, BasicBlock *BB, PredValueInfo &Result,
566  // This method walks up use-def chains recursively. Because of this, we could
567  // get into an infinite loop going around loops in the use-def chain. To
568  // prevent this, keep track of what (value, block) pairs we've already visited
569  // and terminate the search if we loop back to them
570  if (!RecursionSet.insert(std::make_pair(V, BB)).second)
571  return false;
572 
573  // An RAII helper to remove this pair from the recursion set once the recursion
574  // stack pops back out again.
575  RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
576 
577  // If V is a constant, then it is known in all predecessors.
578  if (Constant *KC = getKnownConstant(V, Preference)) {
579  for (BasicBlock *Pred : predecessors(BB))
580  Result.push_back(std::make_pair(KC, Pred));
581 
582  return !Result.empty();
583  }
584 
585  // If V is a non-instruction value, or an instruction in a different block,
586  // then it can't be derived from a PHI.
588  if (!I || I->getParent() != BB) {
589 
590  // Okay, if this is a live-in value, see if it has a known value at the end
591  // of any of our predecessors.
592  //
593  // FIXME: This should be an edge property, not a block end property.
594  /// TODO: Per PR2563, we could infer value range information about a
595  /// predecessor based on its terminator.
596  //
597  // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
598  // "I" is a non-local compare-with-a-constant instruction. This would be
599  // able to handle value inequalities better, for example if the compare is
600  // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
601  // Perhaps getConstantOnEdge should be smart enough to do this?
602 
603  for (BasicBlock *P : predecessors(BB)) {
604  // If the value is known by LazyValueInfo to be a constant in a
605  // predecessor, use that information to try to thread this block.
606  Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
607  if (Constant *KC = getKnownConstant(PredCst, Preference))
608  Result.push_back(std::make_pair(KC, P));
609  }
610 
611  return !Result.empty();
612  }
613 
614  /// If I is a PHI node, then we know the incoming values for any constants.
615  if (PHINode *PN = dyn_cast<PHINode>(I)) {
616  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
617  Value *InVal = PN->getIncomingValue(i);
618  if (Constant *KC = getKnownConstant(InVal, Preference)) {
619  Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
620  } else {
621  Constant *CI = LVI->getConstantOnEdge(InVal,
622  PN->getIncomingBlock(i),
623  BB, CxtI);
624  if (Constant *KC = getKnownConstant(CI, Preference))
625  Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
626  }
627  }
628 
629  return !Result.empty();
630  }
631 
632  // Handle Cast instructions. Only see through Cast when the source operand is
633  // PHI or Cmp and the source type is i1 to save the compilation time.
634  if (CastInst *CI = dyn_cast<CastInst>(I)) {
635  Value *Source = CI->getOperand(0);
636  if (!Source->getType()->isIntegerTy(1))
637  return false;
638  if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
639  return false;
640  ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
641  if (Result.empty())
642  return false;
643 
644  // Convert the known values.
645  for (auto &R : Result)
646  R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());
647 
648  return true;
649  }
650 
651  PredValueInfoTy LHSVals, RHSVals;
652 
653  // Handle some boolean conditions.
654  if (I->getType()->getPrimitiveSizeInBits() == 1) {
655  assert(Preference == WantInteger && "One-bit non-integer type?");
656  // X | true -> true
657  // X & false -> false
658  if (I->getOpcode() == Instruction::Or ||
659  I->getOpcode() == Instruction::And) {
660  ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
661  WantInteger, CxtI);
662  ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
663  WantInteger, CxtI);
664 
665  if (LHSVals.empty() && RHSVals.empty())
666  return false;
667 
668  ConstantInt *InterestingVal;
669  if (I->getOpcode() == Instruction::Or)
670  InterestingVal = ConstantInt::getTrue(I->getContext());
671  else
672  InterestingVal = ConstantInt::getFalse(I->getContext());
673 
674  SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
675 
676  // Scan for the sentinel. If we find an undef, force it to the
677  // interesting value: x|undef -> true and x&undef -> false.
678  for (const auto &LHSVal : LHSVals)
679  if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
680  Result.emplace_back(InterestingVal, LHSVal.second);
681  LHSKnownBBs.insert(LHSVal.second);
682  }
683  for (const auto &RHSVal : RHSVals)
684  if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
685  // If we already inferred a value for this block on the LHS, don't
686  // re-add it.
687  if (!LHSKnownBBs.count(RHSVal.second))
688  Result.emplace_back(InterestingVal, RHSVal.second);
689  }
690 
691  return !Result.empty();
692  }
693 
694  // Handle the NOT form of XOR.
695  if (I->getOpcode() == Instruction::Xor &&
696  isa<ConstantInt>(I->getOperand(1)) &&
697  cast<ConstantInt>(I->getOperand(1))->isOne()) {
698  ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
699  WantInteger, CxtI);
700  if (Result.empty())
701  return false;
702 
703  // Invert the known values.
704  for (auto &R : Result)
705  R.first = ConstantExpr::getNot(R.first);
706 
707  return true;
708  }
709 
710  // Try to simplify some other binary operator values.
711  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
712  assert(Preference != WantBlockAddress
713  && "A binary operator creating a block address?");
714  if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
715  PredValueInfoTy LHSVals;
716  ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
717  WantInteger, CxtI);
718 
719  // Try to use constant folding to simplify the binary operator.
720  for (const auto &LHSVal : LHSVals) {
721  Constant *V = LHSVal.first;
722  Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
723 
724  if (Constant *KC = getKnownConstant(Folded, WantInteger))
725  Result.push_back(std::make_pair(KC, LHSVal.second));
726  }
727  }
728 
729  return !Result.empty();
730  }
731 
732  // Handle compare with phi operand, where the PHI is defined in this block.
733  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
734  assert(Preference == WantInteger && "Compares only produce integers");
735  Type *CmpType = Cmp->getType();
736  Value *CmpLHS = Cmp->getOperand(0);
737  Value *CmpRHS = Cmp->getOperand(1);
738  CmpInst::Predicate Pred = Cmp->getPredicate();
739 
740  PHINode *PN = dyn_cast<PHINode>(CmpLHS);
741  if (PN && PN->getParent() == BB) {
742  const DataLayout &DL = PN->getModule()->getDataLayout();
743  // We can do this simplification if any comparisons fold to true or false.
744  // See if any do.
745  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
746  BasicBlock *PredBB = PN->getIncomingBlock(i);
747  Value *LHS = PN->getIncomingValue(i);
748  Value *RHS = CmpRHS->DoPHITranslation(BB, PredBB);
749 
750  Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL});
751  if (!Res) {
752  if (!isa<Constant>(RHS))
753  continue;
754 
756  ResT = LVI->getPredicateOnEdge(Pred, LHS,
757  cast<Constant>(RHS), PredBB, BB,
758  CxtI ? CxtI : Cmp);
759  if (ResT == LazyValueInfo::Unknown)
760  continue;
761  Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
762  }
763 
764  if (Constant *KC = getKnownConstant(Res, WantInteger))
765  Result.push_back(std::make_pair(KC, PredBB));
766  }
767 
768  return !Result.empty();
769  }
770 
771  // If comparing a live-in value against a constant, see if we know the
772  // live-in value on any predecessors.
773  if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
774  Constant *CmpConst = cast<Constant>(CmpRHS);
775 
776  if (!isa<Instruction>(CmpLHS) ||
777  cast<Instruction>(CmpLHS)->getParent() != BB) {
778  for (BasicBlock *P : predecessors(BB)) {
779  // If the value is known by LazyValueInfo to be a constant in a
780  // predecessor, use that information to try to thread this block.
782  LVI->getPredicateOnEdge(Pred, CmpLHS,
783  CmpConst, P, BB, CxtI ? CxtI : Cmp);
784  if (Res == LazyValueInfo::Unknown)
785  continue;
786 
787  Constant *ResC = ConstantInt::get(CmpType, Res);
788  Result.push_back(std::make_pair(ResC, P));
789  }
790 
791  return !Result.empty();
792  }
793 
794  // InstCombine can fold some forms of constant range checks into
795  // (icmp (add (x, C1)), C2). See if we have such a thing with
796  // x as a live-in.
797  {
798  using namespace PatternMatch;
799 
800  Value *AddLHS;
801  ConstantInt *AddConst;
802  if (isa<ConstantInt>(CmpConst) &&
803  match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
804  if (!isa<Instruction>(AddLHS) ||
805  cast<Instruction>(AddLHS)->getParent() != BB) {
806  for (BasicBlock *P : predecessors(BB)) {
807  // If the value is known by LazyValueInfo to be a ConstantRange in
808  // a predecessor, use that information to try to thread this
809  // block.
810  ConstantRange CR = LVI->getConstantRangeOnEdge(
811  AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
812  // Propagate the range through the addition.
813  CR = CR.add(AddConst->getValue());
814 
815  // Get the range where the compare returns true.
817  Pred, cast<ConstantInt>(CmpConst)->getValue());
818 
819  Constant *ResC;
820  if (CmpRange.contains(CR))
821  ResC = ConstantInt::getTrue(CmpType);
822  else if (CmpRange.inverse().contains(CR))
823  ResC = ConstantInt::getFalse(CmpType);
824  else
825  continue;
826 
827  Result.push_back(std::make_pair(ResC, P));
828  }
829 
830  return !Result.empty();
831  }
832  }
833  }
834 
835  // Try to find a constant value for the LHS of a comparison,
836  // and evaluate it statically if we can.
837  PredValueInfoTy LHSVals;
838  ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
839  WantInteger, CxtI);
840 
841  for (const auto &LHSVal : LHSVals) {
842  Constant *V = LHSVal.first;
843  Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
844  if (Constant *KC = getKnownConstant(Folded, WantInteger))
845  Result.push_back(std::make_pair(KC, LHSVal.second));
846  }
847 
848  return !Result.empty();
849  }
850  }
851 
852  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
853  // Handle select instructions where at least one operand is a known constant
854  // and we can figure out the condition value for any predecessor block.
855  Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
856  Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
857  PredValueInfoTy Conds;
858  if ((TrueVal || FalseVal) &&
859  ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
860  WantInteger, CxtI)) {
861  for (auto &C : Conds) {
862  Constant *Cond = C.first;
863 
864  // Figure out what value to use for the condition.
865  bool KnownCond;
866  if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
867  // A known boolean.
868  KnownCond = CI->isOne();
869  } else {
870  assert(isa<UndefValue>(Cond) && "Unexpected condition value");
871  // Either operand will do, so be sure to pick the one that's a known
872  // constant.
873  // FIXME: Do this more cleverly if both values are known constants?
874  KnownCond = (TrueVal != nullptr);
875  }
876 
877  // See if the select has a known constant value for this predecessor.
878  if (Constant *Val = KnownCond ? TrueVal : FalseVal)
879  Result.push_back(std::make_pair(Val, C.second));
880  }
881 
882  return !Result.empty();
883  }
884  }
885 
886  // If all else fails, see if LVI can figure out a constant value for us.
887  Constant *CI = LVI->getConstant(V, BB, CxtI);
888  if (Constant *KC = getKnownConstant(CI, Preference)) {
889  for (BasicBlock *Pred : predecessors(BB))
890  Result.push_back(std::make_pair(KC, Pred));
891  }
892 
893  return !Result.empty();
894 }
895 
896 /// GetBestDestForJumpOnUndef - If we determine that the specified block ends
897 /// in an undefined jump, decide which block is best to revector to.
898 ///
899 /// Since we can pick an arbitrary destination, we pick the successor with the
900 /// fewest predecessors. This should reduce the in-degree of the others.
///
/// Returns the index of the chosen successor within BB's terminator. When
/// several successors tie for the fewest predecessors, the lowest index wins.
/// Successor 0 is read unconditionally, so BB's terminator must have at least
/// one successor.
901 static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
902  TerminatorInst *BBTerm = BB->getTerminator();
  // Successor 0 is the initial candidate.
903  unsigned MinSucc = 0;
904  BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
905  // Compute the successor with the minimum number of predecessors.
906  unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
907  for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
908  TestBB = BBTerm->getSuccessor(i);
909  unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
  // Strict '<' keeps the earliest successor when the counts are equal.
910  if (NumPreds < MinNumPreds) {
911  MinSucc = i;
912  MinNumPreds = NumPreds;
913  }
914  }
915 
916  return MinSucc;
917 }
918 
// NOTE(review): the signature line and the statements that define `BA` are
// missing from this listing. The call in ProcessBlock suggests this is
// hasAddressTakenAndUsed(BB) -- confirm against the real source.
//
// Returns false when BB's address is not taken; otherwise returns whether
// `BA` still has any uses.
920  if (!BB->hasAddressTaken()) return false;
921 
922  // If the block has its address taken, it may be a tree of dead constants
923  // hanging off of it. These shouldn't keep the block alive.
926  return !BA->use_empty();
927 }
928 
929 /// ProcessBlock - If there are any predecessors whose control can be threaded
930 /// through to a successor, transform them now.
///
/// Returns true as soon as one simplification or threading step has been
/// applied, and false when nothing could be done for BB.
// NOTE(review): the signature line and a few interior lines (for example the
// declarations of `Terminator`, `Preference` and `Ret`) are missing from this
// listing.
932  // If the block is trivially dead, just return and let the caller nuke it.
933  // This simplifies other transformations.
934  if (pred_empty(BB) &&
935  BB != &BB->getParent()->getEntryBlock())
936  return false;
937 
938  // If this block has a single predecessor, and if that pred has a single
939  // successor, merge the blocks. This encourages recursive jump threading
940  // because now the condition in this block can be threaded through
941  // predecessors of our predecessor block.
942  if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
943  const TerminatorInst *TI = SinglePred->getTerminator();
944  if (!TI->isExceptional() && TI->getNumSuccessors() == 1 &&
945  SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
946  // If SinglePred was a loop header, BB becomes one.
947  if (LoopHeaders.erase(SinglePred))
948  LoopHeaders.insert(BB);
949 
950  LVI->eraseBlock(SinglePred);
952 
953  // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by
954  // BB code within one basic block `BB`), we need to invalidate the LVI
955  // information associated with BB, because the LVI information need not be
956  // true for all of BB after the merge. For example,
957  // Before the merge, LVI info and code is as follows:
958  // SinglePred: <LVI info1 for %p val>
959  // %y = use of %p
960  // call @exit() // need not transfer execution to successor.
961  // assume(%p) // from this point on %p is true
962  // br label %BB
963  // BB: <LVI info2 for %p val, i.e. %p is true>
964  // %x = use of %p
965  // br label exit
966  //
967  // Note that this LVI info for blocks BB and SinglePred is correct for %p
968  // (info2 and info1 respectively). After the merge and the deletion of the
969  // LVI info1 for SinglePred, we have the following code:
970  // BB: <LVI info2 for %p val>
971  // %y = use of %p
972  // call @exit()
973  // assume(%p)
974  // %x = use of %p <-- LVI info2 is correct from here onwards.
975  // br label exit
976  // LVI info2 for BB is incorrect at the beginning of BB.
977 
978  // Invalidate LVI information for BB if the LVI is not provably true for
979  // all of BB.
980  if (any_of(*BB, [](Instruction &I) {
982  }))
983  LVI->eraseBlock(BB);
984  return true;
985  }
986  }
987 
988  if (TryToUnfoldSelectInCurrBB(BB))
989  return true;
990 
991  // Look if we can propagate guards to predecessors.
992  if (HasGuards && ProcessGuards(BB))
993  return true;
994 
995  // What kind of constant we're looking for.
997 
998  // Look to see if the terminator is a conditional branch, switch or indirect
999  // branch, if not we can't thread it.
1000  Value *Condition;
1002  if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
1003  // Can't thread an unconditional jump.
1004  if (BI->isUnconditional()) return false;
1005  Condition = BI->getCondition();
1006  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
1007  Condition = SI->getCondition();
1008  } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
1009  // Can't thread indirect branch with no successors.
1010  if (IB->getNumSuccessors() == 0) return false;
1011  Condition = IB->getAddress()->stripPointerCasts();
1012  Preference = WantBlockAddress;
1013  } else {
1014  return false; // Must be an invoke.
1015  }
1016 
1017  // Run constant folding to see if we can reduce the condition to a simple
1018  // constant.
1019  if (Instruction *I = dyn_cast<Instruction>(Condition)) {
1020  Value *SimpleVal =
1022  if (SimpleVal) {
1023  I->replaceAllUsesWith(SimpleVal);
1024  if (isInstructionTriviallyDead(I, TLI))
1025  I->eraseFromParent();
1026  Condition = SimpleVal;
1027  }
1028  }
1029 
1030  // If the terminator is branching on an undef, we can pick any of the
1031  // successors to branch to. Let GetBestDestForJumpOnUndef decide.
1032  if (isa<UndefValue>(Condition)) {
1033  unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
1034 
1035  // Fold the branch/switch.
1036  TerminatorInst *BBTerm = BB->getTerminator();
1037  for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1038  if (i == BestSucc) continue;
1039  BBTerm->getSuccessor(i)->removePredecessor(BB, true);
1040  }
1041 
1042  DEBUG(dbgs() << " In block '" << BB->getName()
1043  << "' folding undef terminator: " << *BBTerm << '\n');
1044  BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
1045  BBTerm->eraseFromParent();
1046  return true;
1047  }
1048 
1049  // If the terminator of this block is branching on a constant, simplify the
1050  // terminator to an unconditional branch. This can occur due to threading in
1051  // other blocks.
1052  if (getKnownConstant(Condition, Preference)) {
1053  DEBUG(dbgs() << " In block '" << BB->getName()
1054  << "' folding terminator: " << *BB->getTerminator() << '\n');
1055  ++NumFolds;
1056  ConstantFoldTerminator(BB, true);
1057  return true;
1058  }
1059 
1060  Instruction *CondInst = dyn_cast<Instruction>(Condition);
1061 
1062  // All the rest of our checks depend on the condition being an instruction.
1063  if (!CondInst) {
1064  // FIXME: Unify this with code below.
1065  if (ProcessThreadableEdges(Condition, BB, Preference, Terminator))
1066  return true;
1067  return false;
1068  }
1069 
1070  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
1071  // If we're branching on a conditional, LVI might be able to determine
1072  // its value at the branch instruction. We only handle comparisons
1073  // against a constant at this time.
1074  // TODO: This should be extended to handle switches as well.
1075  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
1076  Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
1077  if (CondBr && CondConst) {
1078  // We should have returned as soon as we turned a conditional branch into
1079  // an unconditional one, because it's no longer interesting as far as
1080  // jump threading is concerned.
1081  assert(CondBr->isConditional() && "Threading on unconditional terminator");
1082 
1084  LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1085  CondConst, CondBr);
1086  if (Ret != LazyValueInfo::Unknown) {
// A result of True keeps successor 0 and drops successor 1; any other known
// result keeps successor 1 and drops successor 0.
1087  unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0;
1088  unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
1089  CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
1090  BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
1091  CondBr->eraseFromParent();
1092  if (CondCmp->use_empty())
1093  CondCmp->eraseFromParent();
1094  // We can safely replace *some* uses of the CondInst if it has
1095  // exactly one value as returned by LVI. RAUW is incorrect in the
1096  // presence of guards and assumes, that have the `Cond` as the use. This
1097  // is because we use the guards/assume to reason about the `Cond` value
1098  // at the end of block, but RAUW unconditionally replaces all uses
1099  // including the guards/assumes themselves and the uses before the
1100  // guard/assume.
1101  else if (CondCmp->getParent() == BB) {
1102  auto *CI = Ret == LazyValueInfo::True ?
1103  ConstantInt::getTrue(CondCmp->getType()) :
1104  ConstantInt::getFalse(CondCmp->getType());
1105  ReplaceFoldableUses(CondCmp, CI);
1106  }
1107  return true;
1108  }
1109 
1110  // We did not manage to simplify this branch, try to see whether
1111  // CondCmp depends on a known phi-select pattern.
1112  if (TryToUnfoldSelect(CondCmp, BB))
1113  return true;
1114  }
1115  }
1116 
1117  // Check for some cases that are worth simplifying. Right now we want to look
1118  // for loads that are used by a switch or by the condition for the branch. If
1119  // we see one, check to see if it's partially redundant. If so, insert a PHI
1120  // which can then be used to thread the values.
1121  Value *SimplifyValue = CondInst;
1122  if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1123  if (isa<Constant>(CondCmp->getOperand(1)))
1124  SimplifyValue = CondCmp->getOperand(0);
1125 
1126  // TODO: There are other places where load PRE would be profitable, such as
1127  // more complex comparisons.
1128  if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
1129  if (SimplifyPartiallyRedundantLoad(LI))
1130  return true;
1131 
1132  // Before threading, try to propagate profile data backwards:
1133  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1134  if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1136 
1137  // Handle a variety of cases where we are branching on something derived from
1138  // a PHI node in the current block. If we can prove that any predecessors
1139  // compute a predictable value based on a PHI node, thread those predecessors.
1140  if (ProcessThreadableEdges(CondInst, BB, Preference, Terminator))
1141  return true;
1142 
1143  // If this is an otherwise-unfoldable branch on a phi node in the current
1144  // block, see if we can simplify.
1145  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1146  if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1147  return ProcessBranchOnPHI(PN);
1148 
1149  // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1150  if (CondInst->getOpcode() == Instruction::Xor &&
1151  CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1152  return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
1153 
1154  // Search for a stronger dominating condition that can be used to simplify a
1155  // conditional branch leaving BB.
1156  if (ProcessImpliedCondition(BB))
1157  return true;
1158 
1159  return false;
1160 }
1161 
/// Try to fold the conditional branch that ends BB by using the condition of
/// a conditional branch found further up a chain of single predecessors.
///
/// The walk follows getSinglePredecessor() and is bounded by
/// ImplicationSearchThreshold. When isImpliedCondition() produces a result for
/// a predecessor's branch condition, BB's conditional branch is replaced by an
/// unconditional branch to the implied successor and true is returned. In all
/// other cases nothing is changed and false is returned.
// NOTE(review): the signature line is missing from this listing; ProcessBlock
// invokes this as ProcessImpliedCondition(BB).
1163  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1164  if (!BI || !BI->isConditional())
1165  return false;
1166 
1167  Value *Cond = BI->getCondition();
1168  BasicBlock *CurrentBB = BB;
1169  BasicBlock *CurrentPred = BB->getSinglePredecessor();
1170  unsigned Iter = 0;
1171 
1172  auto &DL = BB->getModule()->getDataLayout();
1173 
1174  while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1175  auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1176  if (!PBI || !PBI->isConditional())
1177  return false;
1178  if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1179  return false;
1180 
// CondIsTrue records whether CurrentBB is successor 0 of the predecessor's
// branch.
1181  bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1182  Optional<bool> Implication =
1183  isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1184  if (Implication) {
// Keep successor 0 when the implied value is true, successor 1 otherwise, and
// detach BB from the successor that is dropped.
1185  BI->getSuccessor(*Implication ? 1 : 0)->removePredecessor(BB);
1186  BranchInst::Create(BI->getSuccessor(*Implication ? 0 : 1), BI);
1187  BI->eraseFromParent();
1188  return true;
1189  }
// No result at this level: move one block up the single-predecessor chain.
1190  CurrentBB = CurrentPred;
1191  CurrentPred = CurrentBB->getSinglePredecessor();
1192  }
1193 
1194  return false;
1195 }
1196 
1197 /// Return true if Op is an instruction defined in the given block.
/// Values that are not instructions always yield false.
// NOTE(review): the signature line is missing from this listing;
// SimplifyPartiallyRedundantLoad calls this as
// isOpDefinedInBlock(LoadedPtr, LoadBB).
1199  if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1200  if (OpInst->getParent() == BB)
1201  return true;
1202  return false;
1203 }
1204 
1205 /// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
1206 /// load instruction, eliminate it by replacing it with a PHI node. This is an
1207 /// important optimization that encourages jump threading, and needs to be run
1208 /// interlaced with other jump threading tasks.
///
/// Returns true if LI was replaced (by a locally available value or by a new
/// PHI node) and erased, and false if the load was left untouched.
// NOTE(review): the signature line is missing from this listing; ProcessBlock
// calls this as SimplifyPartiallyRedundantLoad(LI).
1210  // Don't hack volatile and ordered loads.
1211  if (!LI->isUnordered()) return false;
1212 
1213  // If the load is defined in a block with exactly one predecessor, it can't be
1214  // partially redundant.
1215  BasicBlock *LoadBB = LI->getParent();
1216  if (LoadBB->getSinglePredecessor())
1217  return false;
1218 
1219  // If the load is defined in an EH pad, it can't be partially redundant,
1220  // because the edges between the invoke and the EH pad cannot have other
1221  // instructions between them.
1222  if (LoadBB->isEHPad())
1223  return false;
1224 
1225  Value *LoadedPtr = LI->getOperand(0);
1226 
1227  // If the loaded operand is defined in the LoadBB and it is not a phi,
1228  // it can't be available in predecessors.
1229  if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1230  return false;
1231 
1232  // Scan a few instructions up from the load, to see if it is obviously live at
1233  // the entry to its block.
1234  BasicBlock::iterator BBIt(LI);
1235  bool IsLoadCSE;
1236  if (Value *AvailableVal = FindAvailableLoadedValue(
1237  LI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1238  // If the value of the load is locally available within the block, just use
1239  // it. This frequently occurs for reg2mem'd allocas.
1240 
1241  if (IsLoadCSE) {
1242  LoadInst *NLI = cast<LoadInst>(AvailableVal);
1243  combineMetadataForCSE(NLI, LI);
1244  };
1245 
1246  // If the returned value is the load itself, replace with an undef. This can
1247  // only happen in dead loops.
1248  if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
1249  if (AvailableVal->getType() != LI->getType())
1250  AvailableVal =
1251  CastInst::CreateBitOrPointerCast(AvailableVal, LI->getType(), "", LI);
1252  LI->replaceAllUsesWith(AvailableVal);
1253  LI->eraseFromParent();
1254  return true;
1255  }
1256 
1257  // Otherwise, if we scanned the whole block and got to the top of the block,
1258  // we know the block is locally transparent to the load. If not, something
1259  // might clobber its value.
1260  if (BBIt != LoadBB->begin())
1261  return false;
1262 
1263  // If all of the loads and stores that feed the value have the same AA tags,
1264  // then we can propagate them onto any newly inserted loads.
1265  AAMDNodes AATags;
1266  LI->getAAMetadata(AATags);
1267 
1268  SmallPtrSet<BasicBlock*, 8> PredsScanned;
1269 
1270  using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1271 
1272  AvailablePredsTy AvailablePreds;
1273  BasicBlock *OneUnavailablePred = nullptr;
1274  SmallVector<LoadInst*, 8> CSELoads;
1275 
1276  // If we got here, the loaded value is transparent through to the start of the
1277  // block. Check to see if it is available in any of the predecessor blocks.
1278  for (BasicBlock *PredBB : predecessors(LoadBB)) {
1279  // If we already scanned this predecessor, skip it.
1280  if (!PredsScanned.insert(PredBB).second)
1281  continue;
1282 
1283  BBIt = PredBB->end();
1284  unsigned NumScanedInst = 0;
1285  Value *PredAvailable = nullptr;
1286  // NOTE: We don't CSE load that is volatile or anything stronger than
1287  // unordered, that should have been checked when we entered the function.
1288  assert(LI->isUnordered() && "Attempting to CSE volatile or atomic loads");
1289  // If this is a load on a phi pointer, phi-translate it and search
1290  // for available load/store to the pointer in predecessors.
1291  Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB);
1292  PredAvailable = FindAvailablePtrLoadStore(
1293  Ptr, LI->getType(), LI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
1294  AA, &IsLoadCSE, &NumScanedInst);
1295 
1296  // If PredBB has a single predecessor, continue scanning through the
1297  // single predecessor.
1298  BasicBlock *SinglePredBB = PredBB;
1299  while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1300  NumScanedInst < DefMaxInstsToScan) {
1301  SinglePredBB = SinglePredBB->getSinglePredecessor();
1302  if (SinglePredBB) {
1303  BBIt = SinglePredBB->end();
1304  PredAvailable = FindAvailablePtrLoadStore(
1305  Ptr, LI->getType(), LI->isAtomic(), SinglePredBB, BBIt,
1306  (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
1307  &NumScanedInst);
1308  }
1309  }
1310 
1311  if (!PredAvailable) {
1312  OneUnavailablePred = PredBB;
1313  continue;
1314  }
1315 
1316  if (IsLoadCSE)
1317  CSELoads.push_back(cast<LoadInst>(PredAvailable));
1318 
1319  // If so, this load is partially redundant. Remember this info so that we
1320  // can create a PHI node.
1321  AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
1322  }
1323 
1324  // If the loaded value isn't available in any predecessor, it isn't partially
1325  // redundant.
1326  if (AvailablePreds.empty()) return false;
1327 
1328  // Okay, the loaded value is available in at least one (and maybe all!)
1329  // predecessors. If the value is unavailable in more than one unique
1330  // predecessor, we want to insert a merge block for those common predecessors.
1331  // This ensures that we only have to insert one reload, thus not increasing
1332  // code size.
1333  BasicBlock *UnavailablePred = nullptr;
1334 
1335  // If there is exactly one predecessor where the value is unavailable, the
1336  // already computed 'OneUnavailablePred' block is it. If it ends in an
1337  // unconditional branch, we know that it isn't a critical edge.
1338  if (PredsScanned.size() == AvailablePreds.size()+1 &&
1339  OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1340  UnavailablePred = OneUnavailablePred;
1341  } else if (PredsScanned.size() != AvailablePreds.size()) {
1342  // Otherwise, we had multiple unavailable predecessors or we had a critical
1343  // edge from the one.
1344  SmallVector<BasicBlock*, 8> PredsToSplit;
1345  SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
1346 
1347  for (const auto &AvailablePred : AvailablePreds)
1348  AvailablePredSet.insert(AvailablePred.first);
1349 
1350  // Add all the unavailable predecessors to the PredsToSplit list.
1351  for (BasicBlock *P : predecessors(LoadBB)) {
1352  // If the predecessor is an indirect goto, we can't split the edge.
1353  if (isa<IndirectBrInst>(P->getTerminator()))
1354  return false;
1355 
1356  if (!AvailablePredSet.count(P))
1357  PredsToSplit.push_back(P);
1358  }
1359 
1360  // Split them out to their own block.
1361  UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1362  }
1363 
1364  // If the value isn't available in all predecessors, then there will be
1365  // exactly one where it isn't available. Insert a load on that edge and add
1366  // it to the AvailablePreds list.
1367  if (UnavailablePred) {
1368  assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1369  "Can't handle critical edge here!");
1370  LoadInst *NewVal = new LoadInst(
1371  LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1372  LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(),
1373  LI->getSyncScopeID(), UnavailablePred->getTerminator());
1374  NewVal->setDebugLoc(LI->getDebugLoc());
1375  if (AATags)
1376  NewVal->setAAMetadata(AATags);
1377 
1378  AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
1379  }
1380 
1381  // Now we know that each predecessor of this block has a value in
1382  // AvailablePreds, sort them for efficient access as we're walking the preds.
1383  array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1384 
1385  // Create a PHI node at the start of the block for the PRE'd load value.
1386  pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
1387  PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
1388  &LoadBB->front());
1389  PN->takeName(LI);
1390  PN->setDebugLoc(LI->getDebugLoc());
1391 
1392  // Insert new entries into the PHI for each predecessor. A single block may
1393  // have multiple entries here.
1394  for (pred_iterator PI = PB; PI != PE; ++PI) {
1395  BasicBlock *P = *PI;
// Look up P's entry in the sorted AvailablePreds list.
1396  AvailablePredsTy::iterator I =
1397  std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
1398  std::make_pair(P, (Value*)nullptr));
1399 
1400  assert(I != AvailablePreds.end() && I->first == P &&
1401  "Didn't find entry for predecessor!");
1402 
1403  // If we have an available predecessor but it requires casting, insert the
1404  // cast in the predecessor and use the cast. Note that we have to update the
1405  // AvailablePreds vector as we go so that all of the PHI entries for this
1406  // predecessor use the same bitcast.
1407  Value *&PredV = I->second;
1408  if (PredV->getType() != LI->getType())
1409  PredV = CastInst::CreateBitOrPointerCast(PredV, LI->getType(), "",
1410  P->getTerminator());
1411 
1412  PN->addIncoming(PredV, I->first);
1413  }
1414 
1415  for (LoadInst *PredLI : CSELoads) {
1416  combineMetadataForCSE(PredLI, LI);
1417  }
1418 
1419  LI->replaceAllUsesWith(PN);
1420  LI->eraseFromParent();
1421 
1422  return true;
1423 }
1424 
1425 /// FindMostPopularDest - The specified list contains multiple possible
1426 /// threadable destinations. Pick the one that occurs the most frequently in
1427 /// the list.
///
/// Entries whose destination is null are not counted. When several
/// destinations are equally popular, the one that appears first in the
/// successor list of BB's terminator is returned.
// NOTE(review): the line carrying the function name and its first parameter
// is missing from this listing; the body uses `BB` and the caller passes
// (BB, PredToDestList).
1428 static BasicBlock *
1430  const SmallVectorImpl<std::pair<BasicBlock *,
1431  BasicBlock *>> &PredToDestList) {
1432  assert(!PredToDestList.empty());
1433 
1434  // Determine popularity. If there are multiple possible destinations, we
1435  // explicitly choose to ignore 'undef' destinations. We prefer to thread
1436  // blocks with known and real destinations to threading undef. We'll handle
1437  // them later if interesting.
1438  DenseMap<BasicBlock*, unsigned> DestPopularity;
1439  for (const auto &PredToDest : PredToDestList)
1440  if (PredToDest.second)
1441  DestPopularity[PredToDest.second]++;
1442 
1443  // Find the most popular dest.
1444  DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
1445  BasicBlock *MostPopularDest = DPI->first;
1446  unsigned Popularity = DPI->second;
1447  SmallVector<BasicBlock*, 4> SamePopularity;
1448 
1449  for (++DPI; DPI != DestPopularity.end(); ++DPI) {
1450  // If the popularity of this entry isn't higher than the popularity we've
1451  // seen so far, ignore it.
1452  if (DPI->second < Popularity)
1453  ; // ignore.
1454  else if (DPI->second == Popularity) {
1455  // If it is the same as what we've seen so far, keep track of it.
1456  SamePopularity.push_back(DPI->first);
1457  } else {
1458  // If it is more popular, remember it.
1459  SamePopularity.clear();
1460  MostPopularDest = DPI->first;
1461  Popularity = DPI->second;
1462  }
1463  }
1464 
1465  // Okay, now we know the most popular destination. If there is more than one
1466  // destination, we need to determine one. This is arbitrary, but we need
1467  // to make a deterministic decision. Pick the first one that appears in the
1468  // successor list.
1469  if (!SamePopularity.empty()) {
1470  SamePopularity.push_back(MostPopularDest);
1471  TerminatorInst *TI = BB->getTerminator();
1472  for (unsigned i = 0; ; ++i) {
1473  assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
1474 
1475  if (!is_contained(SamePopularity, TI->getSuccessor(i)))
1476  continue;
1477 
1478  MostPopularDest = TI->getSuccessor(i);
1479  break;
1480  }
1481  }
1482 
1483  // Okay, we have finally picked the most popular destination.
1484  return MostPopularDest;
1485 }
1486 
1489  Instruction *CxtI) {
// NOTE(review): the first lines of this signature and the declaration of
// `PredToDestList` are missing from this listing. ProcessBlock calls this as
// ProcessThreadableEdges(Condition, BB, Preference, Terminator).
//
// Uses ComputeValueKnownInPredecessors to learn which values Cond takes on
// entry from individual predecessors of BB, maps each value to the successor
// BB's terminator would pick, and then either folds the terminator (when
// every predecessor goes to one destination) or hands the edges to ThreadEdge.
// Returns true if the IR was changed.
1490  // If threading this would thread across a loop header, don't even try to
1491  // thread the edge.
1492  if (LoopHeaders.count(BB))
1493  return false;
1494 
1495  PredValueInfoTy PredValues;
1496  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
1497  return false;
1498 
1499  assert(!PredValues.empty() &&
1500  "ComputeValueKnownInPredecessors returned true with no values");
1501 
1502  DEBUG(dbgs() << "IN BB: " << *BB;
1503  for (const auto &PredValue : PredValues) {
1504  dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
1505  << *PredValue.first
1506  << " for pred '" << PredValue.second->getName() << "'.\n";
1507  });
1508 
1509  // Decide what we want to thread through. Convert our list of known values to
1510  // a list of known destinations for each pred. This also discards duplicate
1511  // predecessors and keeps track of the undefined inputs (which are represented
1512  // as a null dest in the PredToDestList).
1513  SmallPtrSet<BasicBlock*, 16> SeenPreds;
1515 
// OnlyDest / OnlyVal track the single destination / value seen so far; the
// all-ones sentinels below mark "more than one distinct destination / value".
1516  BasicBlock *OnlyDest = nullptr;
1517  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1518  Constant *OnlyVal = nullptr;
1519  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1520 
1521  unsigned PredWithKnownDest = 0;
1522  for (const auto &PredValue : PredValues) {
1523  BasicBlock *Pred = PredValue.second;
1524  if (!SeenPreds.insert(Pred).second)
1525  continue; // Duplicate predecessor entry.
1526 
1527  Constant *Val = PredValue.first;
1528 
1529  BasicBlock *DestBB;
1530  if (isa<UndefValue>(Val))
1531  DestBB = nullptr;
1532  else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1533  assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
// isZero() yields index 1 for a zero (false) value and index 0 otherwise.
1534  DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1535  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1536  assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1537  DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1538  } else {
1539  assert(isa<IndirectBrInst>(BB->getTerminator())
1540  && "Unexpected terminator");
1541  assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1542  DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1543  }
1544 
1545  // If we have exactly one destination, remember it for efficiency below.
1546  if (PredToDestList.empty()) {
1547  OnlyDest = DestBB;
1548  OnlyVal = Val;
1549  } else {
1550  if (OnlyDest != DestBB)
1551  OnlyDest = MultipleDestSentinel;
1552  // It is possible that we have the same destination but different values,
1553  // e.g. the default case of a switch instruction.
1554  if (Val != OnlyVal)
1555  OnlyVal = MultipleVal;
1556  }
1557 
1558  // We know where this predecessor is going.
1559  ++PredWithKnownDest;
1560 
1561  // If the predecessor ends with an indirect goto, we can't change its
1562  // destination.
1563  if (isa<IndirectBrInst>(Pred->getTerminator()))
1564  continue;
1565 
1566  PredToDestList.push_back(std::make_pair(Pred, DestBB));
1567  }
1568 
1569  // If all edges were unthreadable, we fail.
1570  if (PredToDestList.empty())
1571  return false;
1572 
1573  // If all the predecessors go to a single known successor, we want to fold,
1574  // not thread. By doing so, we do not need to duplicate the current block and
1575  // also miss potential opportunities in case we don't/can't duplicate.
1576  if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1577  if (PredWithKnownDest ==
1578  (size_t)std::distance(pred_begin(BB), pred_end(BB))) {
1579  bool SeenFirstBranchToOnlyDest = false;
1580  for (BasicBlock *SuccBB : successors(BB)) {
1581  if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest)
1582  SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1583  else
1584  SuccBB->removePredecessor(BB, true); // This is an unreachable successor.
1585  }
1586 
1587  // Finally update the terminator.
1588  TerminatorInst *Term = BB->getTerminator();
1589  BranchInst::Create(OnlyDest, Term);
1590  Term->eraseFromParent();
1591 
1592  // If the condition is now dead due to the removal of the old terminator,
1593  // erase it.
1594  if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1595  if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1596  CondInst->eraseFromParent();
1597  // We can safely replace *some* uses of the CondInst if it has
1598  // exactly one value as returned by LVI. RAUW is incorrect in the
1599  // presence of guards and assumes, that have the `Cond` as the use. This
1600  // is because we use the guards/assume to reason about the `Cond` value
1601  // at the end of block, but RAUW unconditionally replaces all uses
1602  // including the guards/assumes themselves and the uses before the
1603  // guard/assume.
1604  else if (OnlyVal && OnlyVal != MultipleVal &&
1605  CondInst->getParent() == BB)
1606  ReplaceFoldableUses(CondInst, OnlyVal);
1607  }
1608  return true;
1609  }
1610  }
1611 
1612  // Determine which is the most common successor. If we have many inputs and
1613  // this block is a switch, we want to start by threading the batch that goes
1614  // to the most popular destination first. If we only know about one
1615  // threadable destination (the common case) we can avoid this.
1616  BasicBlock *MostPopularDest = OnlyDest;
1617 
1618  if (MostPopularDest == MultipleDestSentinel)
1619  MostPopularDest = FindMostPopularDest(BB, PredToDestList);
1620 
1621  // Now that we know what the most popular destination is, factor all
1622  // predecessors that will jump to it into a single predecessor.
1623  SmallVector<BasicBlock*, 16> PredsToFactor;
1624  for (const auto &PredToDest : PredToDestList)
1625  if (PredToDest.second == MostPopularDest) {
1626  BasicBlock *Pred = PredToDest.first;
1627 
1628  // This predecessor may be a switch or something else that has multiple
1629  // edges to the block. Factor each of these edges by listing them
1630  // according to # occurrences in PredsToFactor.
1631  for (BasicBlock *Succ : successors(Pred))
1632  if (Succ == BB)
1633  PredsToFactor.push_back(Pred);
1634  }
1635 
1636  // If the threadable edges are branching on an undefined value, we get to pick
1637  // the destination that these predecessors should get to.
1638  if (!MostPopularDest)
1639  MostPopularDest = BB->getTerminator()->
1640  getSuccessor(GetBestDestForJumpOnUndef(BB));
1641 
1642  // Ok, try to thread it!
1643  return ThreadEdge(BB, PredsToFactor, MostPopularDest);
1644 }
1645 
1646 /// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
1647 /// a PHI node in the current block. See if there are any simplifications we
1648 /// can do based on inputs to the phi node.
///
/// Returns true as soon as DuplicateCondBranchOnPHIIntoPred succeeds for one
/// of the PHI's incoming blocks, and false if it never does.
// NOTE(review): the signature line and the declaration of `PredBBs` are
// missing from this listing; ProcessBlock calls this as ProcessBranchOnPHI(PN).
1650  BasicBlock *BB = PN->getParent();
1651 
1652  // TODO: We could make use of this to do it once for blocks with common PHI
1653  // values.
1655  PredBBs.resize(1);
1656 
1657  // If any of the predecessor blocks end in an unconditional branch, we can
1658  // *duplicate* the conditional branch into that block in order to further
1659  // encourage jump threading and to eliminate cases where we have branch on a
1660  // phi of an icmp (branch on icmp is much better).
1661  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1662  BasicBlock *PredBB = PN->getIncomingBlock(i);
1663  if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
1664  if (PredBr->isUnconditional()) {
// PredBBs holds exactly one entry, overwritten for each candidate block.
1665  PredBBs[0] = PredBB;
1666  // Try to duplicate BB into PredBB.
1667  if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1668  return true;
1669  }
1670  }
1671 
1672  return false;
1673 }
1674 
1675 /// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
1676 /// a xor instruction in the current block. See if there are any
1677 /// simplifications we can do based on inputs to the xor.
// Returns true when the IR was changed (the xor was simplified in place, or BB
// was duplicated into predecessors) and false when nothing could be done.
// NOTE(review): the signature line is absent from this listing (the embedded
// numbering jumps 1677 -> 1679). The body reads a value `BO` with two
// operands - presumably the xor BinaryOperator; confirm against the original.
1679  BasicBlock *BB = BO->getParent();
1680 
1681  // If either the LHS or RHS of the xor is a constant, don't do this
1682  // optimization.
1683  if (isa<ConstantInt>(BO->getOperand(0)) ||
1684  isa<ConstantInt>(BO->getOperand(1)))
1685  return false;
1686 
1687  // If the first instruction in BB isn't a phi, we won't be able to infer
1688  // anything special about any particular predecessor.
1689  if (!isa<PHINode>(BB->front()))
1690  return false;
1691 
1692  // If this BB is a landing pad, we won't be able to split the edge into it.
1693  if (BB->isEHPad())
1694  return false;
1695 
1696  // If we have a xor as the branch input to this block, and we know that the
1697  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1698  // the condition into the predecessor and fix that value to true, saving some
1699  // logical ops on that path and encouraging other paths to simplify.
1700  //
1701  // This copies something like this:
1702  //
1703  //   BB:
1704  //     %X = phi i1 [1], [%X']
1705  //     %Y = icmp eq i32 %A, %B
1706  //     %Z = xor i1 %X, %Y
1707  //     br i1 %Z, ...
1708  //
1709  // Into:
1710  //   BB':
1711  //     %Y = icmp ne i32 %A, %B
1712  //     br i1 %Y, ...
1713 
1714  PredValueInfoTy XorOpValues;
  // isLHS records which xor operand the predecessor values were inferred for:
  // true for operand 0, false for operand 1 (operand 1 is only tried when
  // nothing could be inferred for operand 0).
1715  bool isLHS = true;
1716  if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1717  WantInteger, BO)) {
1718  assert(XorOpValues.empty());
1719  if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1720  WantInteger, BO))
1721  return false;
1722  isLHS = false;
1723  }
1724 
1725  assert(!XorOpValues.empty() &&
1726  "ComputeValueKnownInPredecessors returned true with no values");
1727 
1728  // Scan the information to see which is most popular: true or false. The
1729  // predecessors can be of the set true, false, or undef.
1730  unsigned NumTrue = 0, NumFalse = 0;
1731  for (const auto &XorOpValue : XorOpValues) {
1732  if (isa<UndefValue>(XorOpValue.first))
1733  // Ignore undefs for the count.
1734  continue;
1735  if (cast<ConstantInt>(XorOpValue.first)->isZero())
1736  ++NumFalse;
1737  else
1738  ++NumTrue;
1739  }
1740 
1741  // Determine which value to split on, true, false, or undef if neither.
  // A tie between true and false is resolved in favour of false; SplitVal
  // stays null only when every known value is undef.
1742  ConstantInt *SplitVal = nullptr;
1743  if (NumTrue > NumFalse)
1744  SplitVal = ConstantInt::getTrue(BB->getContext());
1745  else if (NumTrue != 0 || NumFalse != 0)
1746  SplitVal = ConstantInt::getFalse(BB->getContext());
1747 
1748  // Collect all of the blocks that this can be folded into so that we can
1749  // factor this once and clone it once.
  // Predecessors providing undef are always included, since undef can be
  // treated as whichever value was chosen.
1750  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1751  for (const auto &XorOpValue : XorOpValues) {
1752  if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1753  continue;
1754 
1755  BlocksToFoldInto.push_back(XorOpValue.second);
1756  }
1757 
1758  // If we inferred a value for all of the predecessors, then duplication won't
1759  // help us. However, we can just replace the LHS or RHS with the constant.
1760  if (BlocksToFoldInto.size() ==
1761  cast<PHINode>(BB->front()).getNumIncomingValues()) {
1762  if (!SplitVal) {
1763  // If all preds provide undef, just nuke the xor, because it is undef too.
  // NOTE(review): the listing drops a line here (numbering jumps
  // 1763 -> 1765); presumably the uses of BO are replaced with undef before
  // the erase below - confirm against the original file.
1765  BO->eraseFromParent();
1766  } else if (SplitVal->isZero()) {
1767  // If all preds provide 0, replace the xor with the other input.
  // isLHS converts to operand index 1 when the LHS was the known operand
  // and to index 0 otherwise, i.e. it selects the operand that was NOT
  // inferred.
1768  BO->replaceAllUsesWith(BO->getOperand(isLHS));
1769  BO->eraseFromParent();
1770  } else {
1771  // If all preds provide 1, set the computed value to 1.
  // !isLHS is the index of the operand whose value was inferred; the xor
  // itself is kept and only that operand becomes the constant true.
1772  BO->setOperand(!isLHS, SplitVal);
1773  }
1774 
1775  return true;
1776  }
1777 
1778  // Try to duplicate BB into PredBB.
1779  return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1780 }
1781 
1782 /// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1783 /// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1784 /// NewPred using the entries from OldPred (suitably mapped).
// NOTE(review): this listing is missing three lines of the function (embedded
// numbers 1785, 1788 and 1797): the start of the signature that introduces
// PHIBB, the parameter that introduces ValueMap, and the declaration of the
// iterator `I` - presumably the result of looking Inst up in ValueMap.
// Confirm against the original file before editing.
1786  BasicBlock *OldPred,
1787  BasicBlock *NewPred,
  // Walk the leading PHI nodes of PHIBB; the loop ends at the first
  // instruction that is not a PHI (dyn_cast yields null).
1789  for (BasicBlock::iterator PNI = PHIBB->begin();
1790  PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
1791  // Ok, we have a PHI node. Figure out what the incoming value was for the
1792  // DestBlock.
1793  Value *IV = PN->getIncomingValueForBlock(OldPred);
1794 
1795  // Remap the value if necessary.
  // Only instructions are looked up; any other kind of value is used
  // unchanged. An instruction without a map entry is also used as is.
1796  if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
1798  if (I != ValueMap.end())
1799  IV = I->second;
1800  }
1801 
1802  PN->addIncoming(IV, NewPred);
1803  }
1804 }
1805 
1806 /// ThreadEdge - We have decided that it is safe and profitable to factor the
1807 /// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
1808 /// across BB. Transform the IR to reflect this change.
// Returns false (leaving the IR untouched) when SuccBB == BB, when BB or
// SuccBB is a recorded loop header, or when duplicating BB costs more than
// BBDupThreshold; returns true after the edge has been threaded.
// NOTE(review): the first line of the signature (return type, name and the BB
// parameter) is absent from this listing; the numbering jumps 1808 -> 1810.
1810  const SmallVectorImpl<BasicBlock *> &PredBBs,
1811  BasicBlock *SuccBB) {
1812  // If threading to the same block as we come from, we would infinite loop.
1813  if (SuccBB == BB) {
1814  DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
1815  << "' - would thread to self!\n");
1816  return false;
1817  }
1818 
1819  // If threading this would thread across a loop header, don't thread the edge.
1820  // See the comments above FindLoopHeaders for justifications and caveats.
1821  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
1822  DEBUG({
1823  bool BBIsHeader = LoopHeaders.count(BB);
1824  bool SuccIsHeader = LoopHeaders.count(SuccBB);
1825  dbgs() << " Not threading across "
1826  << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
1827  << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
1828  << SuccBB->getName() << "' - it might create an irreducible loop!\n";
1829  });
1830  return false;
1831  }
1832 
1833  unsigned JumpThreadCost =
1834  getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
1835  if (JumpThreadCost > BBDupThreshold) {
1836  DEBUG(dbgs() << " Not threading BB '" << BB->getName()
1837  << "' - Cost is too high: " << JumpThreadCost << "\n");
1838  return false;
1839  }
1840 
1841  // And finally, do it! Start by factoring the predecessors if needed.
1842  BasicBlock *PredBB;
1843  if (PredBBs.size() == 1)
1844  PredBB = PredBBs[0];
1845  else {
1846  DEBUG(dbgs() << " Factoring out " << PredBBs.size()
1847  << " common predecessors.\n");
1848  PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
1849  }
1850 
1851  // And finally, do it!
1852  DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
1853  << SuccBB->getName() << "' with cost: " << JumpThreadCost
1854  << ", across block:\n "
1855  << *BB << "\n");
1856 
1857  LVI->threadEdge(PredBB, BB, SuccBB);
1858 
1859  // We are going to have to map operands from the original BB block to the new
1860  // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
1861  // account for entry from PredBB.
  // NOTE(review): the declaration of ValueMapping is absent from this listing
  // (numbering jumps 1861 -> 1863). Its later use with
  // DenseMap<Instruction*, Value*>::iterator suggests that map type - confirm.
1863 
1864  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
1865  BB->getName()+".thread",
1866  BB->getParent(), BB);
1867  NewBB->moveAfter(PredBB);
1868 
1869  // Set the block frequency of NewBB.
  // NewBB receives exactly the flow that used to travel PredBB -> BB.
1870  if (HasProfileData) {
1871  auto NewBBFreq =
1872  BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
1873  BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
1874  }
1875 
  // PHI nodes are not cloned: on the path through NewBB each PHI of BB is
  // simply its incoming value from PredBB. BI is left at the first non-PHI
  // instruction for the cloning loop below.
1876  BasicBlock::iterator BI = BB->begin();
1877  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
1878  ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
1879 
1880  // Clone the non-phi instructions of BB into NewBB, keeping track of the
1881  // mapping and using it to remap operands in the cloned instructions.
1882  for (; !isa<TerminatorInst>(BI); ++BI) {
1883  Instruction *New = BI->clone();
1884  New->setName(BI->getName());
1885  NewBB->getInstList().push_back(New);
1886  ValueMapping[&*BI] = New;
1887 
1888  // Remap operands to patch up intra-block references.
1889  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
1890  if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
1891  DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
1892  if (I != ValueMapping.end())
1893  New->setOperand(i, I->second);
1894  }
1895  }
1896 
1897  // We didn't copy the terminator from BB over to NewBB, because there is now
1898  // an unconditional jump to SuccBB. Insert the unconditional jump.
1899  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
1900  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
1901 
1902  // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
1903  // PHI nodes for NewBB now.
1904  AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
1905 
1906  // If there were values defined in BB that are used outside the block, then we
1907  // now have to update all uses of the value to use either the original value,
1908  // the cloned value, or some PHI derived value. This can require arbitrary
1909  // PHI insertion, of which we are prepared to do, clean these up now.
1910  SSAUpdater SSAUpdate;
1911  SmallVector<Use*, 16> UsesToRename;
1912  for (Instruction &I : *BB) {
1913  // Scan all uses of this instruction to see if it is used outside of its
1914  // block, and if so, record them in UsesToRename.
  // A use by a PHI node is attributed to the PHI's incoming block for that
  // use rather than to the block containing the PHI.
1915  for (Use &U : I.uses()) {
1916  Instruction *User = cast<Instruction>(U.getUser());
1917  if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1918  if (UserPN->getIncomingBlock(U) == BB)
1919  continue;
1920  } else if (User->getParent() == BB)
1921  continue;
1922 
1923  UsesToRename.push_back(&U);
1924  }
1925 
1926  // If there are no uses outside the block, we're done with this instruction.
1927  if (UsesToRename.empty())
1928  continue;
1929 
1930  DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
1931 
1932  // We found a use of I outside of BB. Rename all uses of I that are outside
1933  // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
1934  // with the two values we know.
1935  SSAUpdate.Initialize(I.getType(), I.getName());
1936  SSAUpdate.AddAvailableValue(BB, &I);
1937  SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
1938 
  // Draining the list here leaves UsesToRename empty for the next
  // instruction of BB.
1939  while (!UsesToRename.empty())
1940  SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
1941  DEBUG(dbgs() << "\n");
1942  }
1943 
1944  // Ok, NewBB is good to go. Update the terminator of PredBB to jump to
1945  // NewBB instead of BB. This eliminates predecessors from BB, which requires
1946  // us to simplify any PHI nodes in BB.
  // Every successor slot of PredBB that targets BB is redirected, and
  // removePredecessor is called once per redirected slot.
1947  TerminatorInst *PredTerm = PredBB->getTerminator();
1948  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
1949  if (PredTerm->getSuccessor(i) == BB) {
1950  BB->removePredecessor(PredBB, true);
1951  PredTerm->setSuccessor(i, NewBB);
1952  }
1953 
1954  // At this point, the IR is fully up to date and consistent. Do a quick scan
1955  // over the new instructions and zap any that are constants or dead. This
1956  // frequently happens because of phi translation.
1957  SimplifyInstructionsInBlock(NewBB, TLI);
1958 
1959  // Update the edge weight from BB to SuccBB, which should be less than before.
1960  UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
1961 
1962  // Threaded an edge!
1963  ++NumThreads;
1964  return true;
1965 }
1966 
1967 /// Create a new basic block that will be the predecessor of BB and successor of
1968 /// all blocks in Preds. When profile data is available, update the frequency of
1969 /// this new block.
1970 BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
1971  ArrayRef<BasicBlock *> Preds,
1972  const char *Suffix) {
1973  // Collect the frequencies of all predecessors of BB, which will be used to
1974  // update the edge weight on BB->SuccBB.
1975  BlockFrequency PredBBFreq(0);
1976  if (HasProfileData)
1977  for (auto Pred : Preds)
1978  PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB);
1979 
1980  BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix);
1981 
1982  // Set the block frequency of the newly created PredBB, which is the sum of
1983  // frequencies of Preds.
1984  if (HasProfileData)
1985  BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency());
1986  return PredBB;
1987 }
1988 
1989 bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
1990  const TerminatorInst *TI = BB->getTerminator();
1991  assert(TI->getNumSuccessors() > 1 && "not a split");
1992 
1993  MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
1994  if (!WeightsNode)
1995  return false;
1996 
1997  MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
1998  if (MDName->getString() != "branch_weights")
1999  return false;
2000 
2001  // Ensure there are weights for all of the successors. Note that the first
2002  // operand to the metadata node is a name, not a weight.
2003  return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1;
2004 }
2005 
2006 /// Update the block frequency of BB and branch weight and the metadata on the
2007 /// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2008 /// Freq(PredBB->BB) / Freq(BB->SuccBB).
// Does nothing when HasProfileData is false. The frequency of NewBB is read
// from BFI, so it must have been set before this is called.
2009 void JumpThreadingPass::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2010  BasicBlock *BB,
2011  BasicBlock *NewBB,
2012  BasicBlock *SuccBB) {
2013  if (!HasProfileData)
2014  return;
2015 
2016  assert(BFI && BPI && "BFI & BPI should have been created here");
2017 
2018  // As the edge from PredBB to BB is deleted, we have to update the block
2019  // frequency of BB.
2020  auto BBOrigFreq = BFI->getBlockFreq(BB);
2021  auto NewBBFreq = BFI->getBlockFreq(NewBB);
2022  auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
2023  auto BBNewFreq = BBOrigFreq - NewBBFreq;
2024  BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
2025 
2026  // Collect updated outgoing edges' frequencies from BB and use them to update
2027  // edge probabilities.
  // Only the BB->SuccBB edge loses the flow now routed through NewBB; every
  // other outgoing edge keeps its original frequency.
2028  SmallVector<uint64_t, 4> BBSuccFreq;
2029  for (BasicBlock *Succ : successors(BB)) {
2030  auto SuccFreq = (Succ == SuccBB)
2031  ? BB2SuccBBFreq - NewBBFreq
2032  : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
2033  BBSuccFreq.push_back(SuccFreq.getFrequency());
2034  }
2035 
2036  uint64_t MaxBBSuccFreq =
2037  *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
2038 
  // NOTE(review): the declaration of BBSuccProbs is absent from this listing
  // (numbering jumps 2038 -> 2040); presumably a small vector of
  // BranchProbability - confirm against the original file.
  // If every outgoing frequency is zero, fall back to giving each successor
  // the same share, 1 / (number of successors).
2040  if (MaxBBSuccFreq == 0)
2041  BBSuccProbs.assign(BBSuccFreq.size(),
2042  {1, static_cast<unsigned>(BBSuccFreq.size())});
2043  else {
2044  for (uint64_t Freq : BBSuccFreq)
2045  BBSuccProbs.push_back(
2046  BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2047  // Normalize edge probabilities so that they sum up to one.
2048  BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
2049  BBSuccProbs.end());
2050  }
2051 
2052  // Update edge probabilities in BPI.
  // BBSuccProbs was filled in successors(BB) order, so index I is passed as
  // the successor index.
2053  for (int I = 0, E = BBSuccProbs.size(); I < E; I++)
2054  BPI->setEdgeProbability(BB, I, BBSuccProbs[I]);
2055 
2056  // Update the profile metadata as well.
2057  //
2058  // Don't do this if the profile of the transformed blocks was statically
2059  // estimated. (This could occur despite the function having an entry
2060  // frequency in completely cold parts of the CFG.)
2061  //
2062  // In this case we don't want to suggest to subsequent passes that the
2063  // calculated weights are fully consistent. Consider this graph:
2064  //
2065  //                 check_1
2066  //             50% /  |
2067  //             eq_1   | 50%
2068  //                 \  |
2069  //                 check_2
2070  //             50% /  |
2071  //             eq_2   | 50%
2072  //                 \  |
2073  //                 check_3
2074  //             50% /  |
2075  //             eq_3   | 50%
2076  //                 \  |
2077  //
2078  // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2079  // the overall probabilities are inconsistent; the total probability that the
2080  // value is either 1, 2 or 3 is 150%.
2081  //
2082  // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2083  // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2084  // the loop exit edge. Then based solely on static estimation we would assume
2085  // the loop was extremely hot.
2086  //
2087  // FIXME this locally as well so that BPI and BFI are consistent as well. We
2088  // shouldn't make edges extremely likely or unlikely based solely on static
2089  // estimation.
2090  if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
  // The numerators of the normalized probabilities are used directly as the
  // new branch weights.
2091  SmallVector<uint32_t, 4> Weights;
2092  for (auto Prob : BBSuccProbs)
2093  Weights.push_back(Prob.getNumerator());
2094 
2095  auto TI = BB->getTerminator();
2096  TI->setMetadata(
  // NOTE(review): the first argument of setMetadata is absent from this
  // listing (numbering jumps 2096 -> 2098); presumably the MD_prof metadata
  // kind that doesBlockHaveProfileData reads - confirm.
2098  MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
2099  }
2100 }
2101 
2102 /// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2103 /// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2104 /// If we can duplicate the contents of BB up into PredBB do so now, this
2105 /// improves the odds that the branch will be on an analyzable instruction like
2106 /// a compare.
// Returns false (leaving the IR untouched) when BB is a recorded loop header
// or when duplicating it costs more than BBDupThreshold; returns true after
// BB has been duplicated. PredBBs must not be empty.
// NOTE(review): the line carrying the return type and function name is absent
// from this listing (numbering jumps 2106 -> 2108).
2108  BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2109  assert(!PredBBs.empty() && "Can't handle an empty set");
2110 
2111  // If BB is a loop header, then duplicating this block outside the loop would
2112  // cause us to transform this into an irreducible loop, don't do this.
2113  // See the comments above FindLoopHeaders for justifications and caveats.
2114  if (LoopHeaders.count(BB)) {
2115  DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2116  << "' into predecessor block '" << PredBBs[0]->getName()
2117  << "' - it might create an irreducible loop!\n");
2118  return false;
2119  }
2120 
2121  unsigned DuplicationCost =
2122  getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
2123  if (DuplicationCost > BBDupThreshold) {
2124  DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2125  << "' - Cost is too high: " << DuplicationCost << "\n");
2126  return false;
2127  }
2128 
2129  // And finally, do it! Start by factoring the predecessors if needed.
2130  BasicBlock *PredBB;
2131  if (PredBBs.size() == 1)
2132  PredBB = PredBBs[0];
2133  else {
2134  DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2135  << " common predecessors.\n");
2136  PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
2137  }
2138 
2139  // Okay, we decided to do this! Clone all the instructions in BB onto the end
2140  // of PredBB.
2141  DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
2142  << PredBB->getName() << "' to eliminate branch on phi. Cost: "
2143  << DuplicationCost << " block is:" << *BB << "\n");
2144 
2145  // Unless PredBB ends with an unconditional branch, split the edge so that we
2146  // can just clone the bits from BB into the end of the new PredBB.
2147  BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2148 
2149  if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2150  PredBB = SplitEdge(PredBB, BB);
2151  OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2152  }
2153 
2154  // We are going to have to map operands from the original BB block into the
2155  // PredBB block. Evaluate PHI nodes in BB.
  // NOTE(review): the declaration of ValueMapping is absent from this listing
  // (numbering jumps 2155 -> 2157). Its later use with
  // DenseMap<Instruction*, Value*>::iterator suggests that map type - confirm.
2157 
2158  BasicBlock::iterator BI = BB->begin();
2159  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2160  ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2161  // Clone the non-phi instructions of BB into PredBB, keeping track of the
2162  // mapping and using it to remap operands in the cloned instructions.
  // The loop runs to BB->end(), so BB's terminator is cloned into PredBB too.
2163  for (; BI != BB->end(); ++BI) {
2164  Instruction *New = BI->clone();
2165 
2166  // Remap operands to patch up intra-block references.
2167  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2168  if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2169  DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
2170  if (I != ValueMapping.end())
2171  New->setOperand(i, I->second);
2172  }
2173 
2174  // If this instruction can be simplified after the operands are updated,
2175  // just use the simplified value instead. This frequently happens due to
2176  // phi translation.
2177  if (Value *IV = SimplifyInstruction(
2178  New,
2179  {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
2180  ValueMapping[&*BI] = IV;
  // A simplified clone that may still have side effects is kept (and
  // inserted below) even though later uses are mapped to IV.
2181  if (!New->mayHaveSideEffects()) {
2182  New->deleteValue();
2183  New = nullptr;
2184  }
2185  } else {
2186  ValueMapping[&*BI] = New;
2187  }
2188  if (New) {
2189  // Otherwise, insert the new instruction into the block.
2190  New->setName(BI->getName());
2191  PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
2192  }
2193  }
2194 
2195  // Check to see if the targets of the branch had PHI nodes. If so, we need to
2196  // add entries to the PHI nodes for branch from PredBB now.
  // The cast and the two getSuccessor calls rely on BB ending in a branch
  // with two successors, as the function comment describes.
2197  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2198  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2199  ValueMapping);
2200  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2201  ValueMapping);
2202 
2203  // If there were values defined in BB that are used outside the block, then we
2204  // now have to update all uses of the value to use either the original value,
2205  // the cloned value, or some PHI derived value. This can require arbitrary
2206  // PHI insertion, of which we are prepared to do, clean these up now.
2207  SSAUpdater SSAUpdate;
2208  SmallVector<Use*, 16> UsesToRename;
2209  for (Instruction &I : *BB) {
2210  // Scan all uses of this instruction to see if it is used outside of its
2211  // block, and if so, record them in UsesToRename.
  // A use by a PHI node is attributed to the PHI's incoming block for that
  // use rather than to the block containing the PHI.
2212  for (Use &U : I.uses()) {
2213  Instruction *User = cast<Instruction>(U.getUser());
2214  if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
2215  if (UserPN->getIncomingBlock(U) == BB)
2216  continue;
2217  } else if (User->getParent() == BB)
2218  continue;
2219 
2220  UsesToRename.push_back(&U);
2221  }
2222 
2223  // If there are no uses outside the block, we're done with this instruction.
2224  if (UsesToRename.empty())
2225  continue;
2226 
2227  DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
2228 
2229  // We found a use of I outside of BB. Rename all uses of I that are outside
2230  // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
2231  // with the two values we know.
2232  SSAUpdate.Initialize(I.getType(), I.getName());
2233  SSAUpdate.AddAvailableValue(BB, &I);
2234  SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]);
2235 
2236  while (!UsesToRename.empty())
2237  SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
2238  DEBUG(dbgs() << "\n");
2239  }
2240 
2241  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2242  // that we nuked.
2243  BB->removePredecessor(PredBB, true);
2244 
2245  // Remove the unconditional branch at the end of the PredBB block.
  // The cloned terminator of BB was inserted in front of this branch and is
  // what remains as PredBB's terminator.
2246  OldPredBranch->eraseFromParent();
2247 
2248  ++NumDupes;
2249  return true;
2250 }
2251 
2252 /// TryToUnfoldSelect - Look for blocks of the form
2253 /// bb1:
2254 ///   %a = select
2255 ///   br bb2
2256 ///
2257 /// bb2:
2258 ///   %p = phi [%a, %bb1] ...
2259 ///   %c = icmp %p
2260 ///   br i1 %c
2261 ///
2262 /// And expand the select into a branch structure if one of its arms allows %c
2263 /// to be folded. This later enables threading from bb1 over bb2.
// Returns true as soon as one select has been unfolded, false if none was.
// NOTE(review): the signature line is absent from this listing (numbering
// jumps 2263 -> 2265). The body uses `CondCmp` (a compare with a predicate and
// two operands) and `BB`, which presumably are the parameters - confirm.
2265  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2266  PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
  // cast<> (not dyn_cast<>) is used here, so operand 1 of the compare is
  // required to be a Constant when this function is called.
2267  Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2268 
2269  if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2270  CondLHS->getParent() != BB)
2271  return false;
2272 
2273  for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2274  BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2275  SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2276 
2277  // Look if one of the incoming values is a select in the corresponding
2278  // predecessor.
  // hasOneUse() guarantees the PHI is the select's only user, which is what
  // makes erasing the select below safe.
2279  if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2280  continue;
2281 
2282  BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2283  if (!PredTerm || !PredTerm->isUnconditional())
2284  continue;
2285 
2286  // Now check if one of the select values would allow us to constant fold the
2287  // terminator in BB. We don't do the transform if both sides fold, those
2288  // cases will be threaded in any case.
  // Operands 1 and 2 of the select are its true and false values.
2289  LazyValueInfo::Tristate LHSFolds =
2290  LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2291  CondRHS, Pred, BB, CondCmp);
2292  LazyValueInfo::Tristate RHSFolds =
2293  LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2294  CondRHS, Pred, BB, CondCmp);
  // Proceed when at least one arm folds and the two arms do not fold to the
  // same result.
2295  if ((LHSFolds != LazyValueInfo::Unknown ||
2296  RHSFolds != LazyValueInfo::Unknown) &&
2297  LHSFolds != RHSFolds) {
2298  // Expand the select.
2299  //
2300  // Pred --
2301  //  |    v
2302  //  |  NewBB
2303  //  |    |
2304  //  |-----
2305  //  v
2306  // BB
2307  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2308  BB->getParent(), BB);
2309  // Move the unconditional branch to NewBB.
2310  PredTerm->removeFromParent();
2311  NewBB->getInstList().insert(NewBB->end(), PredTerm);
2312  // Create a conditional branch and update PHI nodes.
  // True edge: Pred -> NewBB -> BB carries the select's true value.
  // False edge: Pred -> BB directly carries the select's false value.
2313  BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
2314  CondLHS->setIncomingValue(I, SI->getFalseValue());
2315  CondLHS->addIncoming(SI->getTrueValue(), NewBB);
2316  // The select is now dead.
2317  SI->eraseFromParent();
2318 
2319  // Update any other PHI nodes in BB.
  // NewBB is a new predecessor of BB; it supplies the same value as Pred.
2320  for (BasicBlock::iterator BI = BB->begin();
2321  PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2322  if (Phi != CondLHS)
2323  Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2324  return true;
2325  }
2326  }
2327  return false;
2328 }
2329 
2330 /// TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2331 /// same BB in the form
2332 /// bb:
2333 ///   %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2334 ///   %s = select %p, trueval, falseval
2335 ///
2336 /// or
2337 ///
2338 /// bb:
2339 ///   %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2340 ///   %c = cmp %p, 0
2341 ///   %s = select %c, trueval, falseval
2342 ///
2343 /// And expand the select into a branch structure. This later enables
2344 /// jump-threading over bb in this pass.
2345 ///
2346 /// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2347 /// select if the associated PHI has at least one constant. If the unfolded
2348 /// select is not jump-threaded, it will be folded again in the later
2349 /// optimizations.
// Returns true after unfolding one select, false if no candidate was found.
// NOTE(review): the signature line is absent from this listing (numbering
// jumps 2349 -> 2351); the body operates on a block named `BB`.
2351  // If threading this would thread across a loop header, don't thread the edge.
2352  // See the comments above FindLoopHeaders for justifications and caveats.
2353  if (LoopHeaders.count(BB))
2354  return false;
2355 
2356  for (BasicBlock::iterator BI = BB->begin();
2357  PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2358  // Look for a Phi having at least one constant incoming value.
2359  if (llvm::all_of(PN->incoming_values(),
2360  [](Value *V) { return !isa<ConstantInt>(V); }))
2361  continue;
2362 
2363  auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2364  // Check if SI is in BB and use V as condition.
  // The condition must also be of type i1.
2365  if (SI->getParent() != BB)
2366  return false;
2367  Value *Cond = SI->getCondition();
2368  return (Cond && Cond == V && Cond->getType()->isIntegerTy(1));
2369  };
2370 
2371  SelectInst *SI = nullptr;
2372  for (Use &U : PN->uses()) {
2373  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2374  // Look for a ICmp in BB that compares PN with a constant and is the
2375  // condition of a Select.
  // 1 - U.getOperandNo() is the index of the compare operand that is
  // not PN. With hasOneUse() established, user_back() and use_begin()
  // both refer to the compare's single use.
2376  if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2377  isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2378  if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2379  if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2380  SI = SelectI;
2381  break;
2382  }
2383  } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2384  // Look for a Select in BB that uses PN as condition.
2385  if (isUnfoldCandidate(SelectI, U.get())) {
2386  SI = SelectI;
2387  break;
2388  }
2389  }
2390  }
2391 
2392  if (!SI)
2393  continue;
2394  // Expand the select.
  // The block is split before SI. The new PHI, inserted before SI, takes the
  // true value from the block holding Term and the false value from BB,
  // which then replaces the select.
2395  TerminatorInst *Term =
2396  SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
2397  PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
2398  NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2399  NewPN->addIncoming(SI->getFalseValue(), BB);
2400  SI->replaceAllUsesWith(NewPN);
2401  SI->eraseFromParent();
2402  return true;
2403  }
2404  return false;
2405 }
2406 
2407 /// Try to propagate a guard from the current BB into one of its predecessors
2408 /// in case if another branch of execution implies that the condition of this
2409 /// guard is always true. Currently we only process the simplest case that
2410 /// looks like:
2411 ///
2412 /// Start:
2413 ///   %cond = ...
2414 ///   br i1 %cond, label %T1, label %F1
2415 /// T1:
2416 ///   br label %Merge
2417 /// F1:
2418 ///   br label %Merge
2419 /// Merge:
2420 ///   %condGuard = ...
2421 ///   call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
2422 ///
2423 /// And cond either implies condGuard or !condGuard. In this case all the
2424 /// instructions before the guard can be duplicated in both branches, and the
2425 /// guard is then threaded to one of them.
// Returns true if a guard in BB was threaded, false otherwise.
// NOTE(review): the signature line is absent from this listing (numbering
// jumps 2425 -> 2427); the body operates on a block named `BB`.
2427  using namespace PatternMatch;
2428 
2429  // We only want to deal with two predecessors.
  // The iterator is advanced by hand so that exactly two distinct
  // predecessors are accepted: zero, one, three or more, or the same block
  // listed twice all bail out.
2430  BasicBlock *Pred1, *Pred2;
2431  auto PI = pred_begin(BB), PE = pred_end(BB);
2432  if (PI == PE)
2433  return false;
2434  Pred1 = *PI++;
2435  if (PI == PE)
2436  return false;
2437  Pred2 = *PI++;
2438  if (PI != PE)
2439  return false;
2440  if (Pred1 == Pred2)
2441  return false;
2442 
2443  // Try to thread one of the guards of the block.
2444  // TODO: Look up deeper than to immediate predecessor?
  // Both predecessors must share the same single predecessor (the top of the
  // diamond).
2445  auto *Parent = Pred1->getSinglePredecessor();
2446  if (!Parent || Parent != Pred2->getSinglePredecessor())
2447  return false;
2448 
  // Try each guard intrinsic call in BB in order and stop at the first one
  // that ThreadGuard manages to thread.
2449  if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
2450  for (auto &I : *BB)
2451  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
2452  if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
2453  return true;
2454 
2455  return false;
2456 }
2457 
2458 /// Try to propagate the guard from BB which is the lower block of a diamond
2459 /// to one of its branches, in case if diamond's condition implies guard's
2460 /// condition.
// Returns false when neither branch direction implies the guard condition or
// when the duplication cost exceeds BBDupThreshold; returns true after the
// guard has been moved. BI must be a conditional branch.
// NOTE(review): the first line of the signature is absent from this listing
// (numbering jumps 2460 -> 2462); the body uses `BB` and `Guard`, which
// presumably are the leading parameters - confirm.
2462  BranchInst *BI) {
2463  assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
2464  assert(BI->isConditional() && "Unconditional branch has 2 successors?");
2465  Value *GuardCond = Guard->getArgOperand(0);
2466  Value *BranchCond = BI->getCondition();
2467  BasicBlock *TrueDest = BI->getSuccessor(0);
2468  BasicBlock *FalseDest = BI->getSuccessor(1);
2469 
2470  auto &DL = BB->getModule()->getDataLayout();
2471  bool TrueDestIsSafe = false;
2472  bool FalseDestIsSafe = false;
2473 
2474  // True dest is safe if BranchCond => GuardCond.
2475  auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
2476  if (Impl && *Impl)
2477  TrueDestIsSafe = true;
2478  else {
2479  // False dest is safe if !BranchCond => GuardCond.
2480  Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
2481  if (Impl && *Impl)
2482  FalseDestIsSafe = true;
2483  }
2484 
2485  if (!TrueDestIsSafe && !FalseDestIsSafe)
2486  return false;
2487 
  // At most one of the two flags is set (the second query only runs when the
  // first did not succeed), so these pick opposite successors: the safe side
  // becomes the unguarded block and the other side keeps the guard.
2488  BasicBlock *UnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
2489  BasicBlock *GuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
2490 
2491  ValueToValueMapTy UnguardedMapping, GuardedMapping;
2492  Instruction *AfterGuard = Guard->getNextNode();
2493  unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
2494  if (Cost > BBDupThreshold)
2495  return false;
2496  // Duplicate all instructions before the guard and the guard itself to the
2497  // branch where implication is not proved.
2498  GuardedBlock = DuplicateInstructionsInSplitBetween(
2499  BB, GuardedBlock, AfterGuard, GuardedMapping);
2500  assert(GuardedBlock && "Could not create the guarded block?");
2501  // Duplicate all instructions before the guard in the unguarded branch.
2502  // Since we have successfully duplicated the guarded block and this block
2503  // has fewer instructions, we expect it to succeed.
2504  UnguardedBlock = DuplicateInstructionsInSplitBetween(BB, UnguardedBlock,
2505  Guard, UnguardedMapping);
2506  assert(UnguardedBlock && "Could not create the unguarded block?");
2507  DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
2508  << GuardedBlock->getName() << "\n");
2509 
2510  // Some instructions before the guard may still have uses. For them, we need
2511  // to create Phi nodes merging their copies in both guarded and unguarded
2512  // branches. Those instructions that have no uses can be just removed.
  // NOTE(review): the declaration of ToRemove is absent from this listing
  // (numbering jumps 2512 -> 2514); presumably a small vector of
  // Instruction* - confirm against the original file.
  // Collects every non-PHI instruction up to and including the guard.
2514  for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
2515  if (!isa<PHINode>(&*BI))
2516  ToRemove.push_back(&*BI);
2517 
2518  Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
2519  assert(InsertionPoint && "Empty block?");
2520  // Substitute with Phis & remove.
  // Processed in reverse program order - presumably so that an instruction is
  // erased only after the later instructions in this list that use it.
2521  for (auto *Inst : reverse(ToRemove)) {
2522  if (!Inst->use_empty()) {
2523  PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
2524  NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
2525  NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
2526  NewPN->insertBefore(InsertionPoint);
2527  Inst->replaceAllUsesWith(NewPN);
2528  }
2529  Inst->eraseFromParent();
2530  }
2531  return true;
2532 }
Legacy wrapper pass to provide the GlobalsAAResult object.
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock *> &PredBBs, BasicBlock *SuccBB)
ThreadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:69
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:172
A parsed version of the target data layout string, and methods for querying it. ...
Definition: DataLayout.h:109
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:523
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:850
iterator_range< use_iterator > uses()
Definition: Value.h:350
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs=false)
Notify the BasicBlock that the predecessor Pred is no longer able to reach it.
Definition: BasicBlock.cpp:276
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:173
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:39
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
BranchProbability getCompl() const
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:687
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
Wrapper around LazyValueInfo.
This is the interface for a simple mod/ref and alias analysis over globals.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Definition: SSAUpdater.cpp:54
BasicBlock * getSuccessor(unsigned idx) const
Return the specified successor.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
iterator end()
Definition: Function.h:590
void initializeJumpThreadingPass(PassRegistry &)
void DeleteDeadBlock(BasicBlock *BB)
Delete the specified block, which must have no predecessors.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value...
Definition: SSAUpdater.cpp:67
This class represents a function call, abstracting a target machine's calling convention.
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
This file contains the declarations for metadata subclasses.
const Value * getTrueValue() const
void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DominatorTree *DT=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!)...
Definition: Local.cpp:587
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:233
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:697
bool isTerminator() const
Definition: Instruction.h:128
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:93
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:816
Value * FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, AliasAnalysis *AA, bool *IsLoad, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
Definition: Loads.cpp:336
static BasicBlock * FindMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock *>> &PredToDestList)
FindMostPopularDest - The specified list contains multiple possible threadable destinations.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:504
Metadata node.
Definition: Metadata.h:862
F(f)
An instruction for reading from memory.
Definition: Instructions.h:164
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:1832
Value * getCondition() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
This defines the Use class.
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:689
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:33
FunctionPass * createJumpThreadingPass(int Threshold=-1)
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
The address of a basic block.
Definition: Constants.h:813
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:116
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:591
This class represents the LLVM 'select' instruction.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:361
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
'undef' values are things that do not have specified contents.
Definition: Constants.h:1247
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AliasAnalysis *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:321
unsigned getNumSuccessors() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:70
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
jump threading
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:284
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
bool TryToUnfoldSelectInCurrBB(BasicBlock *BB)
TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:502
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:427
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:248
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:201
static Constant * get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags=0, Type *OnlyIfReducedTy=nullptr)
get - Return a binary or shift operator constant expression, folding if possible. ...
Definition: Constants.cpp:1711
Value * SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:194
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:83
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:428
static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI, AAResults &AA)
Definition: Sink.cpp:201
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:290
iterator begin()
Definition: Function.h:588
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
Value * getOperand(unsigned i) const
Definition: User.h:154
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:475
StringRef getString() const
Definition: Metadata.cpp:456
const BasicBlock & getEntryBlock() const
Definition: Function.h:572
void getAAMetadata(AAMDNodes &N, bool Merge=false) const
Fills the AAMDNodes structure with AA metadata from this instruction.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:769
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:171
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:200
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:281
void setSuccessor(unsigned idx, BasicBlock *B)
Update the specified successor to point at the provided block.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:217
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
Definition: Instruction.cpp:75
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes...
Definition: Local.cpp:837
Conditional or Unconditional Branch instruction.
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1339
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:264
Indirect Branch Instruction.
A manager for alias analyses.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:363
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:504
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, AliasAnalysis *AA_, bool HasProfileData_, std::unique_ptr< BlockFrequencyInfo > BFI_, std::unique_ptr< BranchProbabilityInfo > BPI_)
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:113
bool isUnordered() const
Definition: Instructions.h:264
Represent the analysis usage information of a pass.
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:823
This instruction compares its operands according to the predicate given to the constructor.
Analysis pass providing a never-invalidated alias analysis result.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:860
jump Jump Threading
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:116
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:101
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:374
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:107
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2109
void FindLoopHeaders(Function &F)
FindLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
const Value * getCondition() const
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
Optional< uint64_t > getEntryCount() const
Get the entry count for this function.
Definition: Function.cpp:1325
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1320
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:527
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1214
static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForJumpOnUndef - If we determine that the specified block ends in an undefined jump...
Tristate
This is used to return true/false/dunno results.
Definition: LazyValueInfo.h:63
bool ProcessBranchOnPHI(PHINode *PN)
ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node in the curren...
bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
size_type size() const
Definition: SmallPtrSet.h:92
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
Definition: BasicBlock.h:376
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:317
static cl::opt< bool > PrintLVIAfterJumpThreading("print-lvi-after-jump-threading", cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false), cl::Hidden)
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:176
void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
Definition: BasicBlock.cpp:110
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
See the file comment.
Definition: ValueMap.h:86
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements...
Definition: SmallPtrSet.h:410
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
iterator end()
Definition: BasicBlock.h:254
bool isExceptional() const
Definition: InstrTypes.h:84
bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI=nullptr)
Remove all blocks that cannot be reached from the function's entry.
Definition: Local.cpp:1677
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:76
This class represents a range of values.
Definition: ConstantRange.h:47
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
TerminatorInst * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:642
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:560
bool SimplifyPartiallyRedundantLoad(LoadInst *LI)
SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant load instruction, eliminate it by replacing it with a PHI node.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:110
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:516
void setOperand(unsigned i, Value *Val)
Definition: User.h:159
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:172
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:57
void push_back(pointer val)
Definition: ilist.h:326
BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1435
const Value * getFalseValue() const
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:65
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:934
bool hasValue() const
Definition: Optional.h:133
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:529
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
Analysis providing branch probability information.
iterator insert(iterator where, pointer New)
Definition: ilist.h:241
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:284
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:226
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:656
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
iterator begin()
Definition: DenseMap.h:70
bool ProcessBranchOnXOR(BinaryOperator *BO)
ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:218
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:245
bool ProcessImpliedCondition(BasicBlock *BB)
#define I(x, y, z)
Definition: MD5.cpp:58
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:193
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal)
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
void combineMetadataForCSE(Instruction *K, const Instruction *J)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:1776
bool isUnconditional() const
static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, DenseMap< Instruction *, Value *> &ValueMap)
AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new predecessor to the PHIBB block...
bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
Optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock *> > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them...
Definition: CFG.cpp:27
Analysis pass providing the TargetLibraryInfo.
static int const Threshold
TODO: Write a new FunctionPass AliasAnalysis so that it can keep a cache.
Multiway switch.
Helper struct that represents how a value is mapped through different register banks.
This pass computes, caches, and vends lazy value constraint information.
Definition: LazyValueInfo.h:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
unsigned getNumSuccessors() const
Return the number of successors that this terminator has.
See the comments on JumpThreadingPass.
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:383
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
bool ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches...
bool ProcessGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case another branch o...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:545
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:293
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(BasicBlock *BB)
Definition: CFG.h:143
bool ProcessBlock(BasicBlock *BB)
ProcessBlock - If there are any predecessors whose control can be threaded through to a successor...
static const Function * getParent(const Value *V)
#define DEBUG(X)
Definition: Debug.h:118
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the edge connecting specified block.
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:408
A single uniqued string.
Definition: Metadata.h:602
A container for analyses that lazily runs them and caches their results.
const Instruction * getFirstNonPHIOrDbg() const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic...
Definition: BasicBlock.cpp:178
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
This header defines various interfaces for pass management in LLVM.
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", "Jump Threading", false, false) INITIALIZE_PASS_END(JumpThreading
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1303
op_range incoming_values()
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
Definition: SSAUpdater.cpp:187
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock *> &PredBBs)
DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
static bool hasAddressTakenAndUsed(BasicBlock *BB)
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
uint32_t getNumerator() const
bool use_empty() const
Definition: Value.h:322
unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
Definition: Local.cpp:1807
Analysis to compute lazy value information.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:66
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
TryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2.
void resize(size_type N)
Definition: SmallVector.h:355
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:870