JumpThreading.cpp
1 //===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Jump Threading pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Scalar/JumpThreading.h"
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/ADT/MapVector.h"
17 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/CFG.h"
32 #include "llvm/Analysis/Loads.h"
33 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/IR/BasicBlock.h"
39 #include "llvm/IR/CFG.h"
40 #include "llvm/IR/Constant.h"
41 #include "llvm/IR/ConstantRange.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DataLayout.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Instruction.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/IntrinsicInst.h"
50 #include "llvm/IR/Intrinsics.h"
51 #include "llvm/IR/LLVMContext.h"
52 #include "llvm/IR/MDBuilder.h"
53 #include "llvm/IR/Metadata.h"
54 #include "llvm/IR/Module.h"
55 #include "llvm/IR/PassManager.h"
56 #include "llvm/IR/PatternMatch.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/Use.h"
59 #include "llvm/IR/User.h"
60 #include "llvm/IR/Value.h"
61 #include "llvm/InitializePasses.h"
62 #include "llvm/Pass.h"
65 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/Debug.h"
69 #include "llvm/Transforms/Scalar.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstddef>
78 #include <cstdint>
79 #include <iterator>
80 #include <memory>
81 #include <utility>
82 
83 using namespace llvm;
84 using namespace jumpthreading;
85 
86 #define DEBUG_TYPE "jump-threading"
87 
88 STATISTIC(NumThreads, "Number of jumps threaded");
89 STATISTIC(NumFolds, "Number of terminators folded");
90 STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
91 
92 static cl::opt<unsigned>
93 BBDuplicateThreshold("jump-threading-threshold",
94  cl::desc("Max block size to duplicate for jump threading"),
95  cl::init(6), cl::Hidden);
96 
97 static cl::opt<unsigned>
98  ImplicationSearchThreshold(
99  "jump-threading-implication-search-threshold",
100  cl::desc("The number of predecessors to search for a stronger "
101  "condition to use to thread over a weaker condition"),
102  cl::init(3), cl::Hidden);
103 
104 static cl::opt<bool> PrintLVIAfterJumpThreading(
105  "print-lvi-after-jump-threading",
106  cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
107  cl::Hidden);
108 
109 static cl::opt<bool> JumpThreadingFreezeSelectCond(
110  "jump-threading-freeze-select-cond",
111  cl::desc("Freeze the condition when unfolding select"), cl::init(false),
112  cl::Hidden);
113 
114 static cl::opt<bool> ThreadAcrossLoopHeaders(
115  "jump-threading-across-loop-headers",
116  cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
117  cl::init(false), cl::Hidden);
118 
119 
120 namespace {
121 
122  /// This pass performs 'jump threading', which looks at blocks that have
123  /// multiple predecessors and multiple successors. If one or more of the
124  /// predecessors of the block can be proven to always jump to one of the
125  /// successors, we forward the edge from the predecessor to the successor by
126  /// duplicating the contents of this block.
127  ///
128  /// An example of when this can occur is code like this:
129  ///
130  /// if () { ...
131  /// X = 4;
132  /// }
133  /// if (X < 3) {
134  ///
135  /// In this case, the unconditional branch at the end of the first if can be
136  /// revectored to the false side of the second if.
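 /// A minimal sketch of the rewrite (illustrative only; block names are
 /// hypothetical and not taken from this file):
 ///
 ///   before:  then.bb:  X = 4; br merge.bb
 ///            merge.bb: br (X < 3), if.true, if.false
 ///   after:   then.bb:  X = 4; br if.false      ; X = 4 implies !(X < 3)
 ///            merge.bb: br (X < 3), if.true, if.false   ; for other preds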
137  class JumpThreading : public FunctionPass {
138  JumpThreadingPass Impl;
139 
140  public:
141  static char ID; // Pass identification
142 
143  JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1)
 144  : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) {
 145  initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
 146  }
147 
148  bool runOnFunction(Function &F) override;
149 
 150  void getAnalysisUsage(AnalysisUsage &AU) const override {
 151  AU.addRequired<DominatorTreeWrapperPass>();
 152  AU.addPreserved<DominatorTreeWrapperPass>();
 153  AU.addRequired<AAResultsWrapperPass>();
 154  AU.addRequired<LazyValueInfoWrapperPass>();
 155  AU.addPreserved<LazyValueInfoWrapperPass>();
 156  AU.addPreserved<GlobalsAAWrapperPass>();
 157  AU.addRequired<TargetLibraryInfoWrapperPass>();
 158  AU.addRequired<TargetTransformInfoWrapperPass>();
 159  }
160 
161  void releaseMemory() override { Impl.releaseMemory(); }
162  };
163 
164 } // end anonymous namespace
165 
166 char JumpThreading::ID = 0;
167 
168 INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
 169  "Jump Threading", false, false)
 170 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 171 INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
 172 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 173 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 174 INITIALIZE_PASS_END(JumpThreading, "jump-threading",
 175  "Jump Threading", false, false)
 176 
177 // Public interface to the Jump Threading pass
 178 FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) {
 179  return new JumpThreading(InsertFr, Threshold);
180 }
181 
 182 JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) {
 183  InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr;
184  DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
185 }
186 
187 // Update branch probability information according to conditional
188 // branch probability. This is usually made possible for cloned branches
189 // in inline instances by the context specific profile in the caller.
190 // For instance,
191 //
192 // [Block PredBB]
193 // [Branch PredBr]
194 // if (t) {
195 // Block A;
196 // } else {
197 // Block B;
198 // }
199 //
200 // [Block BB]
201 // cond = PN([true, %A], [..., %B]); // PHI node
202 // [Branch CondBr]
203 // if (cond) {
204 // ... // P(cond == true) = 1%
205 // }
206 //
207 // Here we know that when block A is taken, cond must be true, which means
208 // P(cond == true | A) = 1
209 //
210 // Given that P(cond == true) = P(cond == true | A) * P(A) +
211 // P(cond == true | B) * P(B)
212 // we get:
213 // P(cond == true ) = P(A) + P(cond == true | B) * P(B)
214 //
215 // which gives us:
216 // P(A) is less than P(cond == true), i.e.
217 // P(t == true) <= P(cond == true)
218 //
219 // In other words, if we know P(cond == true) is unlikely, we know
220 // that P(t == true) is also unlikely.
221 //
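// A worked instance of the inequality above (illustrative numbers only, not
// taken from any real profile): if CondBr's metadata gives TrueWeight = 1 and
// FalseWeight = 99, then P(cond == true) = 1%, so P(A) <= 1%, and the
// predecessor branch feeding block A can be annotated with weights of roughly
// 1:99 on the edge that leads to A, which is what the code below does.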
 222 static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
 223  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
224  if (!CondBr)
225  return;
226 
227  uint64_t TrueWeight, FalseWeight;
228  if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
229  return;
230 
231  if (TrueWeight + FalseWeight == 0)
 232  // Zero branch_weights do not give a hint for getting branch probabilities.
 233  // Technically it would result in division by a zero denominator, which is
 234  // TrueWeight + FalseWeight.
235  return;
236 
237  // Returns the outgoing edge of the dominating predecessor block
238  // that leads to the PhiNode's incoming block:
239  auto GetPredOutEdge =
240  [](BasicBlock *IncomingBB,
241  BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
242  auto *PredBB = IncomingBB;
243  auto *SuccBB = PhiBB;
 244  SmallPtrSet<BasicBlock *, 16> Visited;
 245  while (true) {
246  BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
247  if (PredBr && PredBr->isConditional())
248  return {PredBB, SuccBB};
249  Visited.insert(PredBB);
250  auto *SinglePredBB = PredBB->getSinglePredecessor();
251  if (!SinglePredBB)
252  return {nullptr, nullptr};
253 
254  // Stop searching when SinglePredBB has been visited. It means we see
255  // an unreachable loop.
256  if (Visited.count(SinglePredBB))
257  return {nullptr, nullptr};
258 
259  SuccBB = PredBB;
260  PredBB = SinglePredBB;
261  }
262  };
263 
264  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
265  Value *PhiOpnd = PN->getIncomingValue(i);
266  ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
267 
268  if (!CI || !CI->getType()->isIntegerTy(1))
269  continue;
270 
 271  BranchProbability BP =
 272  (CI->isOne() ? BranchProbability::getBranchProbability(
 273  TrueWeight, TrueWeight + FalseWeight)
 274  : BranchProbability::getBranchProbability(
 275  FalseWeight, TrueWeight + FalseWeight));
276 
277  auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
278  if (!PredOutEdge.first)
279  return;
280 
281  BasicBlock *PredBB = PredOutEdge.first;
282  BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
283  if (!PredBr)
284  return;
285 
286  uint64_t PredTrueWeight, PredFalseWeight;
287  // FIXME: We currently only set the profile data when it is missing.
288  // With PGO, this can be used to refine even existing profile data with
289  // context information. This needs to be done after more performance
290  // testing.
291  if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight))
292  continue;
293 
 294  // We cannot infer anything useful when BP >= 50%, because BP is the
295  // upper bound probability value.
296  if (BP >= BranchProbability(50, 100))
297  continue;
298 
299  SmallVector<uint32_t, 2> Weights;
300  if (PredBr->getSuccessor(0) == PredOutEdge.second) {
301  Weights.push_back(BP.getNumerator());
302  Weights.push_back(BP.getCompl().getNumerator());
303  } else {
304  Weights.push_back(BP.getCompl().getNumerator());
305  Weights.push_back(BP.getNumerator());
306  }
307  PredBr->setMetadata(LLVMContext::MD_prof,
308  MDBuilder(PredBr->getParent()->getContext())
309  .createBranchWeights(Weights));
310  }
311 }
312 
313 /// runOnFunction - Toplevel algorithm.
 314 bool JumpThreading::runOnFunction(Function &F) {
 315  if (skipFunction(F))
316  return false;
317  auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 318  // Jump threading makes no sense for targets with divergent control flow.
319  if (TTI->hasBranchDivergence())
320  return false;
321  auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
322  auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
323  auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
324  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 325  DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
 326  std::unique_ptr<BlockFrequencyInfo> BFI;
327  std::unique_ptr<BranchProbabilityInfo> BPI;
328  if (F.hasProfileData()) {
329  LoopInfo LI{DominatorTree(F)};
330  BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
331  BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
332  }
333 
334  bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(),
335  std::move(BFI), std::move(BPI));
 336  if (PrintLVIAfterJumpThreading) {
 337  dbgs() << "LVI for function '" << F.getName() << "':\n";
338  LVI->printLVI(F, DTU.getDomTree(), dbgs());
339  }
340  return Changed;
341 }
342 
 343 PreservedAnalyses JumpThreadingPass::run(Function &F,
 344  FunctionAnalysisManager &AM) {
 345  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
346  // Jump Threading has no sense for the targets with divergent CF
347  if (TTI.hasBranchDivergence())
348  return PreservedAnalyses::all();
349  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
350  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
351  auto &LVI = AM.getResult<LazyValueAnalysis>(F);
352  auto &AA = AM.getResult<AAManager>(F);
 353  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
 354 
355  std::unique_ptr<BlockFrequencyInfo> BFI;
356  std::unique_ptr<BranchProbabilityInfo> BPI;
357  if (F.hasProfileData()) {
358  LoopInfo LI{DominatorTree(F)};
359  BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
360  BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
361  }
362 
363  bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
364  std::move(BFI), std::move(BPI));
365 
 366  if (PrintLVIAfterJumpThreading) {
 367  dbgs() << "LVI for function '" << F.getName() << "':\n";
368  LVI.printLVI(F, DTU.getDomTree(), dbgs());
369  }
370 
371  if (!Changed)
372  return PreservedAnalyses::all();
 373  PreservedAnalyses PA;
 374  PA.preserve<DominatorTreeAnalysis>();
 375  PA.preserve<LazyValueAnalysis>();
 376  return PA;
377 }
378 
 379 bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
 380  LazyValueInfo *LVI_, AliasAnalysis *AA_,
381  DomTreeUpdater *DTU_, bool HasProfileData_,
382  std::unique_ptr<BlockFrequencyInfo> BFI_,
383  std::unique_ptr<BranchProbabilityInfo> BPI_) {
384  LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
385  TLI = TLI_;
386  LVI = LVI_;
387  AA = AA_;
388  DTU = DTU_;
389  BFI.reset();
390  BPI.reset();
391  // When profile data is available, we need to update edge weights after
392  // successful jump threading, which requires both BPI and BFI being available.
393  HasProfileData = HasProfileData_;
394  auto *GuardDecl = F.getParent()->getFunction(
395  Intrinsic::getName(Intrinsic::experimental_guard));
396  HasGuards = GuardDecl && !GuardDecl->use_empty();
397  if (HasProfileData) {
398  BPI = std::move(BPI_);
399  BFI = std::move(BFI_);
400  }
401 
402  // Reduce the number of instructions duplicated when optimizing strictly for
403  // size.
404  if (BBDuplicateThreshold.getNumOccurrences())
405  BBDupThreshold = BBDuplicateThreshold;
406  else if (F.hasFnAttribute(Attribute::MinSize))
407  BBDupThreshold = 3;
408  else
409  BBDupThreshold = DefaultBBDupThreshold;
410 
 411  // JumpThreading must not process blocks unreachable from entry. It's a
412  // waste of compute time and can potentially lead to hangs.
413  SmallPtrSet<BasicBlock *, 16> Unreachable;
414  assert(DTU && "DTU isn't passed into JumpThreading before using it.");
415  assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
416  DominatorTree &DT = DTU->getDomTree();
417  for (auto &BB : F)
418  if (!DT.isReachableFromEntry(&BB))
419  Unreachable.insert(&BB);
420 
 421  if (!ThreadAcrossLoopHeaders)
 422  findLoopHeaders(F);
423 
424  bool EverChanged = false;
425  bool Changed;
426  do {
427  Changed = false;
428  for (auto &BB : F) {
429  if (Unreachable.count(&BB))
430  continue;
431  while (processBlock(&BB)) // Thread all of the branches we can over BB.
432  Changed = true;
433 
434  // Jump threading may have introduced redundant debug values into BB
435  // which should be removed.
436  if (Changed)
 437  RemoveRedundantDbgInstrs(&BB);
 438 
439  // Stop processing BB if it's the entry or is now deleted. The following
 440  // routines attempt to eliminate BB, and locating a suitable replacement
441  // for the entry is non-trivial.
442  if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB))
443  continue;
444 
445  if (pred_empty(&BB)) {
446  // When processBlock makes BB unreachable it doesn't bother to fix up
447  // the instructions in it. We must remove BB to prevent invalid IR.
448  LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
449  << "' with terminator: " << *BB.getTerminator()
450  << '\n');
451  LoopHeaders.erase(&BB);
452  LVI->eraseBlock(&BB);
453  DeleteDeadBlock(&BB, DTU);
454  Changed = true;
455  continue;
456  }
457 
458  // processBlock doesn't thread BBs with unconditional TIs. However, if BB
459  // is "almost empty", we attempt to merge BB with its sole successor.
460  auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
461  if (BI && BI->isUnconditional()) {
462  BasicBlock *Succ = BI->getSuccessor(0);
463  if (
464  // The terminator must be the only non-phi instruction in BB.
465  BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
466  // Don't alter Loop headers and latches to ensure another pass can
467  // detect and transform nested loops later.
 468  !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
 469  TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
 470  RemoveRedundantDbgInstrs(Succ);
 471  // BB is valid for cleanup here because we passed in DTU. F remains
472  // BB's parent until a DTU->getDomTree() event.
473  LVI->eraseBlock(&BB);
474  Changed = true;
475  }
476  }
477  }
478  EverChanged |= Changed;
479  } while (Changed);
480 
481  LoopHeaders.clear();
482  return EverChanged;
483 }
484 
485 // Replace uses of Cond with ToVal when safe to do so. If all uses are
486 // replaced, we can remove Cond. We cannot blindly replace all uses of Cond
487 // because we may incorrectly replace uses when guards/assumes are uses
488 // of `Cond`, and we used the guards/assume to reason about the `Cond` value
489 // at the end of block. RAUW unconditionally replaces all uses
490 // including the guards/assumes themselves and the uses before the
491 // guard/assume.
492 static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
493  assert(Cond->getType() == ToVal->getType());
494  auto *BB = Cond->getParent();
495  // We can unconditionally replace all uses in non-local blocks (i.e. uses
496  // strictly dominated by BB), since LVI information is true from the
497  // terminator of BB.
 498  replaceNonLocalUsesWith(Cond, ToVal);
 499  for (Instruction &I : reverse(*BB)) {
500  // Reached the Cond whose uses we are trying to replace, so there are no
501  // more uses.
502  if (&I == Cond)
503  break;
504  // We only replace uses in instructions that are guaranteed to reach the end
505  // of BB, where we know Cond is ToVal.
 506  if (!isGuaranteedToTransferExecutionToSuccessor(&I))
 507  break;
508  I.replaceUsesOfWith(Cond, ToVal);
509  }
510  if (Cond->use_empty() && !Cond->mayHaveSideEffects())
511  Cond->eraseFromParent();
512 }
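// Illustrative instance of the guard/assume hazard handled above
// (hypothetical IR, not taken from this file):
//
//   %c = icmp ult i32 %x, 10
//   call void @llvm.assume(i1 %c)   ; this use is what taught LVI that %c is true
//   br i1 %c, label %t, label %f
//
// A plain RAUW of %c with 'true' would also rewrite the assume's operand and
// any uses before it, destroying the fact the deduction relied on, so only
// uses in instructions guaranteed to reach the end of the block are rewritten.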
513 
514 /// Return the cost of duplicating a piece of this block from first non-phi
515 /// and before StopAt instruction to thread across it. Stop scanning the block
516 /// when exceeding the threshold. If duplication is impossible, returns ~0U.
 517 static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
 518  Instruction *StopAt,
519  unsigned Threshold) {
520  assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
521  /// Ignore PHI nodes, these will be flattened when duplication happens.
522  BasicBlock::const_iterator I(BB->getFirstNonPHI());
523 
524  // FIXME: THREADING will delete values that are just used to compute the
525  // branch, so they shouldn't count against the duplication cost.
526 
527  unsigned Bonus = 0;
528  if (BB->getTerminator() == StopAt) {
529  // Threading through a switch statement is particularly profitable. If this
530  // block ends in a switch, decrease its cost to make it more likely to
531  // happen.
532  if (isa<SwitchInst>(StopAt))
533  Bonus = 6;
534 
535  // The same holds for indirect branches, but slightly more so.
536  if (isa<IndirectBrInst>(StopAt))
537  Bonus = 8;
538  }
539 
540  // Bump the threshold up so the early exit from the loop doesn't skip the
541  // terminator-based Size adjustment at the end.
542  Threshold += Bonus;
543 
544  // Sum up the cost of each instruction until we get to the terminator. Don't
545  // include the terminator because the copy won't include it.
546  unsigned Size = 0;
547  for (; &*I != StopAt; ++I) {
548 
549  // Stop scanning the block if we've reached the threshold.
550  if (Size > Threshold)
551  return Size;
552 
553  // Debugger intrinsics don't incur code size.
554  if (isa<DbgInfoIntrinsic>(I)) continue;
555 
556  // Pseudo-probes don't incur code size.
557  if (isa<PseudoProbeInst>(I))
558  continue;
559 
560  // If this is a pointer->pointer bitcast, it is free.
561  if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
562  continue;
563 
564  // Freeze instruction is free, too.
565  if (isa<FreezeInst>(I))
566  continue;
567 
568  // Bail out if this instruction gives back a token type, it is not possible
569  // to duplicate it if it is used outside this BB.
570  if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
571  return ~0U;
572 
573  // All other instructions count for at least one unit.
574  ++Size;
575 
576  // Calls are more expensive. If they are non-intrinsic calls, we model them
577  // as having cost of 4. If they are a non-vector intrinsic, we model them
578  // as having cost of 2 total, and if they are a vector intrinsic, we model
579  // them as having cost 1.
580  if (const CallInst *CI = dyn_cast<CallInst>(I)) {
581  if (CI->cannotDuplicate() || CI->isConvergent())
582  // Blocks with NoDuplicate are modelled as having infinite cost, so they
583  // are never duplicated.
584  return ~0U;
585  else if (!isa<IntrinsicInst>(CI))
586  Size += 3;
587  else if (!CI->getType()->isVectorTy())
588  Size += 1;
589  }
590  }
591 
592  return Size > Bonus ? Size - Bonus : 0;
593 }
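// A worked cost example for the helper above (hypothetical block contents,
// not from this file): two adds (1 unit each), a pointer-to-pointer bitcast
// (free) and a non-intrinsic call (1 unit plus 3 extra) sum to 6 units; if
// the block ended in a switch, the bonus of 6 would then be subtracted from
// the total, yielding a final cost of 0.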
594 
595 /// findLoopHeaders - We do not want jump threading to turn proper loop
596 /// structures into irreducible loops. Doing this breaks up the loop nesting
597 /// hierarchy and pessimizes later transformations. To prevent this from
598 /// happening, we first have to find the loop headers. Here we approximate this
599 /// by finding targets of backedges in the CFG.
600 ///
601 /// Note that there definitely are cases when we want to allow threading of
602 /// edges across a loop header. For example, threading a jump from outside the
603 /// loop (the preheader) to an exit block of the loop is definitely profitable.
604 /// It is also almost always profitable to thread backedges from within the loop
605 /// to exit blocks, and is often profitable to thread backedges to other blocks
606 /// within the loop (forming a nested loop). This simple analysis is not rich
607 /// enough to track all of these properties and keep it up-to-date as the CFG
608 /// mutates, so we don't allow any of these transformations.
 609 void JumpThreadingPass::findLoopHeaders(Function &F) {
 610  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges;
 611  FindFunctionBackedges(F, Edges);
612 
613  for (const auto &Edge : Edges)
614  LoopHeaders.insert(Edge.second);
615 }
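// Illustrative example (hypothetical CFG, not from this file): with
// entry -> header -> body -> header, FindFunctionBackedges reports the edge
// body -> header, so 'header' lands in LoopHeaders and any threading that
// would bypass it is suppressed unless -jump-threading-across-loop-headers
// is set.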
616 
617 /// getKnownConstant - Helper method to determine if we can thread over a
618 /// terminator with the given value as its condition, and if so what value to
619 /// use for that. What kind of value this is depends on whether we want an
620 /// integer or a block address, but an undef is always accepted.
621 /// Returns null if Val is null or not an appropriate constant.
 622 static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
 623  if (!Val)
624  return nullptr;
625 
626  // Undef is "known" enough.
627  if (UndefValue *U = dyn_cast<UndefValue>(Val))
628  return U;
629 
 630  if (Preference == WantBlockAddress)
 631  return dyn_cast<BlockAddress>(Val->stripPointerCasts());
632 
633  return dyn_cast<ConstantInt>(Val);
634 }
635 
636 /// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
637 /// if we can infer that the value is a known ConstantInt/BlockAddress or undef
638 /// in any of our predecessors. If so, return the known list of value and pred
639 /// BB in the result vector.
640 ///
641 /// This returns true if there were any known values.
 642 bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
 643  Value *V, BasicBlock *BB, PredValueInfo &Result,
 644  ConstantPreference Preference, DenseSet<Value *> &RecursionSet,
 645  Instruction *CxtI) {
646  // This method walks up use-def chains recursively. Because of this, we could
647  // get into an infinite loop going around loops in the use-def chain. To
648  // prevent this, keep track of what (value, block) pairs we've already visited
649  // and terminate the search if we loop back to them
650  if (!RecursionSet.insert(V).second)
651  return false;
652 
653  // If V is a constant, then it is known in all predecessors.
654  if (Constant *KC = getKnownConstant(V, Preference)) {
655  for (BasicBlock *Pred : predecessors(BB))
656  Result.emplace_back(KC, Pred);
657 
658  return !Result.empty();
659  }
660 
661  // If V is a non-instruction value, or an instruction in a different block,
662  // then it can't be derived from a PHI.
663  Instruction *I = dyn_cast<Instruction>(V);
664  if (!I || I->getParent() != BB) {
665 
666  // Okay, if this is a live-in value, see if it has a known value at the end
667  // of any of our predecessors.
668  //
669  // FIXME: This should be an edge property, not a block end property.
670  /// TODO: Per PR2563, we could infer value range information about a
671  /// predecessor based on its terminator.
672  //
673  // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
674  // "I" is a non-local compare-with-a-constant instruction. This would be
675  // able to handle value inequalities better, for example if the compare is
676  // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
677  // Perhaps getConstantOnEdge should be smart enough to do this?
678  for (BasicBlock *P : predecessors(BB)) {
679  // If the value is known by LazyValueInfo to be a constant in a
680  // predecessor, use that information to try to thread this block.
681  Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
682  if (Constant *KC = getKnownConstant(PredCst, Preference))
683  Result.emplace_back(KC, P);
684  }
685 
686  return !Result.empty();
687  }
688 
689  /// If I is a PHI node, then we know the incoming values for any constants.
690  if (PHINode *PN = dyn_cast<PHINode>(I)) {
691  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
692  Value *InVal = PN->getIncomingValue(i);
693  if (Constant *KC = getKnownConstant(InVal, Preference)) {
694  Result.emplace_back(KC, PN->getIncomingBlock(i));
695  } else {
696  Constant *CI = LVI->getConstantOnEdge(InVal,
697  PN->getIncomingBlock(i),
698  BB, CxtI);
699  if (Constant *KC = getKnownConstant(CI, Preference))
700  Result.emplace_back(KC, PN->getIncomingBlock(i));
701  }
702  }
703 
704  return !Result.empty();
705  }
706 
707  // Handle Cast instructions.
708  if (CastInst *CI = dyn_cast<CastInst>(I)) {
709  Value *Source = CI->getOperand(0);
710  computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
711  RecursionSet, CxtI);
712  if (Result.empty())
713  return false;
714 
715  // Convert the known values.
716  for (auto &R : Result)
717  R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());
718 
719  return true;
720  }
721 
722  if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
723  Value *Source = FI->getOperand(0);
724  computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
725  RecursionSet, CxtI);
726 
727  erase_if(Result, [](auto &Pair) {
728  return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
729  });
730 
731  return !Result.empty();
732  }
733 
734  // Handle some boolean conditions.
735  if (I->getType()->getPrimitiveSizeInBits() == 1) {
736  using namespace PatternMatch;
737 
738  assert(Preference == WantInteger && "One-bit non-integer type?");
739  // X | true -> true
740  // X & false -> false
741  Value *Op0, *Op1;
742  if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
743  match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
744  PredValueInfoTy LHSVals, RHSVals;
745 
746  computeValueKnownInPredecessorsImpl(Op0, BB, LHSVals, WantInteger,
747  RecursionSet, CxtI);
748  computeValueKnownInPredecessorsImpl(Op1, BB, RHSVals, WantInteger,
749  RecursionSet, CxtI);
750 
751  if (LHSVals.empty() && RHSVals.empty())
752  return false;
753 
754  ConstantInt *InterestingVal;
755  if (match(I, m_LogicalOr()))
756  InterestingVal = ConstantInt::getTrue(I->getContext());
757  else
758  InterestingVal = ConstantInt::getFalse(I->getContext());
759 
760  SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
761 
762  // Scan for the sentinel. If we find an undef, force it to the
763  // interesting value: x|undef -> true and x&undef -> false.
764  for (const auto &LHSVal : LHSVals)
765  if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
766  Result.emplace_back(InterestingVal, LHSVal.second);
767  LHSKnownBBs.insert(LHSVal.second);
768  }
769  for (const auto &RHSVal : RHSVals)
770  if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
771  // If we already inferred a value for this block on the LHS, don't
772  // re-add it.
773  if (!LHSKnownBBs.count(RHSVal.second))
774  Result.emplace_back(InterestingVal, RHSVal.second);
775  }
776 
777  return !Result.empty();
778  }
779 
780  // Handle the NOT form of XOR.
781  if (I->getOpcode() == Instruction::Xor &&
782  isa<ConstantInt>(I->getOperand(1)) &&
783  cast<ConstantInt>(I->getOperand(1))->isOne()) {
784  computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
785  WantInteger, RecursionSet, CxtI);
786  if (Result.empty())
787  return false;
788 
789  // Invert the known values.
790  for (auto &R : Result)
791  R.first = ConstantExpr::getNot(R.first);
792 
793  return true;
794  }
795 
796  // Try to simplify some other binary operator values.
797  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
 798  assert(Preference != WantBlockAddress
 799  && "A binary operator creating a block address?");
800  if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
801  PredValueInfoTy LHSVals;
802  computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
803  WantInteger, RecursionSet, CxtI);
804 
805  // Try to use constant folding to simplify the binary operator.
806  for (const auto &LHSVal : LHSVals) {
807  Constant *V = LHSVal.first;
808  Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
809 
810  if (Constant *KC = getKnownConstant(Folded, WantInteger))
811  Result.emplace_back(KC, LHSVal.second);
812  }
813  }
814 
815  return !Result.empty();
816  }
817 
818  // Handle compare with phi operand, where the PHI is defined in this block.
819  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
820  assert(Preference == WantInteger && "Compares only produce integers");
821  Type *CmpType = Cmp->getType();
822  Value *CmpLHS = Cmp->getOperand(0);
823  Value *CmpRHS = Cmp->getOperand(1);
824  CmpInst::Predicate Pred = Cmp->getPredicate();
825 
826  PHINode *PN = dyn_cast<PHINode>(CmpLHS);
827  if (!PN)
828  PN = dyn_cast<PHINode>(CmpRHS);
829  if (PN && PN->getParent() == BB) {
830  const DataLayout &DL = PN->getModule()->getDataLayout();
831  // We can do this simplification if any comparisons fold to true or false.
832  // See if any do.
833  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
834  BasicBlock *PredBB = PN->getIncomingBlock(i);
835  Value *LHS, *RHS;
836  if (PN == CmpLHS) {
837  LHS = PN->getIncomingValue(i);
838  RHS = CmpRHS->DoPHITranslation(BB, PredBB);
839  } else {
840  LHS = CmpLHS->DoPHITranslation(BB, PredBB);
841  RHS = PN->getIncomingValue(i);
842  }
843  Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL});
844  if (!Res) {
845  if (!isa<Constant>(RHS))
846  continue;
847 
848  // getPredicateOnEdge call will make no sense if LHS is defined in BB.
849  auto LHSInst = dyn_cast<Instruction>(LHS);
850  if (LHSInst && LHSInst->getParent() == BB)
851  continue;
852 
 853  LazyValueInfo::Tristate ResT;
 854  ResT = LVI->getPredicateOnEdge(Pred, LHS,
855  cast<Constant>(RHS), PredBB, BB,
856  CxtI ? CxtI : Cmp);
857  if (ResT == LazyValueInfo::Unknown)
858  continue;
859  Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
860  }
861 
862  if (Constant *KC = getKnownConstant(Res, WantInteger))
863  Result.emplace_back(KC, PredBB);
864  }
865 
866  return !Result.empty();
867  }
868 
869  // If comparing a live-in value against a constant, see if we know the
870  // live-in value on any predecessors.
871  if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
872  Constant *CmpConst = cast<Constant>(CmpRHS);
873 
874  if (!isa<Instruction>(CmpLHS) ||
875  cast<Instruction>(CmpLHS)->getParent() != BB) {
876  for (BasicBlock *P : predecessors(BB)) {
877  // If the value is known by LazyValueInfo to be a constant in a
878  // predecessor, use that information to try to thread this block.
 879  LazyValueInfo::Tristate Res =
 880  LVI->getPredicateOnEdge(Pred, CmpLHS,
881  CmpConst, P, BB, CxtI ? CxtI : Cmp);
882  if (Res == LazyValueInfo::Unknown)
883  continue;
884 
885  Constant *ResC = ConstantInt::get(CmpType, Res);
886  Result.emplace_back(ResC, P);
887  }
888 
889  return !Result.empty();
890  }
891 
892  // InstCombine can fold some forms of constant range checks into
 893  // (icmp (add (x, C1)), C2). See if we have such a thing with
894  // x as a live-in.
895  {
896  using namespace PatternMatch;
897 
898  Value *AddLHS;
899  ConstantInt *AddConst;
900  if (isa<ConstantInt>(CmpConst) &&
901  match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
902  if (!isa<Instruction>(AddLHS) ||
903  cast<Instruction>(AddLHS)->getParent() != BB) {
904  for (BasicBlock *P : predecessors(BB)) {
905  // If the value is known by LazyValueInfo to be a ConstantRange in
906  // a predecessor, use that information to try to thread this
907  // block.
908  ConstantRange CR = LVI->getConstantRangeOnEdge(
909  AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
910  // Propagate the range through the addition.
911  CR = CR.add(AddConst->getValue());
912 
913  // Get the range where the compare returns true.
 914  ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(
 915  Pred, cast<ConstantInt>(CmpConst)->getValue());
916 
917  Constant *ResC;
918  if (CmpRange.contains(CR))
919  ResC = ConstantInt::getTrue(CmpType);
920  else if (CmpRange.inverse().contains(CR))
921  ResC = ConstantInt::getFalse(CmpType);
922  else
923  continue;
924 
925  Result.emplace_back(ResC, P);
926  }
927 
928  return !Result.empty();
929  }
930  }
931  }
932 
933  // Try to find a constant value for the LHS of a comparison,
934  // and evaluate it statically if we can.
935  PredValueInfoTy LHSVals;
936  computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
937  WantInteger, RecursionSet, CxtI);
938 
939  for (const auto &LHSVal : LHSVals) {
940  Constant *V = LHSVal.first;
941  Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
942  if (Constant *KC = getKnownConstant(Folded, WantInteger))
943  Result.emplace_back(KC, LHSVal.second);
944  }
945 
946  return !Result.empty();
947  }
948  }
949 
950  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
951  // Handle select instructions where at least one operand is a known constant
952  // and we can figure out the condition value for any predecessor block.
953  Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
954  Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
955  PredValueInfoTy Conds;
956  if ((TrueVal || FalseVal) &&
957  computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
958  WantInteger, RecursionSet, CxtI)) {
959  for (auto &C : Conds) {
960  Constant *Cond = C.first;
961 
962  // Figure out what value to use for the condition.
963  bool KnownCond;
964  if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
965  // A known boolean.
966  KnownCond = CI->isOne();
967  } else {
968  assert(isa<UndefValue>(Cond) && "Unexpected condition value");
969  // Either operand will do, so be sure to pick the one that's a known
970  // constant.
971  // FIXME: Do this more cleverly if both values are known constants?
972  KnownCond = (TrueVal != nullptr);
973  }
974 
975  // See if the select has a known constant value for this predecessor.
976  if (Constant *Val = KnownCond ? TrueVal : FalseVal)
977  Result.emplace_back(Val, C.second);
978  }
979 
980  return !Result.empty();
981  }
982  }
983 
984  // If all else fails, see if LVI can figure out a constant value for us.
985  assert(CxtI->getParent() == BB && "CxtI should be in BB");
986  Constant *CI = LVI->getConstant(V, CxtI);
987  if (Constant *KC = getKnownConstant(CI, Preference)) {
988  for (BasicBlock *Pred : predecessors(BB))
989  Result.emplace_back(KC, Pred);
990  }
991 
992  return !Result.empty();
993 }
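// Illustrative query against the routine above (hypothetical IR, not from
// this file): for
//   %p = phi i1 [ true, %a ], [ %x, %b ]
//   br i1 %p, label %t, label %f
// the result vector would contain (true, %a), plus (%x's constant, %b) if
// LVI can prove %x constant on the edge from %b into the current block.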
994 
995 /// getBestDestForJumpOnUndef - If we determine that the specified block ends
996 /// in an undefined jump, decide which block is best to revector to.
997 ///
998 /// Since we can pick an arbitrary destination, we pick the successor with the
999 /// fewest predecessors. This should reduce the in-degree of the others.
 1000 unsigned JumpThreadingPass::getBestDestForJumpOnUndef(BasicBlock *BB) {
 1001  Instruction *BBTerm = BB->getTerminator();
1002  unsigned MinSucc = 0;
1003  BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
1004  // Compute the successor with the minimum number of predecessors.
1005  unsigned MinNumPreds = pred_size(TestBB);
1006  for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1007  TestBB = BBTerm->getSuccessor(i);
1008  unsigned NumPreds = pred_size(TestBB);
1009  if (NumPreds < MinNumPreds) {
1010  MinSucc = i;
1011  MinNumPreds = NumPreds;
1012  }
1013  }
1014 
1015  return MinSucc;
1016 }
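// Illustrative pick (hypothetical successors, not from this file): for a
// terminator with successors s1 (3 preds), s2 (1 pred) and s3 (2 preds), the
// helper returns the index of s2, so an undef-based branch is folded toward
// s2 and the in-degree of the busier blocks is left untouched.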
1017 
 1018 static bool hasAddressTakenAndUsed(BasicBlock *BB) {
 1019  if (!BB->hasAddressTaken()) return false;
 1020 
 1021  // If the block has its address taken, it may be a tree of dead constants
 1022  // hanging off of it. These shouldn't keep the block alive.
 1023  BlockAddress *BA = BlockAddress::get(BB);
 1024  BA->removeDeadConstantUsers();
 1025  return !BA->use_empty();
1026 }
1027 
1028 /// processBlock - If there are any predecessors whose control can be threaded
1029 /// through to a successor, transform them now.
 1030 bool JumpThreadingPass::processBlock(BasicBlock *BB) {
 1031  // If the block is trivially dead, just return and let the caller nuke it.
1032  // This simplifies other transformations.
1033  if (DTU->isBBPendingDeletion(BB) ||
1034  (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
1035  return false;
1036 
1037  // If this block has a single predecessor, and if that pred has a single
1038  // successor, merge the blocks. This encourages recursive jump threading
1039  // because now the condition in this block can be threaded through
1040  // predecessors of our predecessor block.
1041  if (maybeMergeBasicBlockIntoOnlyPred(BB))
1042  return true;
1043 
1044  if (tryToUnfoldSelectInCurrBB(BB))
1045  return true;
1046 
1047  // Look if we can propagate guards to predecessors.
1048  if (HasGuards && processGuards(BB))
1049  return true;
1050 
1051  // What kind of constant we're looking for.
 1052  ConstantPreference Preference = WantInteger;
 1053 
1054  // Look to see if the terminator is a conditional branch, switch or indirect
1055  // branch, if not we can't thread it.
1056  Value *Condition;
1057  Instruction *Terminator = BB->getTerminator();
1058  if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
1059  // Can't thread an unconditional jump.
1060  if (BI->isUnconditional()) return false;
1061  Condition = BI->getCondition();
1062  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
1063  Condition = SI->getCondition();
1064  } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
1065  // Can't thread indirect branch with no successors.
1066  if (IB->getNumSuccessors() == 0) return false;
1067  Condition = IB->getAddress()->stripPointerCasts();
 1068  Preference = WantBlockAddress;
 1069  } else {
1070  return false; // Must be an invoke or callbr.
1071  }
1072 
1073  // Keep track if we constant folded the condition in this invocation.
1074  bool ConstantFolded = false;
1075 
1076  // Run constant folding to see if we can reduce the condition to a simple
1077  // constant.
1078  if (Instruction *I = dyn_cast<Instruction>(Condition)) {
1079  Value *SimpleVal =
1080  ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
1081  if (SimpleVal) {
1082  I->replaceAllUsesWith(SimpleVal);
1083  if (isInstructionTriviallyDead(I, TLI))
1084  I->eraseFromParent();
1085  Condition = SimpleVal;
1086  ConstantFolded = true;
1087  }
1088  }
1089 
1090  // If the terminator is branching on an undef or freeze undef, we can pick any
1091  // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
1092  auto *FI = dyn_cast<FreezeInst>(Condition);
1093  if (isa<UndefValue>(Condition) ||
1094  (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1095  unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1096  std::vector<DominatorTree::UpdateType> Updates;
1097 
1098  // Fold the branch/switch.
1099  Instruction *BBTerm = BB->getTerminator();
1100  Updates.reserve(BBTerm->getNumSuccessors());
1101  for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1102  if (i == BestSucc) continue;
1103  BasicBlock *Succ = BBTerm->getSuccessor(i);
1104  Succ->removePredecessor(BB, true);
1105  Updates.push_back({DominatorTree::Delete, BB, Succ});
1106  }
1107 
1108  LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1109  << "' folding undef terminator: " << *BBTerm << '\n');
1110  BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
1111  ++NumFolds;
1112  BBTerm->eraseFromParent();
1113  DTU->applyUpdatesPermissive(Updates);
1114  if (FI)
1115  FI->eraseFromParent();
1116  return true;
1117  }
1118 
1119  // If the terminator of this block is branching on a constant, simplify the
1120  // terminator to an unconditional branch. This can occur due to threading in
1121  // other blocks.
1122  if (getKnownConstant(Condition, Preference)) {
1123  LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1124  << "' folding terminator: " << *BB->getTerminator()
1125  << '\n');
1126  ++NumFolds;
1127  ConstantFoldTerminator(BB, true, nullptr, DTU);
1128  if (HasProfileData)
1129  BPI->eraseBlock(BB);
1130  return true;
1131  }
1132 
1133  Instruction *CondInst = dyn_cast<Instruction>(Condition);
1134 
1135  // All the rest of our checks depend on the condition being an instruction.
1136  if (!CondInst) {
1137  // FIXME: Unify this with code below.
1138  if (processThreadableEdges(Condition, BB, Preference, Terminator))
1139  return true;
1140  return ConstantFolded;
1141  }
1142 
1143  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
1144  // If we're branching on a conditional, LVI might be able to determine
1145  // it's value at the branch instruction. We only handle comparisons
1146  // against a constant at this time.
1147  // TODO: This should be extended to handle switches as well.
1148  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
1149  Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
1150  if (CondBr && CondConst) {
 1151  // We should have returned as soon as we turned a conditional branch into
 1152  // an unconditional one, because it's no longer interesting as far as jump
 1153  // threading is concerned.
1154  assert(CondBr->isConditional() && "Threading on unconditional terminator");
1155 
 1156  LazyValueInfo::Tristate Ret =
 1157  LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1158  CondConst, CondBr, /*UseBlockValue=*/false);
1159  if (Ret != LazyValueInfo::Unknown) {
1160  unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0;
1161  unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
1162  BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove);
1163  ToRemoveSucc->removePredecessor(BB, true);
1164  BranchInst *UncondBr =
1165  BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
1166  UncondBr->setDebugLoc(CondBr->getDebugLoc());
1167  ++NumFolds;
1168  CondBr->eraseFromParent();
1169  if (CondCmp->use_empty())
1170  CondCmp->eraseFromParent();
1171  // We can safely replace *some* uses of the CondInst if it has
1172  // exactly one value as returned by LVI. RAUW is incorrect in the
1173  // presence of guards and assumes, that have the `Cond` as the use. This
1174  // is because we use the guards/assume to reason about the `Cond` value
1175  // at the end of block, but RAUW unconditionally replaces all uses
1176  // including the guards/assumes themselves and the uses before the
1177  // guard/assume.
1178  else if (CondCmp->getParent() == BB) {
1179  auto *CI = Ret == LazyValueInfo::True ?
1180  ConstantInt::getTrue(CondCmp->getType()) :
1181  ConstantInt::getFalse(CondCmp->getType());
1182  replaceFoldableUses(CondCmp, CI);
1183  }
1184  DTU->applyUpdatesPermissive(
1185  {{DominatorTree::Delete, BB, ToRemoveSucc}});
1186  if (HasProfileData)
1187  BPI->eraseBlock(BB);
1188  return true;
1189  }
1190 
1191  // We did not manage to simplify this branch, try to see whether
1192  // CondCmp depends on a known phi-select pattern.
1193  if (tryToUnfoldSelect(CondCmp, BB))
1194  return true;
1195  }
1196  }
1197 
1198  if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
1199  if (tryToUnfoldSelect(SI, BB))
1200  return true;
1201 
1202  // Check for some cases that are worth simplifying. Right now we want to look
1203  // for loads that are used by a switch or by the condition for the branch. If
1204  // we see one, check to see if it's partially redundant. If so, insert a PHI
1205  // which can then be used to thread the values.
1206  Value *SimplifyValue = CondInst;
1207 
1208  if (auto *FI = dyn_cast<FreezeInst>(SimplifyValue))
1209  // Look into freeze's operand
1210  SimplifyValue = FI->getOperand(0);
1211 
1212  if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1213  if (isa<Constant>(CondCmp->getOperand(1)))
1214  SimplifyValue = CondCmp->getOperand(0);
1215 
1216  // TODO: There are other places where load PRE would be profitable, such as
1217  // more complex comparisons.
1218  if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
1219  if (simplifyPartiallyRedundantLoad(LoadI))
1220  return true;
1221 
1222  // Before threading, try to propagate profile data backwards:
1223  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1224  if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
 1225  updatePredecessorProfileMetadata(PN, BB);
 1226 
1227  // Handle a variety of cases where we are branching on something derived from
1228  // a PHI node in the current block. If we can prove that any predecessors
1229  // compute a predictable value based on a PHI node, thread those predecessors.
1230  if (processThreadableEdges(CondInst, BB, Preference, Terminator))
1231  return true;
1232 
1233  // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1234  // the current block, see if we can simplify.
1235  PHINode *PN = dyn_cast<PHINode>(
1236  isa<FreezeInst>(CondInst) ? cast<FreezeInst>(CondInst)->getOperand(0)
1237  : CondInst);
1238 
1239  if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1240  return processBranchOnPHI(PN);
1241 
1242  // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1243  if (CondInst->getOpcode() == Instruction::Xor &&
1244  CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1245  return processBranchOnXOR(cast<BinaryOperator>(CondInst));
1246 
1247  // Search for a stronger dominating condition that can be used to simplify a
1248  // conditional branch leaving BB.
1249  if (processImpliedCondition(BB))
1250  return true;
1251 
1252  return false;
1253 }
1254 
 1255 bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
 1256  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1257  if (!BI || !BI->isConditional())
1258  return false;
1259 
1260  Value *Cond = BI->getCondition();
1261  BasicBlock *CurrentBB = BB;
1262  BasicBlock *CurrentPred = BB->getSinglePredecessor();
1263  unsigned Iter = 0;
1264 
1265  auto &DL = BB->getModule()->getDataLayout();
1266 
1267  while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1268  auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1269  if (!PBI || !PBI->isConditional())
1270  return false;
1271  if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1272  return false;
1273 
1274  bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1275  Optional<bool> Implication =
1276  isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1277  if (Implication) {
1278  BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1279  BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1280  RemoveSucc->removePredecessor(BB);
1281  BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI);
1282  UncondBI->setDebugLoc(BI->getDebugLoc());
1283  ++NumFolds;
1284  BI->eraseFromParent();
1285  DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
1286  if (HasProfileData)
1287  BPI->eraseBlock(BB);
1288  return true;
1289  }
1290  CurrentBB = CurrentPred;
1291  CurrentPred = CurrentBB->getSinglePredecessor();
1292  }
1293 
1294  return false;
1295 }
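// Illustrative implication chain for the walk above (hypothetical IR, not
// from this file):
//
//   pred:  %a = icmp sgt i32 %x, 10
//          br i1 %a, label %bb, label %other
//   bb:    %b = icmp sgt i32 %x, 5
//          br i1 %b, label %t, label %f
//
// Control reaches %bb only when %a is true, and "%x > 10" implies "%x > 5",
// so isImpliedCondition succeeds and the branch in %bb is folded into an
// unconditional branch to %t.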
1296 
1297 /// Return true if Op is an instruction defined in the given block.
 1298 static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {
 1299  if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1300  if (OpInst->getParent() == BB)
1301  return true;
1302  return false;
1303 }
1304 
1305 /// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1306 /// redundant load instruction, eliminate it by replacing it with a PHI node.
1307 /// This is an important optimization that encourages jump threading, and needs
1308 /// to be run interlaced with other jump threading tasks.
 1309 bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
 1310  // Don't hack volatile and ordered loads.
1311  if (!LoadI->isUnordered()) return false;
1312 
1313  // If the load is defined in a block with exactly one predecessor, it can't be
1314  // partially redundant.
1315  BasicBlock *LoadBB = LoadI->getParent();
1316  if (LoadBB->getSinglePredecessor())
1317  return false;
1318 
1319  // If the load is defined in an EH pad, it can't be partially redundant,
1320  // because the edges between the invoke and the EH pad cannot have other
1321  // instructions between them.
1322  if (LoadBB->isEHPad())
1323  return false;
1324 
1325  Value *LoadedPtr = LoadI->getOperand(0);
1326 
 1327  // If the loaded operand is defined in the LoadBB and it's not a phi,
1328  // it can't be available in predecessors.
1329  if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1330  return false;
1331 
1332  // Scan a few instructions up from the load, to see if it is obviously live at
1333  // the entry to its block.
1334  BasicBlock::iterator BBIt(LoadI);
1335  bool IsLoadCSE;
1336  if (Value *AvailableVal = FindAvailableLoadedValue(
1337  LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1338  // If the value of the load is locally available within the block, just use
1339  // it. This frequently occurs for reg2mem'd allocas.
1340 
1341  if (IsLoadCSE) {
1342  LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1343  combineMetadataForCSE(NLoadI, LoadI, false);
 1344  }
1345 
1346  // If the returned value is the load itself, replace with an undef. This can
1347  // only happen in dead loops.
1348  if (AvailableVal == LoadI)
1349  AvailableVal = UndefValue::get(LoadI->getType());
1350  if (AvailableVal->getType() != LoadI->getType())
1351  AvailableVal = CastInst::CreateBitOrPointerCast(
1352  AvailableVal, LoadI->getType(), "", LoadI);
1353  LoadI->replaceAllUsesWith(AvailableVal);
1354  LoadI->eraseFromParent();
1355  return true;
1356  }
1357 
1358  // Otherwise, if we scanned the whole block and got to the top of the block,
1359  // we know the block is locally transparent to the load. If not, something
1360  // might clobber its value.
1361  if (BBIt != LoadBB->begin())
1362  return false;
1363 
1364  // If all of the loads and stores that feed the value have the same AA tags,
1365  // then we can propagate them onto any newly inserted loads.
1366  AAMDNodes AATags = LoadI->getAAMetadata();
1367 
1368  SmallPtrSet<BasicBlock*, 8> PredsScanned;
1369 
1370  using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1371 
1372  AvailablePredsTy AvailablePreds;
1373  BasicBlock *OneUnavailablePred = nullptr;
1374  SmallVector<LoadInst*, 8> CSELoads;
1375 
1376  // If we got here, the loaded value is transparent through to the start of the
1377  // block. Check to see if it is available in any of the predecessor blocks.
1378  for (BasicBlock *PredBB : predecessors(LoadBB)) {
1379  // If we already scanned this predecessor, skip it.
1380  if (!PredsScanned.insert(PredBB).second)
1381  continue;
1382 
1383  BBIt = PredBB->end();
1384  unsigned NumScanedInst = 0;
1385  Value *PredAvailable = nullptr;
1386  // NOTE: We don't CSE load that is volatile or anything stronger than
1387  // unordered, that should have been checked when we entered the function.
1388  assert(LoadI->isUnordered() &&
1389  "Attempting to CSE volatile or atomic loads");
1390  // If this is a load on a phi pointer, phi-translate it and search
1391  // for available load/store to the pointer in predecessors.
1392  Type *AccessTy = LoadI->getType();
1393  const auto &DL = LoadI->getModule()->getDataLayout();
1394  MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
1395  LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
1396  AATags);
1397  PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
1398  PredBB, BBIt, DefMaxInstsToScan,
1399  AA, &IsLoadCSE, &NumScanedInst);
1400 
1401  // If PredBB has a single predecessor, continue scanning through the
1402  // single predecessor.
1403  BasicBlock *SinglePredBB = PredBB;
1404  while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1405  NumScanedInst < DefMaxInstsToScan) {
1406  SinglePredBB = SinglePredBB->getSinglePredecessor();
1407  if (SinglePredBB) {
1408  BBIt = SinglePredBB->end();
1409  PredAvailable = findAvailablePtrLoadStore(
1410  Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
1411  (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
1412  &NumScanedInst);
1413  }
1414  }
1415 
1416  if (!PredAvailable) {
1417  OneUnavailablePred = PredBB;
1418  continue;
1419  }
1420 
1421  if (IsLoadCSE)
1422  CSELoads.push_back(cast<LoadInst>(PredAvailable));
1423 
1424  // If so, this load is partially redundant. Remember this info so that we
1425  // can create a PHI node.
1426  AvailablePreds.emplace_back(PredBB, PredAvailable);
1427  }
1428 
1429  // If the loaded value isn't available in any predecessor, it isn't partially
1430  // redundant.
1431  if (AvailablePreds.empty()) return false;
1432 
1433  // Okay, the loaded value is available in at least one (and maybe all!)
1434  // predecessors. If the value is unavailable in more than one unique
1435  // predecessor, we want to insert a merge block for those common predecessors.
1436  // This ensures that we only have to insert one reload, thus not increasing
1437  // code size.
1438  BasicBlock *UnavailablePred = nullptr;
1439 
1440  // If the value is unavailable in one of predecessors, we will end up
1441  // inserting a new instruction into them. It is only valid if all the
1442  // instructions before LoadI are guaranteed to pass execution to its
1443  // successor, or if LoadI is safe to speculate.
1444  // TODO: If this logic becomes more complex, and we will perform PRE insertion
1445  // farther than to a predecessor, we need to reuse the code from GVN's PRE.
 1446  // It requires dominator tree analysis, so for this simple case it is
 1447  // overkill.
1448  if (PredsScanned.size() != AvailablePreds.size() &&
 1449  !isSafeToSpeculativelyExecute(LoadI))
 1450  for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
 1451  if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
 1452  return false;
1453 
1454  // If there is exactly one predecessor where the value is unavailable, the
1455  // already computed 'OneUnavailablePred' block is it. If it ends in an
1456  // unconditional branch, we know that it isn't a critical edge.
1457  if (PredsScanned.size() == AvailablePreds.size()+1 &&
1458  OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1459  UnavailablePred = OneUnavailablePred;
1460  } else if (PredsScanned.size() != AvailablePreds.size()) {
1461  // Otherwise, we had multiple unavailable predecessors or we had a critical
1462  // edge from the one.
1463  SmallVector<BasicBlock*, 8> PredsToSplit;
1464  SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
1465 
1466  for (const auto &AvailablePred : AvailablePreds)
1467  AvailablePredSet.insert(AvailablePred.first);
1468 
1469  // Add all the unavailable predecessors to the PredsToSplit list.
1470  for (BasicBlock *P : predecessors(LoadBB)) {
1471  // If the predecessor is an indirect goto, we can't split the edge.
1472  // Same for CallBr.
1473  if (isa<IndirectBrInst>(P->getTerminator()) ||
1474  isa<CallBrInst>(P->getTerminator()))
1475  return false;
1476 
1477  if (!AvailablePredSet.count(P))
1478  PredsToSplit.push_back(P);
1479  }
1480 
1481  // Split them out to their own block.
1482  UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1483  }
1484 
1485  // If the value isn't available in all predecessors, then there will be
1486  // exactly one where it isn't available. Insert a load on that edge and add
1487  // it to the AvailablePreds list.
1488  if (UnavailablePred) {
1489  assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1490  "Can't handle critical edge here!");
1491  LoadInst *NewVal = new LoadInst(
1492  LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1493  LoadI->getName() + ".pr", false, LoadI->getAlign(),
1494  LoadI->getOrdering(), LoadI->getSyncScopeID(),
1495  UnavailablePred->getTerminator());
1496  NewVal->setDebugLoc(LoadI->getDebugLoc());
1497  if (AATags)
1498  NewVal->setAAMetadata(AATags);
1499 
1500  AvailablePreds.emplace_back(UnavailablePred, NewVal);
1501  }
1502 
1503  // Now we know that each predecessor of this block has a value in
1504  // AvailablePreds, sort them for efficient access as we're walking the preds.
1505  array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1506 
1507  // Create a PHI node at the start of the block for the PRE'd load value.
1508  pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
1509  PHINode *PN = PHINode::Create(LoadI->getType(), std::distance(PB, PE), "",
1510  &LoadBB->front());
1511  PN->takeName(LoadI);
1512  PN->setDebugLoc(LoadI->getDebugLoc());
1513 
1514  // Insert new entries into the PHI for each predecessor. A single block may
1515  // have multiple entries here.
1516  for (pred_iterator PI = PB; PI != PE; ++PI) {
1517  BasicBlock *P = *PI;
1518  AvailablePredsTy::iterator I =
1519  llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
1520 
1521  assert(I != AvailablePreds.end() && I->first == P &&
1522  "Didn't find entry for predecessor!");
1523 
1524  // If we have an available predecessor but it requires casting, insert the
1525  // cast in the predecessor and use the cast. Note that we have to update the
1526  // AvailablePreds vector as we go so that all of the PHI entries for this
1527  // predecessor use the same bitcast.
1528  Value *&PredV = I->second;
1529  if (PredV->getType() != LoadI->getType())
1530  PredV = CastInst::CreateBitOrPointerCast(PredV, LoadI->getType(), "",
1531  P->getTerminator());
1532 
1533  PN->addIncoming(PredV, I->first);
1534  }
1535 
1536  for (LoadInst *PredLoadI : CSELoads) {
1537  combineMetadataForCSE(PredLoadI, LoadI, true);
1538  }
1539 
1540  LoadI->replaceAllUsesWith(PN);
1541  LoadI->eraseFromParent();
1542 
1543  return true;
1544 }
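// Illustrative sketch of the PRE performed above (hypothetical IR, not from
// this file): if the value at %p is already available in predecessor %a
// (from an earlier load or store) but not in predecessor %b, a reload is
// inserted at the end of %b and the original load becomes
//   %v = phi [ %avail, %a ], [ %reload.pr, %b ]
// which later allows a branch depending on %v to be threaded per-predecessor.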
1545 
1546 /// findMostPopularDest - The specified list contains multiple possible
1547 /// threadable destinations. Pick the one that occurs the most frequently in
1548 /// the list.
1549 static BasicBlock *
 1550 findMostPopularDest(BasicBlock *BB,
 1551  const SmallVectorImpl<std::pair<BasicBlock *,
1552  BasicBlock *>> &PredToDestList) {
1553  assert(!PredToDestList.empty());
1554 
1555  // Determine popularity. If there are multiple possible destinations, we
1556  // explicitly choose to ignore 'undef' destinations. We prefer threading
1557  // blocks to known, real destinations over threading to undef. We'll handle
1558  // them later if interesting.
1559  MapVector<BasicBlock *, unsigned> DestPopularity;
1560 
1561  // Populate DestPopularity with the successors in the order they appear in the
1562  // successor list. This way, we ensure determinism by iterating it in the
1563  // same order in std::max_element below. We map nullptr to 0 so that we can
1564  // return nullptr when PredToDestList contains nullptr only.
1565  DestPopularity[nullptr] = 0;
1566  for (auto *SuccBB : successors(BB))
1567  DestPopularity[SuccBB] = 0;
1568 
1569  for (const auto &PredToDest : PredToDestList)
1570  if (PredToDest.second)
1571  DestPopularity[PredToDest.second]++;
1572 
1573  // Find the most popular dest.
1574  using VT = decltype(DestPopularity)::value_type;
1575  auto MostPopular = std::max_element(
1576  DestPopularity.begin(), DestPopularity.end(),
1577  [](const VT &L, const VT &R) { return L.second < R.second; });
1578 
1579  // Okay, we have finally picked the most popular destination.
1580  return MostPopular->first;
1581 }
1582 
1583 // Try to evaluate the value of V when the control flows from PredPredBB to
1584 // BB->getSinglePredecessor() and then on to BB.
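// For instance (a made-up example), if PredBB contains
//   %v = phi i32 [ 0, %PredPredBB ], [ %x, %other ]
// and V is "%c = icmp eq i32 %v, 0" in BB, then along the edge from PredPredBB
// the phi yields 0 and the compare constant-folds to true.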
1585 Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
1586  BasicBlock *PredPredBB,
1587  Value *V) {
1588  BasicBlock *PredBB = BB->getSinglePredecessor();
1589  assert(PredBB && "Expected a single predecessor");
1590 
1591  if (Constant *Cst = dyn_cast<Constant>(V)) {
1592  return Cst;
1593  }
1594 
1595  // Consult LVI if V is not an instruction in BB or PredBB.
1596  Instruction *I = dyn_cast<Instruction>(V);
1597  if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1598  return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1599  }
1600 
1601  // Look into a PHI argument.
1602  if (PHINode *PHI = dyn_cast<PHINode>(V)) {
1603  if (PHI->getParent() == PredBB)
1604  return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1605  return nullptr;
1606  }
1607 
1608  // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
1609  if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1610  if (CondCmp->getParent() == BB) {
1611  Constant *Op0 =
1612  evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
1613  Constant *Op1 =
1614  evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
1615  if (Op0 && Op1) {
1616  return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
1617  }
1618  }
1619  return nullptr;
1620  }
1621 
1622  return nullptr;
1623 }
1624 
1625 bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
1626  ConstantPreference Preference,
1627  Instruction *CxtI) {
1628  // If threading this would thread across a loop header, don't even try to
1629  // thread the edge.
1630  if (LoopHeaders.count(BB))
1631  return false;
1632 
1633  PredValueInfoTy PredValues;
1634  if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1635  CxtI)) {
1636  // We don't have known values in predecessors. See if we can thread through
1637  // BB and its sole predecessor.
1638  return maybethreadThroughTwoBasicBlocks(BB, Cond);
1639  }
1640 
1641  assert(!PredValues.empty() &&
1642  "computeValueKnownInPredecessors returned true with no values");
1643 
1644  LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1645  for (const auto &PredValue : PredValues) {
1646  dbgs() << " BB '" << BB->getName()
1647  << "': FOUND condition = " << *PredValue.first
1648  << " for pred '" << PredValue.second->getName() << "'.\n";
1649  });
1650 
1651  // Decide what we want to thread through. Convert our list of known values to
1652  // a list of known destinations for each pred. This also discards duplicate
1653  // predecessors and keeps track of the undefined inputs (which are represented
1654  // as a null dest in the PredToDestList).
1655  SmallPtrSet<BasicBlock*, 16> SeenPreds;
1656  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 16> PredToDestList;
1657 
1658  BasicBlock *OnlyDest = nullptr;
1659  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1660  Constant *OnlyVal = nullptr;
1661  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1662 
1663  for (const auto &PredValue : PredValues) {
1664  BasicBlock *Pred = PredValue.second;
1665  if (!SeenPreds.insert(Pred).second)
1666  continue; // Duplicate predecessor entry.
1667 
1668  Constant *Val = PredValue.first;
1669 
1670  BasicBlock *DestBB;
1671  if (isa<UndefValue>(Val))
1672  DestBB = nullptr;
1673  else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1674  assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1675  DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1676  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1677  assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1678  DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1679  } else {
1680  assert(isa<IndirectBrInst>(BB->getTerminator())
1681  && "Unexpected terminator");
1682  assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1683  DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1684  }
1685 
1686  // If we have exactly one destination, remember it for efficiency below.
1687  if (PredToDestList.empty()) {
1688  OnlyDest = DestBB;
1689  OnlyVal = Val;
1690  } else {
1691  if (OnlyDest != DestBB)
1692  OnlyDest = MultipleDestSentinel;
1693  // It's possible we have the same destination but a different value, e.g. the
1694  // default case of a switchinst.
1695  if (Val != OnlyVal)
1696  OnlyVal = MultipleVal;
1697  }
1698 
1699  // If the predecessor ends with an indirect goto, we can't change its
1700  // destination. Same for CallBr.
1701  if (isa<IndirectBrInst>(Pred->getTerminator()) ||
1702  isa<CallBrInst>(Pred->getTerminator()))
1703  continue;
1704 
1705  PredToDestList.emplace_back(Pred, DestBB);
1706  }
1707 
1708  // If all edges were unthreadable, we fail.
1709  if (PredToDestList.empty())
1710  return false;
1711 
1712  // If all the predecessors go to a single known successor, we want to fold,
1713  // not thread. By doing so, we do not need to duplicate the current block, and
1714  // we do not miss opportunities in the cases where we don't/can't duplicate.
1715  if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1716  if (BB->hasNPredecessors(PredToDestList.size())) {
1717  bool SeenFirstBranchToOnlyDest = false;
1718  std::vector <DominatorTree::UpdateType> Updates;
1719  Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
1720  for (BasicBlock *SuccBB : successors(BB)) {
1721  if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1722  SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1723  } else {
1724  SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1725  Updates.push_back({DominatorTree::Delete, BB, SuccBB});
1726  }
1727  }
1728 
1729  // Finally update the terminator.
1730  Instruction *Term = BB->getTerminator();
1731  BranchInst::Create(OnlyDest, Term);
1732  ++NumFolds;
1733  Term->eraseFromParent();
1734  DTU->applyUpdatesPermissive(Updates);
1735  if (HasProfileData)
1736  BPI->eraseBlock(BB);
1737 
1738  // If the condition is now dead due to the removal of the old terminator,
1739  // erase it.
1740  if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1741  if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1742  CondInst->eraseFromParent();
1743  // We can safely replace *some* uses of the CondInst if it has
1744  // exactly one value as returned by LVI. RAUW is incorrect in the
1745  // presence of guards and assumes that have `Cond` as a use. This
1746  // is because we use the guards/assumes to reason about the `Cond` value
1747  // at the end of the block, but RAUW unconditionally replaces all uses,
1748  // including the guards/assumes themselves and the uses before the
1749  // guard/assume.
1750  else if (OnlyVal && OnlyVal != MultipleVal &&
1751  CondInst->getParent() == BB)
1752  replaceFoldableUses(CondInst, OnlyVal);
1753  }
1754  return true;
1755  }
1756  }
1757 
1758  // Determine which is the most common successor. If we have many inputs and
1759  // this block is a switch, we want to start by threading the batch that goes
1760  // to the most popular destination first. If we only know about one
1761  // threadable destination (the common case) we can avoid this.
1762  BasicBlock *MostPopularDest = OnlyDest;
1763 
1764  if (MostPopularDest == MultipleDestSentinel) {
1765  // Remove any loop headers from the Dest list; threadEdge conservatively
1766  // won't process them, but we might have other destinations that are eligible
1767  // and that we still want to process.
1768  erase_if(PredToDestList,
1769  [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1770  return LoopHeaders.contains(PredToDest.second);
1771  });
1772 
1773  if (PredToDestList.empty())
1774  return false;
1775 
1776  MostPopularDest = findMostPopularDest(BB, PredToDestList);
1777  }
1778 
1779  // Now that we know what the most popular destination is, factor all
1780  // predecessors that will jump to it into a single predecessor.
1781  SmallVector<BasicBlock*, 16> PredsToFactor;
1782  for (const auto &PredToDest : PredToDestList)
1783  if (PredToDest.second == MostPopularDest) {
1784  BasicBlock *Pred = PredToDest.first;
1785 
1786  // This predecessor may be a switch or something else that has multiple
1787  // edges to the block. Factor each of these edges by listing them
1788  // according to # occurrences in PredsToFactor.
1789  for (BasicBlock *Succ : successors(Pred))
1790  if (Succ == BB)
1791  PredsToFactor.push_back(Pred);
1792  }
1793 
1794  // If the threadable edges are branching on an undefined value, we get to pick
1795  // the destination that these predecessors should get to.
1796  if (!MostPopularDest)
1797  MostPopularDest = BB->getTerminator()->
1798  getSuccessor(getBestDestForJumpOnUndef(BB));
1799 
1800  // Ok, try to thread it!
1801  return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
1802 }
1803 
1804 /// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
1805 /// a PHI node (or freeze PHI) in the current block. See if there are any
1806 /// simplifications we can do based on inputs to the phi node.
1807 bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {
1808  BasicBlock *BB = PN->getParent();
1809 
1810  // TODO: We could make use of this to do it once for blocks with common PHI
1811  // values.
1812  SmallVector<BasicBlock *, 1> PredBBs;
1813  PredBBs.resize(1);
1814 
1815  // If any of the predecessor blocks end in an unconditional branch, we can
1816  // *duplicate* the conditional branch into that block in order to further
1817  // encourage jump threading and to eliminate cases where we have branch on a
1818  // phi of an icmp (branch on icmp is much better).
1819  // This is still beneficial when a frozen phi is used as the branch condition
1820  // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
1821  // to br(icmp(freeze ...)).
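 // Hypothetical sketch: if a predecessor ends in "br label %bb" and BB is
 //   %p = phi i1 [ true, %pred ], ...
 //   br i1 %p, label %t, label %f
 // then duplicating BB's branch into the predecessor lets it become an
 // unconditional "br label %t" there, since %p is known true on that edge.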
1822  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1823  BasicBlock *PredBB = PN->getIncomingBlock(i);
1824  if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
1825  if (PredBr->isUnconditional()) {
1826  PredBBs[0] = PredBB;
1827  // Try to duplicate BB into PredBB.
1828  if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1829  return true;
1830  }
1831  }
1832 
1833  return false;
1834 }
1835 
1836 /// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
1837 /// a xor instruction in the current block. See if there are any
1838 /// simplifications we can do based on inputs to the xor.
1839 bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
1840  BasicBlock *BB = BO->getParent();
1841 
1842  // If either the LHS or RHS of the xor is a constant, don't do this
1843  // optimization.
1844  if (isa<ConstantInt>(BO->getOperand(0)) ||
1845  isa<ConstantInt>(BO->getOperand(1)))
1846  return false;
1847 
1848  // If the first instruction in BB isn't a phi, we won't be able to infer
1849  // anything special about any particular predecessor.
1850  if (!isa<PHINode>(BB->front()))
1851  return false;
1852 
1853  // If this BB is a landing pad, we won't be able to split the edge into it.
1854  if (BB->isEHPad())
1855  return false;
1856 
1857  // If we have a xor as the branch input to this block, and we know that the
1858  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1859  // the condition into the predecessor and fix that value to true, saving some
1860  // logical ops on that path and encouraging other paths to simplify.
1861  //
1862  // This copies something like this:
1863  //
1864  // BB:
1865  // %X = phi i1 [1], [%X']
1866  // %Y = icmp eq i32 %A, %B
1867  // %Z = xor i1 %X, %Y
1868  // br i1 %Z, ...
1869  //
1870  // Into:
1871  // BB':
1872  // %Y = icmp ne i32 %A, %B
1873  // br i1 %Y, ...
1874 
1875  PredValueInfoTy XorOpValues;
1876  bool isLHS = true;
1877  if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1878  WantInteger, BO)) {
1879  assert(XorOpValues.empty());
1880  if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1881  WantInteger, BO))
1882  return false;
1883  isLHS = false;
1884  }
1885 
1886  assert(!XorOpValues.empty() &&
1887  "computeValueKnownInPredecessors returned true with no values");
1888 
1889  // Scan the information to see which is most popular: true or false. The
1890  // predecessors can be of the set true, false, or undef.
1891  unsigned NumTrue = 0, NumFalse = 0;
1892  for (const auto &XorOpValue : XorOpValues) {
1893  if (isa<UndefValue>(XorOpValue.first))
1894  // Ignore undefs for the count.
1895  continue;
1896  if (cast<ConstantInt>(XorOpValue.first)->isZero())
1897  ++NumFalse;
1898  else
1899  ++NumTrue;
1900  }
1901 
1902  // Determine which value to split on, true, false, or undef if neither.
1903  ConstantInt *SplitVal = nullptr;
1904  if (NumTrue > NumFalse)
1905  SplitVal = ConstantInt::getTrue(BB->getContext());
1906  else if (NumTrue != 0 || NumFalse != 0)
1907  SplitVal = ConstantInt::getFalse(BB->getContext());
1908 
1909  // Collect all of the blocks that this can be folded into so that we can
1910  // factor this once and clone it once.
1911  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1912  for (const auto &XorOpValue : XorOpValues) {
1913  if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1914  continue;
1915 
1916  BlocksToFoldInto.push_back(XorOpValue.second);
1917  }
1918 
1919  // If we inferred a value for all of the predecessors, then duplication won't
1920  // help us. However, we can just replace the LHS or RHS with the constant.
1921  if (BlocksToFoldInto.size() ==
1922  cast<PHINode>(BB->front()).getNumIncomingValues()) {
1923  if (!SplitVal) {
1924  // If all preds provide undef, just nuke the xor, because it is undef too.
1925  BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
1926  BO->eraseFromParent();
1927  } else if (SplitVal->isZero()) {
1928  // If all preds provide 0, replace the xor with the other input.
1929  BO->replaceAllUsesWith(BO->getOperand(isLHS));
1930  BO->eraseFromParent();
1931  } else {
1932  // If all preds provide 1, set the computed value to 1.
1933  BO->setOperand(!isLHS, SplitVal);
1934  }
1935 
1936  return true;
1937  }
1938 
1939  // If any of the predecessors ends with an indirect goto, we can't change its
1940  // destination. Same for CallBr.
1941  if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
1942  return isa<IndirectBrInst>(Pred->getTerminator()) ||
1943  isa<CallBrInst>(Pred->getTerminator());
1944  }))
1945  return false;
1946 
1947  // Try to duplicate BB into PredBB.
1948  return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1949 }
1950 
1951 /// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1952 /// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1953 /// NewPred using the entries from OldPred (suitably mapped).
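/// For example (hypothetical names): if PHIBB contains
///   %r = phi i32 [ %v, %OldPred ], ...
/// and ValueMap maps %v to its clone %v.thread in NewPred, this appends the
/// incoming pair [ %v.thread, %NewPred ] to the phi.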
1954 static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
1955  BasicBlock *OldPred,
1956  BasicBlock *NewPred,
1957  DenseMap<Instruction *, Value *> &ValueMap) {
1958  for (PHINode &PN : PHIBB->phis()) {
1959  // Ok, we have a PHI node. Figure out what the incoming value was for the
1960  // DestBlock.
1961  Value *IV = PN.getIncomingValueForBlock(OldPred);
1962 
1963  // Remap the value if necessary.
1964  if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
1965  DenseMap<Instruction *, Value *>::iterator I = ValueMap.find(Inst);
1966  if (I != ValueMap.end())
1967  IV = I->second;
1968  }
1969 
1970  PN.addIncoming(IV, NewPred);
1971  }
1972 }
1973 
1974 /// Merge basic block BB into its sole predecessor if possible.
1975 bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
1976  BasicBlock *SinglePred = BB->getSinglePredecessor();
1977  if (!SinglePred)
1978  return false;
1979 
1980  const Instruction *TI = SinglePred->getTerminator();
1981  if (TI->isExceptionalTerminator() || TI->getNumSuccessors() != 1 ||
1982  SinglePred == BB || hasAddressTakenAndUsed(BB))
1983  return false;
1984 
1985  // If SinglePred was a loop header, BB becomes one.
1986  if (LoopHeaders.erase(SinglePred))
1987  LoopHeaders.insert(BB);
1988 
1989  LVI->eraseBlock(SinglePred);
1990  MergeBasicBlockIntoOnlyPred(BB, DTU);
1991 
1992  // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
1993  // BB code within one basic block `BB`), we need to invalidate the LVI
1994  // information associated with BB, because the LVI information need not be
1995  // true for all of BB after the merge. For example,
1996  // Before the merge, LVI info and code is as follows:
1997  // SinglePred: <LVI info1 for %p val>
1998  // %y = use of %p
1999  // call @exit() // need not transfer execution to successor.
2000  // assume(%p) // from this point on %p is true
2001  // br label %BB
2002  // BB: <LVI info2 for %p val, i.e. %p is true>
2003  // %x = use of %p
2004  // br label exit
2005  //
2006  // Note that this LVI info for blocks BB and SinglePred is correct for %p
2007  // (info2 and info1 respectively). After the merge and the deletion of the
2008  // LVI info1 for SinglePred, we have the following code:
2009  // BB: <LVI info2 for %p val>
2010  // %y = use of %p
2011  // call @exit()
2012  // assume(%p)
2013  // %x = use of %p <-- LVI info2 is correct from here onwards.
2014  // br label exit
2015  // LVI info2 for BB is incorrect at the beginning of BB.
2016 
2017  // Invalidate LVI information for BB if the LVI is not provably true for
2018  // all of BB.
2019  if (!isGuaranteedToTransferExecutionToSuccessor(BB))
2020  LVI->eraseBlock(BB);
2021  return true;
2022 }
2023 
2024 /// Update the SSA form. NewBB contains instructions that are copied from BB.
2025 /// ValueMapping maps old values in BB to new ones in NewBB.
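/// For example (hypothetical names): if "%a = add i32 %x, 1" in BB has a use in
/// another block and NewBB holds the clone %a.thread, that use is rewritten to
/// a PHI (inserted by SSAUpdater) merging %a and %a.thread.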
2026 void JumpThreadingPass::updateSSA(
2027  BasicBlock *BB, BasicBlock *NewBB,
2028  DenseMap<Instruction *, Value *> &ValueMapping) {
2029  // If there were values defined in BB that are used outside the block, then we
2030  // now have to update all uses of the value to use either the original value,
2031  // the cloned value, or some PHI derived value. This can require arbitrary
2032  // PHI insertion, which we are prepared to do; clean these up now.
2033  SSAUpdater SSAUpdate;
2034  SmallVector<Use *, 16> UsesToRename;
2035 
2036  for (Instruction &I : *BB) {
2037  // Scan all uses of this instruction to see if it is used outside of its
2038  // block, and if so, record them in UsesToRename.
2039  for (Use &U : I.uses()) {
2040  Instruction *User = cast<Instruction>(U.getUser());
2041  if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
2042  if (UserPN->getIncomingBlock(U) == BB)
2043  continue;
2044  } else if (User->getParent() == BB)
2045  continue;
2046 
2047  UsesToRename.push_back(&U);
2048  }
2049 
2050  // If there are no uses outside the block, we're done with this instruction.
2051  if (UsesToRename.empty())
2052  continue;
2053  LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
2054 
2055  // We found a use of I outside of BB. Rename all uses of I that are outside
2056  // its block to be uses of the appropriate PHI node etc. Seed the SSAUpdater
2057  // with the two values we know: the original in BB and the clone in NewBB.
2058  SSAUpdate.Initialize(I.getType(), I.getName());
2059  SSAUpdate.AddAvailableValue(BB, &I);
2060  SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
2061 
2062  while (!UsesToRename.empty())
2063  SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
2064  LLVM_DEBUG(dbgs() << "\n");
2065  }
2066 }
2067 
2068 /// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
2069 /// arguments that come from PredBB. Return the map from the variables in the
2070 /// source basic block to the variables in the newly created basic block.
2071 DenseMap<Instruction *, Value *> JumpThreadingPass::cloneInstructions(
2072  BasicBlock::iterator BI,
2073  BasicBlock::iterator BE, BasicBlock *NewBB,
2074  BasicBlock *PredBB) {
2075  // We are going to have to map operands from the source basic block to the new
2076  // copy of the block 'NewBB'. If there are PHI nodes in the source basic
2077  // block, evaluate them to account for entry from PredBB.
2078  DenseMap<Instruction *, Value *> ValueMapping;
2079 
2080  // Clone the phi nodes of the source basic block into NewBB. The resulting
2081  // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
2082  // might need to rewrite the operand of the cloned phi.
2083  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2084  PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
2085  NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
2086  ValueMapping[PN] = NewPN;
2087  }
2088 
2089  // Clone noalias scope declarations in the threaded block. When threading a
2090  // loop exit, we would otherwise end up with two identical scope declarations
2091  // visible at the same time.
2092  SmallVector<MDNode *> NoAliasScopes;
2093  DenseMap<MDNode *, MDNode *> ClonedScopes;
2094  LLVMContext &Context = PredBB->getContext();
2095  identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
2096  cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
2097 
2098  // Clone the non-phi instructions of the source basic block into NewBB,
2099  // keeping track of the mapping and using it to remap operands in the cloned
2100  // instructions.
2101  for (; BI != BE; ++BI) {
2102  Instruction *New = BI->clone();
2103  New->setName(BI->getName());
2104  NewBB->getInstList().push_back(New);
2105  ValueMapping[&*BI] = New;
2106  adaptNoAliasScopes(New, ClonedScopes, Context);
2107 
2108  // Remap operands to patch up intra-block references.
2109  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2110  if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2111  DenseMap<Instruction *, Value *>::iterator I = ValueMapping.find(Inst);
2112  if (I != ValueMapping.end())
2113  New->setOperand(i, I->second);
2114  }
2115  }
2116 
2117  return ValueMapping;
2118 }
2119 
2120 /// Attempt to thread through two successive basic blocks.
2121 bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
2122  Value *Cond) {
2123  // Consider:
2124  //
2125  // PredBB:
2126  // %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
2127  // %tobool = icmp eq i32 %cond, 0
2128  // br i1 %tobool, label %BB, label ...
2129  //
2130  // BB:
2131  // %cmp = icmp eq i32* %var, null
2132  // br i1 %cmp, label ..., label ...
2133  //
2134  // We don't know the value of %var at BB even if we know which incoming edge
2135  // we take to BB. However, once we duplicate PredBB for each of its incoming
2136  // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
2137  // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
2138 
2139  // Require that BB end with a Branch for simplicity.
2140  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2141  if (!CondBr)
2142  return false;
2143 
2144  // BB must have exactly one predecessor.
2145  BasicBlock *PredBB = BB->getSinglePredecessor();
2146  if (!PredBB)
2147  return false;
2148 
2149  // Require that PredBB end with a conditional Branch. If PredBB ends with an
2150  // unconditional branch, we should be merging PredBB and BB instead. For
2151  // simplicity, we don't deal with a switch.
2152  BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2153  if (!PredBBBranch || PredBBBranch->isUnconditional())
2154  return false;
2155 
2156  // If PredBB has exactly one incoming edge, we don't gain anything by copying
2157  // PredBB.
2158  if (PredBB->getSinglePredecessor())
2159  return false;
2160 
2161  // Don't thread through PredBB if it contains a successor edge to itself, in
2162  // which case we would infinite loop. Suppose we are threading an edge from
2163  // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
2164  // successor edge to itself. If we allowed jump threading in this case, we
2165  // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
2166  // PredBB.thread has a successor edge to PredBB, we would immediately come up
2167  // with another jump threading opportunity from PredBB.thread through PredBB
2168  // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
2169  // would keep peeling one iteration from PredBB.
2170  if (llvm::is_contained(successors(PredBB), PredBB))
2171  return false;
2172 
2173  // Don't thread across a loop header.
2174  if (LoopHeaders.count(PredBB))
2175  return false;
2176 
2177  // Avoid complication with duplicating EH pads.
2178  if (PredBB->isEHPad())
2179  return false;
2180 
2181  // Find a predecessor that we can thread. For simplicity, we only consider a
2182  // successor edge out of BB to which we thread exactly one incoming edge into
2183  // PredBB.
2184  unsigned ZeroCount = 0;
2185  unsigned OneCount = 0;
2186  BasicBlock *ZeroPred = nullptr;
2187  BasicBlock *OnePred = nullptr;
2188  for (BasicBlock *P : predecessors(PredBB)) {
2189  if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
2190  evaluateOnPredecessorEdge(BB, P, Cond))) {
2191  if (CI->isZero()) {
2192  ZeroCount++;
2193  ZeroPred = P;
2194  } else if (CI->isOne()) {
2195  OneCount++;
2196  OnePred = P;
2197  }
2198  }
2199  }
2200 
2201  // Disregard complicated cases where we have to thread multiple edges.
2202  BasicBlock *PredPredBB;
2203  if (ZeroCount == 1) {
2204  PredPredBB = ZeroPred;
2205  } else if (OneCount == 1) {
2206  PredPredBB = OnePred;
2207  } else {
2208  return false;
2209  }
2210 
2211  BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
2212 
2213  // If threading to the same block as we come from, we would infinite loop.
2214  if (SuccBB == BB) {
2215  LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2216  << "' - would thread to self!\n");
2217  return false;
2218  }
2219 
2220  // If threading this would thread across a loop header, don't thread the edge.
2221  // See the comments above findLoopHeaders for justifications and caveats.
2222  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2223  LLVM_DEBUG({
2224  bool BBIsHeader = LoopHeaders.count(BB);
2225  bool SuccIsHeader = LoopHeaders.count(SuccBB);
2226  dbgs() << " Not threading across "
2227  << (BBIsHeader ? "loop header BB '" : "block BB '")
2228  << BB->getName() << "' to dest "
2229  << (SuccIsHeader ? "loop header BB '" : "block BB '")
2230  << SuccBB->getName()
2231  << "' - it might create an irreducible loop!\n";
2232  });
2233  return false;
2234  }
2235 
2236  // Compute the cost of duplicating BB and PredBB.
2237  unsigned BBCost =
2238  getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
2239  unsigned PredBBCost = getJumpThreadDuplicationCost(
2240  PredBB, PredBB->getTerminator(), BBDupThreshold);
2241 
2242  // Give up if costs are too high. We need to check BBCost and PredBBCost
2243  // individually before checking their sum because getJumpThreadDuplicationCost
2244  // returns (unsigned)~0 for those basic blocks that cannot be duplicated.
2245  if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
2246  BBCost + PredBBCost > BBDupThreshold) {
2247  LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2248  << "' - Cost is too high: " << PredBBCost
2249  << " for PredBB, " << BBCost << " for BB\n");
2250  return false;
2251  }
2252 
2253  // Now we are ready to duplicate PredBB.
2254  threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
2255  return true;
2256 }
2257 
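/// Duplicate PredBB into a new block for the edge coming from PredPredBB, then
/// thread the edge from that copy across BB to SuccBB, as decided by
/// maybethreadThroughTwoBasicBlocks above.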
2258 void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
2259  BasicBlock *PredBB,
2260  BasicBlock *BB,
2261  BasicBlock *SuccBB) {
2262  LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
2263  << BB->getName() << "'\n");
2264 
2265  BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
2266  BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
2267 
2268  BasicBlock *NewBB =
2269  BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
2270  PredBB->getParent(), PredBB);
2271  NewBB->moveAfter(PredBB);
2272 
2273  // Set the block frequency of NewBB.
2274  if (HasProfileData) {
2275  auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
2276  BPI->getEdgeProbability(PredPredBB, PredBB);
2277  BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2278  }
2279 
2280  // We are going to have to map operands from the original BB block to the new
2281  // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
2282  // to account for entry from PredPredBB.
2283  DenseMap<Instruction *, Value *> ValueMapping =
2284  cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
2285 
2286  // Copy the edge probabilities from PredBB to NewBB.
2287  if (HasProfileData)
2288  BPI->copyEdgeProbabilities(PredBB, NewBB);
2289 
2290  // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
2291  // This eliminates predecessors from PredBB, which requires us to simplify
2292  // any PHI nodes in PredBB.
2293  Instruction *PredPredTerm = PredPredBB->getTerminator();
2294  for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
2295  if (PredPredTerm->getSuccessor(i) == PredBB) {
2296  PredBB->removePredecessor(PredPredBB, true);
2297  PredPredTerm->setSuccessor(i, NewBB);
2298  }
2299 
2300  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
2301  ValueMapping);
2302  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
2303  ValueMapping);
2304 
2305  DTU->applyUpdatesPermissive(
2306  {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
2307  {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
2308  {DominatorTree::Insert, PredPredBB, NewBB},
2309  {DominatorTree::Delete, PredPredBB, PredBB}});
2310 
2311  updateSSA(PredBB, NewBB, ValueMapping);
2312 
2313  // Clean up things like PHI nodes with single operands, dead instructions,
2314  // etc.
2315  SimplifyInstructionsInBlock(NewBB, TLI);
2316  SimplifyInstructionsInBlock(PredBB, TLI);
2317 
2318  SmallVector<BasicBlock *, 1> PredsToFactor;
2319  PredsToFactor.push_back(NewBB);
2320  threadEdge(BB, PredsToFactor, SuccBB);
2321 }
2322 
2323 /// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
2324 bool JumpThreadingPass::tryThreadEdge(
2325  BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
2326  BasicBlock *SuccBB) {
2327  // If threading to the same block as we come from, we would infinite loop.
2328  if (SuccBB == BB) {
2329  LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2330  << "' - would thread to self!\n");
2331  return false;
2332  }
2333 
2334  // If threading this would thread across a loop header, don't thread the edge.
2335  // See the comments above findLoopHeaders for justifications and caveats.
2336  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2337  LLVM_DEBUG({
2338  bool BBIsHeader = LoopHeaders.count(BB);
2339  bool SuccIsHeader = LoopHeaders.count(SuccBB);
2340  dbgs() << " Not threading across "
2341  << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
2342  << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
2343  << SuccBB->getName() << "' - it might create an irreducible loop!\n";
2344  });
2345  return false;
2346  }
2347 
2348  unsigned JumpThreadCost =
2349  getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
2350  if (JumpThreadCost > BBDupThreshold) {
2351  LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2352  << "' - Cost is too high: " << JumpThreadCost << "\n");
2353  return false;
2354  }
2355 
2356  threadEdge(BB, PredBBs, SuccBB);
2357  return true;
2358 }
2359 
2360 /// threadEdge - We have decided that it is safe and profitable to factor the
2361 /// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
2362 /// across BB. Transform the IR to reflect this change.
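/// Schematically (hypothetical CFG): PredBB -> BB -> {SuccBB, ...} becomes
/// PredBB -> BB.thread -> SuccBB, where BB.thread is a copy of BB without its
/// terminator that ends in an unconditional branch to SuccBB; BB is kept for
/// the remaining predecessors.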
2363 void JumpThreadingPass::threadEdge(BasicBlock *BB,
2364  const SmallVectorImpl<BasicBlock *> &PredBBs,
2365  BasicBlock *SuccBB) {
2366  assert(SuccBB != BB && "Don't create an infinite loop");
2367 
2368  assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
2369  "Don't thread across loop headers");
2370 
2371  // And finally, do it! Start by factoring the predecessors if needed.
2372  BasicBlock *PredBB;
2373  if (PredBBs.size() == 1)
2374  PredBB = PredBBs[0];
2375  else {
2376  LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2377  << " common predecessors.\n");
2378  PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2379  }
2380 
2381  // And finally, do it!
2382  LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
2383  << "' to '" << SuccBB->getName()
2384  << "', across block:\n " << *BB << "\n");
2385 
2386  LVI->threadEdge(PredBB, BB, SuccBB);
2387 
2388  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
2389  BB->getName()+".thread",
2390  BB->getParent(), BB);
2391  NewBB->moveAfter(PredBB);
2392 
2393  // Set the block frequency of NewBB.
2394  if (HasProfileData) {
2395  auto NewBBFreq =
2396  BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
2397  BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2398  }
2399 
2400  // Copy all the instructions from BB to NewBB except the terminator.
2401  DenseMap<Instruction *, Value *> ValueMapping =
2402  cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);
2403 
2404  // We didn't copy the terminator from BB over to NewBB, because there is now
2405  // an unconditional jump to SuccBB. Insert the unconditional jump.
2406  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
2407  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
2408 
2409  // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
2410  // PHI nodes for NewBB now.
2411  addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
2412 
2413  // Update the terminator of PredBB to jump to NewBB instead of BB. This
2414  // eliminates predecessors from BB, which requires us to simplify any PHI
2415  // nodes in BB.
2416  Instruction *PredTerm = PredBB->getTerminator();
2417  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
2418  if (PredTerm->getSuccessor(i) == BB) {
2419  BB->removePredecessor(PredBB, true);
2420  PredTerm->setSuccessor(i, NewBB);
2421  }
2422 
2423  // Enqueue required DT updates.
2424  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
2425  {DominatorTree::Insert, PredBB, NewBB},
2426  {DominatorTree::Delete, PredBB, BB}});
2427 
2428  updateSSA(BB, NewBB, ValueMapping);
2429 
2430  // At this point, the IR is fully up to date and consistent. Do a quick scan
2431  // over the new instructions and zap any that are constants or dead. This
2432  // frequently happens because of phi translation.
2433  SimplifyInstructionsInBlock(NewBB, TLI);
2434 
2435  // Update the edge weight from BB to SuccBB, which should be less than before.
2436  updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
2437 
2438  // Threaded an edge!
2439  ++NumThreads;
2440 }
2441 
2442 /// Create a new basic block that will be the predecessor of BB and successor of
2443 /// all blocks in Preds. When profile data is available, update the frequency of
2444 /// this new block.
2445 BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
2446  ArrayRef<BasicBlock *> Preds,
2447  const char *Suffix) {
2448  SmallVector<BasicBlock *, 2> NewBBs;
2449 
2450  // Collect the frequencies of all predecessors of BB, which will be used to
2451  // update the edge weight of the result of splitting predecessors.
2452  DenseMap<BasicBlock *, BlockFrequency> FreqMap;
2453  if (HasProfileData)
2454  for (auto Pred : Preds)
2455  FreqMap.insert(std::make_pair(
2456  Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
2457 
2458  // In the case when BB is a LandingPad block we create 2 new predecessors
2459  // instead of just one.
2460  if (BB->isLandingPad()) {
2461  std::string NewName = std::string(Suffix) + ".split-lp";
2462  SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
2463  } else {
2464  NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
2465  }
2466 
2467  std::vector<DominatorTree::UpdateType> Updates;
2468  Updates.reserve((2 * Preds.size()) + NewBBs.size());
2469  for (auto NewBB : NewBBs) {
2470  BlockFrequency NewBBFreq(0);
2471  Updates.push_back({DominatorTree::Insert, NewBB, BB});
2472  for (auto Pred : predecessors(NewBB)) {
2473  Updates.push_back({DominatorTree::Delete, Pred, BB});
2474  Updates.push_back({DominatorTree::Insert, Pred, NewBB});
2475  if (HasProfileData) // Update frequencies between Pred -> NewBB.
2476  NewBBFreq += FreqMap.lookup(Pred);
2477  }
2478  if (HasProfileData) // Apply the summed frequency to NewBB.
2479  BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2480  }
2481 
2482  DTU->applyUpdatesPermissive(Updates);
2483  return NewBBs[0];
2484 }
2485 
2486 bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2487  const Instruction *TI = BB->getTerminator();
2488  assert(TI->getNumSuccessors() > 1 && "not a split");
2489 
2490  MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
2491  if (!WeightsNode)
2492  return false;
2493 
2494  MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
2495  if (MDName->getString() != "branch_weights")
2496  return false;
2497 
2498  // Ensure there are weights for all of the successors. Note that the first
2499  // operand to the metadata node is a name, not a weight.
2500  return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1;
2501 }
2502 
2503 /// Update the block frequency of BB and branch weight and the metadata on the
2504 /// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2505 /// Freq(PredBB->BB) / Freq(BB->SuccBB).
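/// For example (made-up numbers): with Freq(BB) = 100, Freq(NewBB) = 40 and an
/// original BB->SuccBB probability of 50%, BB's frequency becomes 100 - 40 = 60
/// and the BB->SuccBB frequency becomes 50 - 40 = 10 before the outgoing
/// probabilities are renormalized below.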
2506 void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2507  BasicBlock *BB,
2508  BasicBlock *NewBB,
2509  BasicBlock *SuccBB) {
2510  if (!HasProfileData)
2511  return;
2512 
2513  assert(BFI && BPI && "BFI & BPI should have been created here");
2514 
2515  // As the edge from PredBB to BB is deleted, we have to update the block
2516  // frequency of BB.
2517  auto BBOrigFreq = BFI->getBlockFreq(BB);
2518  auto NewBBFreq = BFI->getBlockFreq(NewBB);
2519  auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
2520  auto BBNewFreq = BBOrigFreq - NewBBFreq;
2521  BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
2522 
2523  // Collect updated outgoing edges' frequencies from BB and use them to update
2524  // edge probabilities.
2525  SmallVector<uint64_t, 4> BBSuccFreq;
2526  for (BasicBlock *Succ : successors(BB)) {
2527  auto SuccFreq = (Succ == SuccBB)
2528  ? BB2SuccBBFreq - NewBBFreq
2529  : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
2530  BBSuccFreq.push_back(SuccFreq.getFrequency());
2531  }
2532 
2533  uint64_t MaxBBSuccFreq =
2534  *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
2535 
2536  SmallVector<BranchProbability, 4> BBSuccProbs;
2537  if (MaxBBSuccFreq == 0)
2538  BBSuccProbs.assign(BBSuccFreq.size(),
2539  {1, static_cast<unsigned>(BBSuccFreq.size())});
2540  else {
2541  for (uint64_t Freq : BBSuccFreq)
2542  BBSuccProbs.push_back(
2543  BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2544  // Normalize edge probabilities so that they sum up to one.
2545  BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
2546  BBSuccProbs.end());
2547  }
2548 
2549  // Update edge probabilities in BPI.
2550  BPI->setEdgeProbability(BB, BBSuccProbs);
2551 
2552  // Update the profile metadata as well.
2553  //
2554  // Don't do this if the profile of the transformed blocks was statically
2555  // estimated. (This could occur despite the function having an entry
2556  // frequency in completely cold parts of the CFG.)
2557  //
2558  // In this case we don't want to suggest to subsequent passes that the
2559  // calculated weights are fully consistent. Consider this graph:
2560  //
2561  // check_1
2562  // 50% / |
2563  // eq_1 | 50%
2564  // \ |
2565  // check_2
2566  // 50% / |
2567  // eq_2 | 50%
2568  // \ |
2569  // check_3
2570  // 50% / |
2571  // eq_3 | 50%
2572  // \ |
2573  //
2574  // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2575  // the overall probabilities are inconsistent; the total probability that the
2576  // value is either 1, 2 or 3 is 150%.
2577  //
2578  // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2579  // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2580  // the loop exit edge. Then based solely on static estimation we would assume
2581  // the loop was extremely hot.
2582  //
2583  // FIXME: Handle this locally as well so that BPI and BFI stay consistent. We
2584  // shouldn't make edges extremely likely or unlikely based solely on static
2585  // estimation.
2586  if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
2587  SmallVector<uint32_t, 4> Weights;
2588  for (auto Prob : BBSuccProbs)
2589  Weights.push_back(Prob.getNumerator());
2590 
2591  auto TI = BB->getTerminator();
2592  TI->setMetadata(
2593  LLVMContext::MD_prof,
2594  MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
2595  }
2596 }
2597 
2598 /// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2599 /// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2600 /// If we can duplicate the contents of BB up into PredBB do so now, this
2601 /// improves the odds that the branch will be on an analyzable instruction like
2602 /// a compare.
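/// A hypothetical sketch (names invented):
///   pred:  br label %bb
///   bb:    %p = phi i1 [ true, %pred ], [ %c, %other ]
///          br i1 %p, label %t, label %f
/// Cloning bb's instructions into pred lets pred end in "br label %t", because
/// %p is true on that edge, while bb stays in place for %other.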
2603 bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
2604  BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2605  assert(!PredBBs.empty() && "Can't handle an empty set");
2606 
2607  // If BB is a loop header, then duplicating this block outside the loop would
2608  // cause us to transform this into an irreducible loop, don't do this.
2609  // See the comments above findLoopHeaders for justifications and caveats.
2610  if (LoopHeaders.count(BB)) {
2611  LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2612  << "' into predecessor block '" << PredBBs[0]->getName()
2613  << "' - it might create an irreducible loop!\n");
2614  return false;
2615  }
2616 
2617  unsigned DuplicationCost =
2618  getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
2619  if (DuplicationCost > BBDupThreshold) {
2620  LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2621  << "' - Cost is too high: " << DuplicationCost << "\n");
2622  return false;
2623  }
2624 
2625  // And finally, do it! Start by factoring the predecessors if needed.
2626  std::vector<DominatorTree::UpdateType> Updates;
2627  BasicBlock *PredBB;
2628  if (PredBBs.size() == 1)
2629  PredBB = PredBBs[0];
2630  else {
2631  LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2632  << " common predecessors.\n");
2633  PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2634  }
2635  Updates.push_back({DominatorTree::Delete, PredBB, BB});
2636 
2637  // Okay, we decided to do this! Clone all the instructions in BB onto the end
2638  // of PredBB.
2639  LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2640  << "' into end of '" << PredBB->getName()
2641  << "' to eliminate branch on phi. Cost: "
2642  << DuplicationCost << " block is:" << *BB << "\n");
2643 
2644  // Unless PredBB ends with an unconditional branch, split the edge so that we
2645  // can just clone the bits from BB into the end of the new PredBB.
2646  BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2647 
2648  if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2649  BasicBlock *OldPredBB = PredBB;
2650  PredBB = SplitEdge(OldPredBB, BB);
2651  Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
2652  Updates.push_back({DominatorTree::Insert, PredBB, BB});
2653  Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
2654  OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2655  }
2656 
2657  // We are going to have to map operands from the original BB block into the
2658  // PredBB block. Evaluate PHI nodes in BB.
2659  DenseMap<Instruction*, Value*> ValueMapping;
2660 
2661  BasicBlock::iterator BI = BB->begin();
2662  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2663  ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2664  // Clone the non-phi instructions of BB into PredBB, keeping track of the
2665  // mapping and using it to remap operands in the cloned instructions.
2666  for (; BI != BB->end(); ++BI) {
2667  Instruction *New = BI->clone();
2668 
2669  // Remap operands to patch up intra-block references.
2670  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2671  if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2672  DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
2673  if (I != ValueMapping.end())
2674  New->setOperand(i, I->second);
2675  }
2676 
2677  // If this instruction can be simplified after the operands are updated,
2678  // just use the simplified value instead. This frequently happens due to
2679  // phi translation.
2680  if (Value *IV = SimplifyInstruction(
2681  New,
2682  {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
2683  ValueMapping[&*BI] = IV;
2684  if (!New->mayHaveSideEffects()) {
2685  New->deleteValue();
2686  New = nullptr;
2687  }
2688  } else {
2689  ValueMapping[&*BI] = New;
2690  }
2691  if (New) {
2692  // Otherwise, insert the new instruction into the block.
2693  New->setName(BI->getName());
2694  PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
2695  // Update Dominance from simplified New instruction operands.
2696  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2697  if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2698  Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
2699  }
2700  }
2701 
2702  // Check to see if the targets of the branch had PHI nodes. If so, we need to
2703  // add entries to the PHI nodes for the branch from PredBB now.
2704  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2705  addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2706  ValueMapping);
2707  addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2708  ValueMapping);
2709 
2710  updateSSA(BB, PredBB, ValueMapping);
2711 
2712  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2713  // that we nuked.
2714  BB->removePredecessor(PredBB, true);
2715 
2716  // Remove the unconditional branch at the end of the PredBB block.
2717  OldPredBranch->eraseFromParent();
2718  if (HasProfileData)
2719  BPI->copyEdgeProbabilities(BB, PredBB);
2720  DTU->applyUpdatesPermissive(Updates);
2721 
2722  ++NumDupes;
2723  return true;
2724 }
2725 
2726 // Pred is a predecessor of BB with an unconditional branch to BB. SI is
2727 // a Select instruction in Pred. BB has other predecessors and SI is used in
2728 // a PHI node in BB. SI has no other use.
2729 // A new basic block, NewBB, is created and SI is converted to a compare and a
2730 // conditional branch. SI is erased from its parent.
2731 void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
2732  SelectInst *SI, PHINode *SIUse,
2733  unsigned Idx) {
2734  // Expand the select.
2735  //
2736  // Pred --
2737  // | v
2738  // | NewBB
2739  // | |
2740  // |-----
2741  // v
2742  // BB
2743  BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
2744  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2745  BB->getParent(), BB);
2746  // Move the unconditional branch to NewBB.
2747  PredTerm->removeFromParent();
2748  NewBB->getInstList().insert(NewBB->end(), PredTerm);
2749  // Create a conditional branch and update PHI nodes.
2750  auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
2751  BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2752  SIUse->setIncomingValue(Idx, SI->getFalseValue());
2753  SIUse->addIncoming(SI->getTrueValue(), NewBB);
2754 
2755  // The select is now dead.
2756  SI->eraseFromParent();
2757  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
2758  {DominatorTree::Insert, Pred, NewBB}});
2759 
2760  // Update any other PHI nodes in BB.
2761  for (BasicBlock::iterator BI = BB->begin();
2762  PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2763  if (Phi != SIUse)
2764  Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2765 }
2766 
2767 bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
2768  PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2769 
2770  if (!CondPHI || CondPHI->getParent() != BB)
2771  return false;
2772 
2773  for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2774  BasicBlock *Pred = CondPHI->getIncomingBlock(I);
2775  SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
2776 
2777  // The second and third conditions can potentially be relaxed. Currently
2778  // the conditions help to simplify the code and allow us to reuse existing
2779  // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *).
2780  if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
2781  continue;
2782 
2783  BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2784  if (!PredTerm || !PredTerm->isUnconditional())
2785  continue;
2786 
2787  unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
2788  return true;
2789  }
2790  return false;
2791 }
2792 
2793 /// tryToUnfoldSelect - Look for blocks of the form
2794 /// bb1:
2795 /// %a = select
2796 /// br bb2
2797 ///
2798 /// bb2:
2799 /// %p = phi [%a, %bb1] ...
2800 /// %c = icmp %p
2801 /// br i1 %c
2802 ///
2803 /// And expand the select into a branch structure if one of its arms allows %c
2804 /// to be folded. This later enables threading from bb1 over bb2.
2805 bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
2806  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2807  PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
2808  Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2809 
2810  if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2811  CondLHS->getParent() != BB)
2812  return false;
2813 
2814  for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2815  BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2816  SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2817 
2818  // Check whether one of the incoming values is a select in the corresponding
2819  // predecessor.
2820  if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2821  continue;
2822 
2823  BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2824  if (!PredTerm || !PredTerm->isUnconditional())
2825  continue;
2826 
2827  // Now check if one of the select values would allow us to constant fold the
2828  // terminator in BB. We don't do the transform if both sides fold, since those
2829  // cases will be threaded in any case.
2830  LazyValueInfo::Tristate LHSFolds =
2831  LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2832  CondRHS, Pred, BB, CondCmp);
2833  LazyValueInfo::Tristate RHSFolds =
2834  LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2835  CondRHS, Pred, BB, CondCmp);
2836  if ((LHSFolds != LazyValueInfo::Unknown ||
2837  RHSFolds != LazyValueInfo::Unknown) &&
2838  LHSFolds != RHSFolds) {
2839  unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
2840  return true;
2841  }
2842  }
2843  return false;
2844 }
2845 
2846 /// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2847 /// same BB in the form
2848 /// bb:
2849 /// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2850 /// %s = select %p, trueval, falseval
2851 ///
2852 /// or
2853 ///
2854 /// bb:
2855 /// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2856 /// %c = cmp %p, 0
2857 /// %s = select %c, trueval, falseval
2858 ///
2859 /// And expand the select into a branch structure. This later enables
2860 /// jump-threading over bb in this pass.
2861 ///
2862 /// Using an approach similar to SimplifyCFG::FoldCondBranchOnPHI(), unfold the
2863 /// select if the associated PHI has at least one constant. If the unfolded
2864 /// select is not jump-threaded, it will be folded again in the later
2865 /// optimizations.
2866 bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
2867  // This transform would reduce the quality of msan diagnostics.
2868  // Disable this transform under MemorySanitizer.
2869  if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
2870  return false;
2871 
2872  // If threading this would thread across a loop header, don't thread the edge.
2873  // See the comments above findLoopHeaders for justifications and caveats.
2874  if (LoopHeaders.count(BB))
2875  return false;
2876 
2877  for (BasicBlock::iterator BI = BB->begin();
2878  PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2879  // Look for a Phi having at least one constant incoming value.
2880  if (llvm::all_of(PN->incoming_values(),
2881  [](Value *V) { return !isa<ConstantInt>(V); }))
2882  continue;
2883 
2884  auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2885  using namespace PatternMatch;
2886 
2887  // Check if SI is in BB and uses V as its condition.
2888  if (SI->getParent() != BB)
2889  return false;
2890  Value *Cond = SI->getCondition();
2891  bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
2892  return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
2893  };
2894 
2895  SelectInst *SI = nullptr;
2896  for (Use &U : PN->uses()) {
2897  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2898  // Look for an ICmp in BB that compares PN with a constant and is the
2899  // condition of a Select.
2900  if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2901  isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2902  if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2903  if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2904  SI = SelectI;
2905  break;
2906  }
2907  } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2908  // Look for a Select in BB that uses PN as condition.
2909  if (isUnfoldCandidate(SelectI, U.get())) {
2910  SI = SelectI;
2911  break;
2912  }
2913  }
2914  }
2915 
2916  if (!SI)
2917  continue;
2918  // Expand the select.
2919  Value *Cond = SI->getCondition();
2920  if (InsertFreezeWhenUnfoldingSelect &&
2921  !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI,
2922  &DTU->getDomTree()))
2923  Cond = new FreezeInst(Cond, "cond.fr", SI);
2924  Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
2925  BasicBlock *SplitBB = SI->getParent();
2926  BasicBlock *NewBB = Term->getParent();
2927  PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
2928  NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2929  NewPN->addIncoming(SI->getFalseValue(), BB);
2930  SI->replaceAllUsesWith(NewPN);
2931  SI->eraseFromParent();
2932  // NewBB and SplitBB are newly created blocks which require insertion.
2933  std::vector<DominatorTree::UpdateType> Updates;
2934  Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
2935  Updates.push_back({DominatorTree::Insert, BB, SplitBB});
2936  Updates.push_back({DominatorTree::Insert, BB, NewBB});
2937  Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
2938  // BB's successors were moved to SplitBB, update DTU accordingly.
2939  for (auto *Succ : successors(SplitBB)) {
2940  Updates.push_back({DominatorTree::Delete, BB, Succ});
2941  Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
2942  }
2943  DTU->applyUpdatesPermissive(Updates);
2944  return true;
2945  }
2946  return false;
2947 }
2948 
2949 /// Try to propagate a guard from the current BB into one of its predecessors
2950 /// in case another branch of execution implies that the condition of this
2951 /// guard is always true. Currently we only process the simplest case that
2952 /// looks like:
2953 ///
2954 /// Start:
2955 /// %cond = ...
2956 /// br i1 %cond, label %T1, label %F1
2957 /// T1:
2958 /// br label %Merge
2959 /// F1:
2960 /// br label %Merge
2961 /// Merge:
2962 /// %condGuard = ...
2963 /// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
2964 ///
2965 /// And cond either implies condGuard or !condGuard. In this case all the
2966 /// instructions before the guard can be duplicated in both branches, and the
2967 /// guard is then threaded to one of them.
2968 bool JumpThreadingPass::processGuards(BasicBlock *BB) {
2969  using namespace PatternMatch;
2970 
2971  // We only want to deal with two predecessors.
2972  BasicBlock *Pred1, *Pred2;
2973  auto PI = pred_begin(BB), PE = pred_end(BB);
2974  if (PI == PE)
2975  return false;
2976  Pred1 = *PI++;
2977  if (PI == PE)
2978  return false;
2979  Pred2 = *PI++;
2980  if (PI != PE)
2981  return false;
2982  if (Pred1 == Pred2)
2983  return false;
2984 
2985  // Try to thread one of the guards of the block.
2986  // TODO: Look up deeper than to immediate predecessor?
2987  auto *Parent = Pred1->getSinglePredecessor();
2988  if (!Parent || Parent != Pred2->getSinglePredecessor())
2989  return false;
2990 
2991  if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
2992  for (auto &I : *BB)
2993  if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
2994  return true;
2995 
2996  return false;
2997 }
2998 
2999 /// Try to propagate the guard from BB which is the lower block of a diamond
3000 /// to one of its branches, when the diamond's condition implies the guard's
3001 /// condition.
3002 bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
3003  BranchInst *BI) {
3004  assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
3005  assert(BI->isConditional() && "Unconditional branch has 2 successors?");
3006  Value *GuardCond = Guard->getArgOperand(0);
3007  Value *BranchCond = BI->getCondition();
3008  BasicBlock *TrueDest = BI->getSuccessor(0);
3009  BasicBlock *FalseDest = BI->getSuccessor(1);
3010 
3011  auto &DL = BB->getModule()->getDataLayout();
3012  bool TrueDestIsSafe = false;
3013  bool FalseDestIsSafe = false;
3014 
3015  // True dest is safe if BranchCond => GuardCond.
3016  auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
3017  if (Impl && *Impl)
3018  TrueDestIsSafe = true;
3019  else {
3020  // False dest is safe if !BranchCond => GuardCond.
3021  Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
3022  if (Impl && *Impl)
3023  FalseDestIsSafe = true;
3024  }
3025 
3026  if (!TrueDestIsSafe && !FalseDestIsSafe)
3027  return false;
3028 
3029  BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3030  BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3031 
3032  ValueToValueMapTy UnguardedMapping, GuardedMapping;
3033  Instruction *AfterGuard = Guard->getNextNode();
3034  unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
3035  if (Cost > BBDupThreshold)
3036  return false;
3037  // Duplicate all instructions before the guard and the guard itself to the
3038  // branch where implication is not proved.
3039  BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(
3040  BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3041  assert(GuardedBlock && "Could not create the guarded block?");
3042  // Duplicate all instructions before the guard in the unguarded branch.
3043  // Since we have successfully duplicated the guarded block and this block
3044  // has fewer instructions, we expect it to succeed.
3045  BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(
3046  BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3047  assert(UnguardedBlock && "Could not create the unguarded block?");
3048  LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3049  << GuardedBlock->getName() << "\n");
3050  // Some instructions before the guard may still have uses. For them, we need
3051  // to create Phi nodes merging their copies in both guarded and unguarded
3052  // branches. Those instructions that have no uses can be just removed.
3053  SmallVector<Instruction *, 4> ToRemove;
3054  for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3055  if (!isa<PHINode>(&*BI))
3056  ToRemove.push_back(&*BI);
3057 
3058  Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
3059  assert(InsertionPoint && "Empty block?");
3060  // Substitute with Phis & remove.
3061  for (auto *Inst : reverse(ToRemove)) {
3062  if (!Inst->use_empty()) {
3063  PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
3064  NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3065  NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
3066  NewPN->insertBefore(InsertionPoint);
3067  Inst->replaceAllUsesWith(NewPN);
3068  }
3069  Inst->eraseFromParent();
3070  }
3071  return true;
3072 }
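
As a rough illustration of threadGuard on the pattern from the processGuards comment, here is a hand-written LLVM IR sketch assuming %cond implies %condGuard, so the true side is the unguarded one. Names are illustrative; in the actual transformation the copies land in blocks split off the two incoming edges rather than in %T1 and %F1 themselves.

; Before
Start:
  br i1 %cond, label %T1, label %F1
T1:
  br label %Merge
F1:
  br label %Merge
Merge:
  %v = add i32 %a, 1
  call void (i1, ...) @llvm.experimental.guard(i1 %condGuard) [ "deopt"() ]

; After: instructions above the guard are duplicated on both paths, and the
; guard survives only on the path whose branch condition does not imply it.
T1:
  %v.unguarded = add i32 %a, 1
  br label %Merge
F1:
  %v.guarded = add i32 %a, 1
  call void (i1, ...) @llvm.experimental.guard(i1 %condGuard) [ "deopt"() ]
  br label %Merge
Merge:
  %v = phi i32 [ %v.unguarded, %T1 ], [ %v.guarded, %F1 ]

The PHI in %Merge is what the "Substitute with Phis & remove" loop above builds for duplicated instructions that still have uses below the guard.
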