LLVM 20.0.0git
JumpThreading.cpp
Go to the documentation of this file.
1//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Jump Threading pass.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/MapVector.h"
16#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/CFG.h"
29#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/DebugInfo.h"
43#include "llvm/IR/Dominators.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/InstrTypes.h"
46#include "llvm/IR/Instruction.h"
49#include "llvm/IR/Intrinsics.h"
50#include "llvm/IR/LLVMContext.h"
51#include "llvm/IR/MDBuilder.h"
52#include "llvm/IR/Metadata.h"
53#include "llvm/IR/Module.h"
54#include "llvm/IR/PassManager.h"
57#include "llvm/IR/Type.h"
58#include "llvm/IR/Use.h"
59#include "llvm/IR/Value.h"
64#include "llvm/Support/Debug.h"
71#include <cassert>
72#include <cstdint>
73#include <iterator>
74#include <memory>
75#include <utility>
76
77using namespace llvm;
78using namespace jumpthreading;
79
80#define DEBUG_TYPE "jump-threading"
81
82STATISTIC(NumThreads, "Number of jumps threaded");
83STATISTIC(NumFolds, "Number of terminators folded");
84STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
85
87BBDuplicateThreshold("jump-threading-threshold",
88 cl::desc("Max block size to duplicate for jump threading"),
90
93 "jump-threading-implication-search-threshold",
94 cl::desc("The number of predecessors to search for a stronger "
95 "condition to use to thread over a weaker condition"),
97
99 "jump-threading-phi-threshold",
100 cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
101 cl::Hidden);
102
104 "jump-threading-across-loop-headers",
105 cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
106 cl::init(false), cl::Hidden);
107
109 DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
110}
111
112// Update branch probability information according to conditional
113// branch probability. This is usually made possible for cloned branches
114// in inline instances by the context specific profile in the caller.
115// For instance,
116//
117// [Block PredBB]
118// [Branch PredBr]
119// if (t) {
120// Block A;
121// } else {
122// Block B;
123// }
124//
125// [Block BB]
126// cond = PN([true, %A], [..., %B]); // PHI node
127// [Branch CondBr]
128// if (cond) {
129// ... // P(cond == true) = 1%
130// }
131//
132// Here we know that when block A is taken, cond must be true, which means
133// P(cond == true | A) = 1
134//
135// Given that P(cond == true) = P(cond == true | A) * P(A) +
136// P(cond == true | B) * P(B)
137// we get:
138// P(cond == true ) = P(A) + P(cond == true | B) * P(B)
139//
140// which gives us:
141// P(A) is no greater than P(cond == true), i.e.
142// P(t == true) <= P(cond == true)
143//
144// In other words, if we know P(cond == true) is unlikely, we know
145// that P(t == true) is also unlikely.
146//
148 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
149 if (!CondBr)
150 return;
151
152 uint64_t TrueWeight, FalseWeight;
153 if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))
154 return;
155
156 if (TrueWeight + FalseWeight == 0)
157 // Zero branch_weights do not give a hint for getting branch probabilities.
158 // Technically it would result in division by zero denominator, which is
159 // TrueWeight + FalseWeight.
160 return;
161
162 // Returns the outgoing edge of the dominating predecessor block
163 // that leads to the PhiNode's incoming block:
164 auto GetPredOutEdge =
165 [](BasicBlock *IncomingBB,
166 BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
167 auto *PredBB = IncomingBB;
168 auto *SuccBB = PhiBB;
170 while (true) {
171 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
172 if (PredBr && PredBr->isConditional())
173 return {PredBB, SuccBB};
174 Visited.insert(PredBB);
175 auto *SinglePredBB = PredBB->getSinglePredecessor();
176 if (!SinglePredBB)
177 return {nullptr, nullptr};
178
179 // Stop searching when SinglePredBB has been visited. It means we see
180 // an unreachable loop.
181 if (Visited.count(SinglePredBB))
182 return {nullptr, nullptr};
183
184 SuccBB = PredBB;
185 PredBB = SinglePredBB;
186 }
187 };
188
189 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
190 Value *PhiOpnd = PN->getIncomingValue(i);
191 ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
192
193 if (!CI || !CI->getType()->isIntegerTy(1))
194 continue;
195
198 TrueWeight, TrueWeight + FalseWeight)
200 FalseWeight, TrueWeight + FalseWeight));
201
202 auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
203 if (!PredOutEdge.first)
204 return;
205
206 BasicBlock *PredBB = PredOutEdge.first;
207 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
208 if (!PredBr)
209 return;
210
211 uint64_t PredTrueWeight, PredFalseWeight;
212 // FIXME: We currently only set the profile data when it is missing.
213 // With PGO, this can be used to refine even existing profile data with
214 // context information. This needs to be done after more performance
215 // testing.
216 if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))
217 continue;
218
219 // We cannot infer anything useful when BP >= 50%, because BP is the
220 // upper bound probability value.
221 if (BP >= BranchProbability(50, 100))
222 continue;
223
224 uint32_t Weights[2];
225 if (PredBr->getSuccessor(0) == PredOutEdge.second) {
226 Weights[0] = BP.getNumerator();
227 Weights[1] = BP.getCompl().getNumerator();
228 } else {
229 Weights[0] = BP.getCompl().getNumerator();
230 Weights[1] = BP.getNumerator();
231 }
232 setBranchWeights(*PredBr, Weights, hasBranchWeightOrigin(*PredBr));
233 }
234}
235
238 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
239 // Jump threading makes no sense for targets with divergent control flow.
241 return PreservedAnalyses::all();
242 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
243 auto &LVI = AM.getResult<LazyValueAnalysis>(F);
244 auto &AA = AM.getResult<AAManager>(F);
245 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
246
247 bool Changed =
248 runImpl(F, &AM, &TLI, &TTI, &LVI, &AA,
249 std::make_unique<DomTreeUpdater>(
250 &DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy),
251 std::nullopt, std::nullopt);
252
253 if (!Changed)
254 return PreservedAnalyses::all();
255
256
258
259#if defined(EXPENSIVE_CHECKS)
260 assert(getDomTreeUpdater()->getDomTree().verify(
261 DominatorTree::VerificationLevel::Full) &&
262 "DT broken after JumpThreading");
263 assert((!getDomTreeUpdater()->hasPostDomTree() ||
264 getDomTreeUpdater()->getPostDomTree().verify(
266 "PDT broken after JumpThreading");
267#else
268 assert(getDomTreeUpdater()->getDomTree().verify(
269 DominatorTree::VerificationLevel::Fast) &&
270 "DT broken after JumpThreading");
271 assert((!getDomTreeUpdater()->hasPostDomTree() ||
272 getDomTreeUpdater()->getPostDomTree().verify(
274 "PDT broken after JumpThreading");
275#endif
276
277 return getPreservedAnalysis();
278}
279
281 TargetLibraryInfo *TLI_,
283 AliasAnalysis *AA_,
284 std::unique_ptr<DomTreeUpdater> DTU_,
285 std::optional<BlockFrequencyInfo *> BFI_,
286 std::optional<BranchProbabilityInfo *> BPI_) {
287 LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");
288 F = &F_;
289 FAM = FAM_;
290 TLI = TLI_;
291 TTI = TTI_;
292 LVI = LVI_;
293 AA = AA_;
294 DTU = std::move(DTU_);
295 BFI = BFI_;
296 BPI = BPI_;
297 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
298 F->getParent(), Intrinsic::experimental_guard);
299 HasGuards = GuardDecl && !GuardDecl->use_empty();
300
301 // Reduce the number of instructions duplicated when optimizing strictly for
302 // size.
303 if (BBDuplicateThreshold.getNumOccurrences())
304 BBDupThreshold = BBDuplicateThreshold;
305 else if (F->hasFnAttribute(Attribute::MinSize))
306 BBDupThreshold = 3;
307 else
308 BBDupThreshold = DefaultBBDupThreshold;
309
310 // JumpThreading must not process blocks unreachable from entry. It's a
311 // waste of compute time and can potentially lead to hangs.
313 assert(DTU && "DTU isn't passed into JumpThreading before using it.");
314 assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
315 DominatorTree &DT = DTU->getDomTree();
316 for (auto &BB : *F)
317 if (!DT.isReachableFromEntry(&BB))
318 Unreachable.insert(&BB);
319
322
323 bool EverChanged = false;
324 bool Changed;
325 do {
326 Changed = false;
327 for (auto &BB : *F) {
328 if (Unreachable.count(&BB))
329 continue;
330 while (processBlock(&BB)) // Thread all of the branches we can over BB.
331 Changed = ChangedSinceLastAnalysisUpdate = true;
332
333 // Jump threading may have introduced redundant debug values into BB
334 // which should be removed.
335 if (Changed)
337
338 // Stop processing BB if it's the entry or is now deleted. The following
339 // routines attempt to eliminate BB and locating a suitable replacement
340 // for the entry is non-trivial.
341 if (&BB == &F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))
342 continue;
343
344 if (pred_empty(&BB)) {
345 // When processBlock makes BB unreachable it doesn't bother to fix up
346 // the instructions in it. We must remove BB to prevent invalid IR.
347 LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
348 << "' with terminator: " << *BB.getTerminator()
349 << '\n');
350 LoopHeaders.erase(&BB);
351 LVI->eraseBlock(&BB);
352 DeleteDeadBlock(&BB, DTU.get());
353 Changed = ChangedSinceLastAnalysisUpdate = true;
354 continue;
355 }
356
357 // processBlock doesn't thread BBs with unconditional TIs. However, if BB
358 // is "almost empty", we attempt to merge BB with its sole successor.
359 auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
360 if (BI && BI->isUnconditional()) {
361 BasicBlock *Succ = BI->getSuccessor(0);
362 if (
363 // The terminator must be the only non-phi instruction in BB.
364 BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
365 // Don't alter Loop headers and latches to ensure another pass can
366 // detect and transform nested loops later.
367 !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
370 // BB is valid for cleanup here because we passed in DTU. F remains
371 // BB's parent until a DTU->getDomTree() event.
372 LVI->eraseBlock(&BB);
373 Changed = ChangedSinceLastAnalysisUpdate = true;
374 }
375 }
376 }
377 EverChanged |= Changed;
378 } while (Changed);
379
380 LoopHeaders.clear();
381 return EverChanged;
382}
383
384// Replace uses of Cond with ToVal when safe to do so. If all uses are
385// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
386// because we may incorrectly replace uses when guards/assumes are uses of
387// `Cond` and we used the guards/assumes to reason about the `Cond` value
388// at the end of block. RAUW unconditionally replaces all uses
389// including the guards/assumes themselves and the uses before the
390// guard/assume.
392 BasicBlock *KnownAtEndOfBB) {
393 bool Changed = false;
394 assert(Cond->getType() == ToVal->getType());
395 // We can unconditionally replace all uses in non-local blocks (i.e. uses
396 // strictly dominated by BB), since LVI information is true from the
397 // terminator of BB.
398 if (Cond->getParent() == KnownAtEndOfBB)
399 Changed |= replaceNonLocalUsesWith(Cond, ToVal);
400 for (Instruction &I : reverse(*KnownAtEndOfBB)) {
401 // Replace any debug-info record users of Cond with ToVal.
402 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
403 DVR.replaceVariableLocationOp(Cond, ToVal, true);
404
405 // Reached the Cond whose uses we are trying to replace, so there are no
406 // more uses.
407 if (&I == Cond)
408 break;
409 // We only replace uses in instructions that are guaranteed to reach the end
410 // of BB, where we know Cond is ToVal.
412 break;
413 Changed |= I.replaceUsesOfWith(Cond, ToVal);
414 }
415 if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
416 Cond->eraseFromParent();
417 Changed = true;
418 }
419 return Changed;
420}
421
422/// Return the cost of duplicating a piece of this block from first non-phi
423/// and before StopAt instruction to thread across it. Stop scanning the block
424/// when exceeding the threshold. If duplication is impossible, returns ~0U.
426 BasicBlock *BB,
427 Instruction *StopAt,
428 unsigned Threshold) {
429 assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
430
431 // Do not duplicate the BB if it has a lot of PHI nodes.
432 // If a threadable chain is too long then the number of PHI nodes can add up,
433 // leading to a substantial increase in compile time when rewriting the SSA.
434 unsigned PhiCount = 0;
435 Instruction *FirstNonPHI = nullptr;
436 for (Instruction &I : *BB) {
437 if (!isa<PHINode>(&I)) {
438 FirstNonPHI = &I;
439 break;
440 }
441 if (++PhiCount > PhiDuplicateThreshold)
442 return ~0U;
443 }
444
445 /// Ignore PHI nodes, these will be flattened when duplication happens.
446 BasicBlock::const_iterator I(FirstNonPHI);
447
448 // FIXME: THREADING will delete values that are just used to compute the
449 // branch, so they shouldn't count against the duplication cost.
450
451 unsigned Bonus = 0;
452 if (BB->getTerminator() == StopAt) {
453 // Threading through a switch statement is particularly profitable. If this
454 // block ends in a switch, decrease its cost to make it more likely to
455 // happen.
456 if (isa<SwitchInst>(StopAt))
457 Bonus = 6;
458
459 // The same holds for indirect branches, but slightly more so.
460 if (isa<IndirectBrInst>(StopAt))
461 Bonus = 8;
462 }
463
464 // Bump the threshold up so the early exit from the loop doesn't skip the
465 // terminator-based Size adjustment at the end.
466 Threshold += Bonus;
467
468 // Sum up the cost of each instruction until we get to the terminator. Don't
469 // include the terminator because the copy won't include it.
470 unsigned Size = 0;
471 for (; &*I != StopAt; ++I) {
472
473 // Stop scanning the block if we've reached the threshold.
474 if (Size > Threshold)
475 return Size;
476
477 // Bail out if this instruction gives back a token type, it is not possible
478 // to duplicate it if it is used outside this BB.
479 if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
480 return ~0U;
481
482 // Blocks with NoDuplicate are modelled as having infinite cost, so they
483 // are never duplicated.
484 if (const CallInst *CI = dyn_cast<CallInst>(I))
485 if (CI->cannotDuplicate() || CI->isConvergent())
486 return ~0U;
487
490 continue;
491
492 // All other instructions count for at least one unit.
493 ++Size;
494
495 // Calls are more expensive. If they are non-intrinsic calls, we model them
496 // as having cost of 4. If they are a non-vector intrinsic, we model them
497 // as having cost of 2 total, and if they are a vector intrinsic, we model
498 // them as having cost 1.
499 if (const CallInst *CI = dyn_cast<CallInst>(I)) {
500 if (!isa<IntrinsicInst>(CI))
501 Size += 3;
502 else if (!CI->getType()->isVectorTy())
503 Size += 1;
504 }
505 }
506
507 return Size > Bonus ? Size - Bonus : 0;
508}
509
510/// findLoopHeaders - We do not want jump threading to turn proper loop
511/// structures into irreducible loops. Doing this breaks up the loop nesting
512/// hierarchy and pessimizes later transformations. To prevent this from
513/// happening, we first have to find the loop headers. Here we approximate this
514/// by finding targets of backedges in the CFG.
515///
516/// Note that there definitely are cases when we want to allow threading of
517/// edges across a loop header. For example, threading a jump from outside the
518/// loop (the preheader) to an exit block of the loop is definitely profitable.
519/// It is also almost always profitable to thread backedges from within the loop
520/// to exit blocks, and is often profitable to thread backedges to other blocks
521/// within the loop (forming a nested loop). This simple analysis is not rich
522/// enough to track all of these properties and keep it up-to-date as the CFG
523/// mutates, so we don't allow any of these transformations.
526 FindFunctionBackedges(F, Edges);
527
528 for (const auto &Edge : Edges)
529 LoopHeaders.insert(Edge.second);
530}
531
532/// getKnownConstant - Helper method to determine if we can thread over a
533/// terminator with the given value as its condition, and if so what value to
534/// use for that. What kind of value this is depends on whether we want an
535/// integer or a block address, but an undef is always accepted.
536/// Returns null if Val is null or not an appropriate constant.
538 if (!Val)
539 return nullptr;
540
541 // Undef is "known" enough.
542 if (UndefValue *U = dyn_cast<UndefValue>(Val))
543 return U;
544
545 if (Preference == WantBlockAddress)
546 return dyn_cast<BlockAddress>(Val->stripPointerCasts());
547
548 return dyn_cast<ConstantInt>(Val);
549}
550
551/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
552/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
553/// in any of our predecessors. If so, return the known list of value and pred
554/// BB in the result vector.
555///
556/// This returns true if there were any known values.
558 Value *V, BasicBlock *BB, PredValueInfo &Result,
559 ConstantPreference Preference, SmallPtrSet<Value *, 4> &RecursionSet,
560 Instruction *CxtI) {
561 const DataLayout &DL = BB->getDataLayout();
562
563 // This method walks up use-def chains recursively. Because of this, we could
564 // get into an infinite loop going around loops in the use-def chain. To
565 // prevent this, keep track of which values we've already visited (in
566 // RecursionSet) and terminate the search if we loop back to one of them.
567 if (!RecursionSet.insert(V).second)
568 return false;
569
570 // If V is a constant, then it is known in all predecessors.
571 if (Constant *KC = getKnownConstant(V, Preference)) {
572 for (BasicBlock *Pred : predecessors(BB))
573 Result.emplace_back(KC, Pred);
574
575 return !Result.empty();
576 }
577
578 // If V is a non-instruction value, or an instruction in a different block,
579 // then it can't be derived from a PHI.
580 Instruction *I = dyn_cast<Instruction>(V);
581 if (!I || I->getParent() != BB) {
582
583 // Okay, if this is a live-in value, see if it has a known value on any
584 // edge from our predecessors.
585 for (BasicBlock *P : predecessors(BB)) {
586 using namespace PatternMatch;
587 // If the value is known by LazyValueInfo to be a constant in a
588 // predecessor, use that information to try to thread this block.
589 Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
590 // If I is a non-local compare-with-constant instruction, use more-rich
591 // 'getPredicateOnEdge' method. This would be able to handle value
592 // inequalities better, for example if the compare is "X < 4" and "X < 3"
593 // is known true but "X < 4" itself is not available.
594 CmpPredicate Pred;
595 Value *Val;
596 Constant *Cst;
597 if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst))))
598 PredCst = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
599 if (Constant *KC = getKnownConstant(PredCst, Preference))
600 Result.emplace_back(KC, P);
601 }
602
603 return !Result.empty();
604 }
605
606 /// If I is a PHI node, then we know the incoming values for any constants.
607 if (PHINode *PN = dyn_cast<PHINode>(I)) {
608 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
609 Value *InVal = PN->getIncomingValue(i);
610 if (Constant *KC = getKnownConstant(InVal, Preference)) {
611 Result.emplace_back(KC, PN->getIncomingBlock(i));
612 } else {
613 Constant *CI = LVI->getConstantOnEdge(InVal,
614 PN->getIncomingBlock(i),
615 BB, CxtI);
616 if (Constant *KC = getKnownConstant(CI, Preference))
617 Result.emplace_back(KC, PN->getIncomingBlock(i));
618 }
619 }
620
621 return !Result.empty();
622 }
623
624 // Handle Cast instructions.
625 if (CastInst *CI = dyn_cast<CastInst>(I)) {
626 Value *Source = CI->getOperand(0);
627 PredValueInfoTy Vals;
628 computeValueKnownInPredecessorsImpl(Source, BB, Vals, Preference,
629 RecursionSet, CxtI);
630 if (Vals.empty())
631 return false;
632
633 // Convert the known values.
634 for (auto &Val : Vals)
635 if (Constant *Folded = ConstantFoldCastOperand(CI->getOpcode(), Val.first,
636 CI->getType(), DL))
637 Result.emplace_back(Folded, Val.second);
638
639 return !Result.empty();
640 }
641
642 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
643 Value *Source = FI->getOperand(0);
644 computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
645 RecursionSet, CxtI);
646
647 erase_if(Result, [](auto &Pair) {
648 return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
649 });
650
651 return !Result.empty();
652 }
653
654 // Handle some boolean conditions.
655 if (I->getType()->getPrimitiveSizeInBits() == 1) {
656 using namespace PatternMatch;
657 if (Preference != WantInteger)
658 return false;
659 // X | true -> true
660 // X & false -> false
661 Value *Op0, *Op1;
662 if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
663 match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
664 PredValueInfoTy LHSVals, RHSVals;
665
667 RecursionSet, CxtI);
669 RecursionSet, CxtI);
670
671 if (LHSVals.empty() && RHSVals.empty())
672 return false;
673
674 ConstantInt *InterestingVal;
675 if (match(I, m_LogicalOr()))
676 InterestingVal = ConstantInt::getTrue(I->getContext());
677 else
678 InterestingVal = ConstantInt::getFalse(I->getContext());
679
680 SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
681
682 // Scan for the sentinel. If we find an undef, force it to the
683 // interesting value: x|undef -> true and x&undef -> false.
684 for (const auto &LHSVal : LHSVals)
685 if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
686 Result.emplace_back(InterestingVal, LHSVal.second);
687 LHSKnownBBs.insert(LHSVal.second);
688 }
689 for (const auto &RHSVal : RHSVals)
690 if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
691 // If we already inferred a value for this block on the LHS, don't
692 // re-add it.
693 if (!LHSKnownBBs.count(RHSVal.second))
694 Result.emplace_back(InterestingVal, RHSVal.second);
695 }
696
697 return !Result.empty();
698 }
699
700 // Handle the NOT form of XOR.
701 if (I->getOpcode() == Instruction::Xor &&
702 isa<ConstantInt>(I->getOperand(1)) &&
703 cast<ConstantInt>(I->getOperand(1))->isOne()) {
704 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
705 WantInteger, RecursionSet, CxtI);
706 if (Result.empty())
707 return false;
708
709 // Invert the known values.
710 for (auto &R : Result)
711 R.first = ConstantExpr::getNot(R.first);
712
713 return true;
714 }
715
716 // Try to simplify some other binary operator values.
717 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
718 if (Preference != WantInteger)
719 return false;
720 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
721 PredValueInfoTy LHSVals;
722 computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
723 WantInteger, RecursionSet, CxtI);
724
725 // Try to use constant folding to simplify the binary operator.
726 for (const auto &LHSVal : LHSVals) {
727 Constant *V = LHSVal.first;
728 Constant *Folded =
729 ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
730
731 if (Constant *KC = getKnownConstant(Folded, WantInteger))
732 Result.emplace_back(KC, LHSVal.second);
733 }
734 }
735
736 return !Result.empty();
737 }
738
739 // Handle compare with phi operand, where the PHI is defined in this block.
740 if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
741 if (Preference != WantInteger)
742 return false;
743 Type *CmpType = Cmp->getType();
744 Value *CmpLHS = Cmp->getOperand(0);
745 Value *CmpRHS = Cmp->getOperand(1);
746 CmpInst::Predicate Pred = Cmp->getPredicate();
747
748 PHINode *PN = dyn_cast<PHINode>(CmpLHS);
749 if (!PN)
750 PN = dyn_cast<PHINode>(CmpRHS);
751 // Do not perform phi translation across a loop header phi, because this
752 // may result in comparison of values from two different loop iterations.
753 // FIXME: This check is broken if LoopHeaders is not populated.
754 if (PN && PN->getParent() == BB && !LoopHeaders.contains(BB)) {
755 const DataLayout &DL = PN->getDataLayout();
756 // We can do this simplification if any comparisons fold to true or false.
757 // See if any do.
758 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
759 BasicBlock *PredBB = PN->getIncomingBlock(i);
760 Value *LHS, *RHS;
761 if (PN == CmpLHS) {
762 LHS = PN->getIncomingValue(i);
763 RHS = CmpRHS->DoPHITranslation(BB, PredBB);
764 } else {
765 LHS = CmpLHS->DoPHITranslation(BB, PredBB);
766 RHS = PN->getIncomingValue(i);
767 }
768 Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
769 if (!Res) {
770 if (!isa<Constant>(RHS))
771 continue;
772
773 // getPredicateOnEdge call will make no sense if LHS is defined in BB.
774 auto LHSInst = dyn_cast<Instruction>(LHS);
775 if (LHSInst && LHSInst->getParent() == BB)
776 continue;
777
778 Res = LVI->getPredicateOnEdge(Pred, LHS, cast<Constant>(RHS), PredBB,
779 BB, CxtI ? CxtI : Cmp);
780 }
781
782 if (Constant *KC = getKnownConstant(Res, WantInteger))
783 Result.emplace_back(KC, PredBB);
784 }
785
786 return !Result.empty();
787 }
788
789 // If comparing a live-in value against a constant, see if we know the
790 // live-in value on any predecessors.
791 if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
792 Constant *CmpConst = cast<Constant>(CmpRHS);
793
794 if (!isa<Instruction>(CmpLHS) ||
795 cast<Instruction>(CmpLHS)->getParent() != BB) {
796 for (BasicBlock *P : predecessors(BB)) {
797 // If the value is known by LazyValueInfo to be a constant in a
798 // predecessor, use that information to try to thread this block.
799 Constant *Res = LVI->getPredicateOnEdge(Pred, CmpLHS, CmpConst, P, BB,
800 CxtI ? CxtI : Cmp);
801 if (Constant *KC = getKnownConstant(Res, WantInteger))
802 Result.emplace_back(KC, P);
803 }
804
805 return !Result.empty();
806 }
807
808 // InstCombine can fold some forms of constant range checks into
809 // (icmp (add (x, C1)), C2). See if we have such a thing with
810 // x as a live-in.
811 {
812 using namespace PatternMatch;
813
814 Value *AddLHS;
815 ConstantInt *AddConst;
816 if (isa<ConstantInt>(CmpConst) &&
817 match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
818 if (!isa<Instruction>(AddLHS) ||
819 cast<Instruction>(AddLHS)->getParent() != BB) {
820 for (BasicBlock *P : predecessors(BB)) {
821 // If the value is known by LazyValueInfo to be a ConstantRange in
822 // a predecessor, use that information to try to thread this
823 // block.
825 AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
826 // Propagate the range through the addition.
827 CR = CR.add(AddConst->getValue());
828
829 // Get the range where the compare returns true.
831 Pred, cast<ConstantInt>(CmpConst)->getValue());
832
833 Constant *ResC;
834 if (CmpRange.contains(CR))
835 ResC = ConstantInt::getTrue(CmpType);
836 else if (CmpRange.inverse().contains(CR))
837 ResC = ConstantInt::getFalse(CmpType);
838 else
839 continue;
840
841 Result.emplace_back(ResC, P);
842 }
843
844 return !Result.empty();
845 }
846 }
847 }
848
849 // Try to find a constant value for the LHS of a comparison,
850 // and evaluate it statically if we can.
851 PredValueInfoTy LHSVals;
852 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
853 WantInteger, RecursionSet, CxtI);
854
855 for (const auto &LHSVal : LHSVals) {
856 Constant *V = LHSVal.first;
857 Constant *Folded =
858 ConstantFoldCompareInstOperands(Pred, V, CmpConst, DL);
859 if (Constant *KC = getKnownConstant(Folded, WantInteger))
860 Result.emplace_back(KC, LHSVal.second);
861 }
862
863 return !Result.empty();
864 }
865 }
866
867 if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
868 // Handle select instructions where at least one operand is a known constant
869 // and we can figure out the condition value for any predecessor block.
870 Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
871 Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
872 PredValueInfoTy Conds;
873 if ((TrueVal || FalseVal) &&
874 computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
875 WantInteger, RecursionSet, CxtI)) {
876 for (auto &C : Conds) {
877 Constant *Cond = C.first;
878
879 // Figure out what value to use for the condition.
880 bool KnownCond;
881 if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
882 // A known boolean.
883 KnownCond = CI->isOne();
884 } else {
885 assert(isa<UndefValue>(Cond) && "Unexpected condition value");
886 // Either operand will do, so be sure to pick the one that's a known
887 // constant.
888 // FIXME: Do this more cleverly if both values are known constants?
889 KnownCond = (TrueVal != nullptr);
890 }
891
892 // See if the select has a known constant value for this predecessor.
893 if (Constant *Val = KnownCond ? TrueVal : FalseVal)
894 Result.emplace_back(Val, C.second);
895 }
896
897 return !Result.empty();
898 }
899 }
900
901 // If all else fails, see if LVI can figure out a constant value for us.
902 assert(CxtI->getParent() == BB && "CxtI should be in BB");
903 Constant *CI = LVI->getConstant(V, CxtI);
904 if (Constant *KC = getKnownConstant(CI, Preference)) {
905 for (BasicBlock *Pred : predecessors(BB))
906 Result.emplace_back(KC, Pred);
907 }
908
909 return !Result.empty();
910}
911
912/// getBestDestForJumpOnUndef - If we determine that the specified block ends
913/// in an undefined jump, decide which successor is best to revector to.
914///
915/// Since we can pick an arbitrary destination, we pick the successor with the
916/// fewest predecessors and return its index, reducing the others' in-degree.
918 Instruction *BBTerm = BB->getTerminator();
919 unsigned MinSucc = 0;
920 BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
921 // Find the successor with the fewest predecessors; ties keep the lowest index.
922 unsigned MinNumPreds = pred_size(TestBB);
923 for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
924 TestBB = BBTerm->getSuccessor(i);
925 unsigned NumPreds = pred_size(TestBB);
926 if (NumPreds < MinNumPreds) {
927 MinSucc = i;
928 MinNumPreds = NumPreds;
929 }
930 }
931
932 return MinSucc;
933}
934
936 if (!BB->hasAddressTaken()) return false;
937
938 // If the block has its address taken, there may be a tree of dead constants
939 // hanging off of it. These shouldn't keep the block alive.
942 return !BA->use_empty();
943}
944
945/// processBlock - If there are any predecessors whose control can be threaded
946/// through to a successor, transform them now.
///
/// The steps are tried in order and the function returns as soon as one of
/// them fires. It returns true when a step changed something (including the
/// case where only the branch condition was constant folded) and false when
/// nothing could be done for BB.
948 // If the block is trivially dead, just return and let the caller nuke it.
949 // This simplifies other transformations.
950 if (DTU->isBBPendingDeletion(BB) ||
951 (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
952 return false;
953
954 // If this block has a single predecessor, and if that pred has a single
955 // successor, merge the blocks. This encourages recursive jump threading
956 // because now the condition in this block can be threaded through
957 // predecessors of our predecessor block.
// NOTE(review): the guarding `if` (listing line 958) is not visible in this
// extract.
959 return true;
960
// NOTE(review): the guarding `if` (listing line 961) is not visible in this
// extract.
962 return true;
963
964 // Look if we can propagate guards to predecessors.
965 if (HasGuards && processGuards(BB))
966 return true;
967
968 // What kind of constant we're looking for.
969 ConstantPreference Preference = WantInteger;
970
971 // Look to see if the terminator is a conditional branch, switch or indirect
972 // branch, if not we can't thread it.
973 Value *Condition;
974 Instruction *Terminator = BB->getTerminator();
975 if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
976 // Can't thread an unconditional jump.
977 if (BI->isUnconditional()) return false;
978 Condition = BI->getCondition();
979 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
980 Condition = SI->getCondition();
981 } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
982 // Can't thread indirect branch with no successors.
983 if (IB->getNumSuccessors() == 0) return false;
984 Condition = IB->getAddress()->stripPointerCasts();
// For an indirect branch the interesting constants are block addresses
// rather than integers.
985 Preference = WantBlockAddress;
986 } else {
987 return false; // Must be an invoke or callbr.
988 }
989
990 // Keep track if we constant folded the condition in this invocation.
991 bool ConstantFolded = false;
992
993 // Run constant folding to see if we can reduce the condition to a simple
994 // constant.
995 if (Instruction *I = dyn_cast<Instruction>(Condition)) {
// NOTE(review): the initializer of SimpleVal (listing line 997) is not
// visible in this extract.
996 Value *SimpleVal =
998 if (SimpleVal) {
999 I->replaceAllUsesWith(SimpleVal);
1000 if (isInstructionTriviallyDead(I, TLI))
1001 I->eraseFromParent();
1002 Condition = SimpleVal;
1003 ConstantFolded = true;
1004 }
1005 }
1006
1007 // If the terminator is branching on an undef or freeze undef, we can pick any
1008 // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
// The freeze form is only accepted when the freeze has a single use, so that
// picking a value for it here cannot disagree with another user.
1009 auto *FI = dyn_cast<FreezeInst>(Condition);
1010 if (isa<UndefValue>(Condition) ||
1011 (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1012 unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1013 std::vector<DominatorTree::UpdateType> Updates;
1014
1015 // Fold the branch/switch.
1016 Instruction *BBTerm = BB->getTerminator();
1017 Updates.reserve(BBTerm->getNumSuccessors());
// Detach BB from every successor except the chosen one and record the
// corresponding edge deletions for the dominator tree updater.
1018 for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1019 if (i == BestSucc) continue;
1020 BasicBlock *Succ = BBTerm->getSuccessor(i);
1021 Succ->removePredecessor(BB, true);
1022 Updates.push_back({DominatorTree::Delete, BB, Succ});
1023 }
1024
1025 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1026 << "' folding undef terminator: " << *BBTerm << '\n');
// Replace the old terminator with an unconditional branch to the chosen
// successor, keeping the old terminator's debug location.
1027 Instruction *NewBI = BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm->getIterator());
1028 NewBI->setDebugLoc(BBTerm->getDebugLoc());
1029 ++NumFolds;
1030 BBTerm->eraseFromParent();
1031 DTU->applyUpdatesPermissive(Updates);
// FI is only non-null here when the single-use freeze(undef) case matched;
// its one use was the terminator erased above, so it can be erased too.
1032 if (FI)
1033 FI->eraseFromParent();
1034 return true;
1035 }
1036
1037 // If the terminator of this block is branching on a constant, simplify the
1038 // terminator to an unconditional branch. This can occur due to threading in
1039 // other blocks.
1040 if (getKnownConstant(Condition, Preference)) {
1041 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1042 << "' folding terminator: " << *BB->getTerminator()
1043 << '\n');
1044 ++NumFolds;
1045 ConstantFoldTerminator(BB, true, nullptr, DTU.get());
1046 if (auto *BPI = getBPI())
1047 BPI->eraseBlock(BB);
1048 return true;
1049 }
1050
1051 Instruction *CondInst = dyn_cast<Instruction>(Condition);
1052
1053 // All the rest of our checks depend on the condition being an instruction.
1054 if (!CondInst) {
1055 // FIXME: Unify this with code below.
1056 if (processThreadableEdges(Condition, BB, Preference, Terminator))
1057 return true;
// Nothing was threaded, but the condition may still have been constant
// folded above; report that as a change.
1058 return ConstantFolded;
1059 }
1060
1061 // Some of the following optimizations can safely work on the unfrozen cond.
1062 Value *CondWithoutFreeze = CondInst;
1063 if (auto *FI = dyn_cast<FreezeInst>(CondInst))
1064 CondWithoutFreeze = FI->getOperand(0);
1065
1066 if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
1067 // If we're branching on a conditional, LVI might be able to determine
1068 // its value at the branch instruction. We only handle comparisons
1069 // against a constant at this time.
1070 if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
1071 Constant *Res =
1072 LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1073 CondConst, BB->getTerminator(),
1074 /*UseBlockValue=*/false);
1075 if (Res) {
1076 // We can safely replace *some* uses of the CondInst if it has
1077 // exactly one value as returned by LVI. RAUW is incorrect in the
1078 // presence of guards and assumes that have the `Cond` as a use. This
1079 // is because we use the guards/assumes to reason about the `Cond` value
1080 // at the end of the block, but RAUW unconditionally replaces all uses
1081 // including the guards/assumes themselves and the uses before the
1082 // guard/assume.
1083 if (replaceFoldableUses(CondCmp, Res, BB))
1084 return true;
1085 }
1086
1087 // We did not manage to simplify this branch, try to see whether
1088 // CondCmp depends on a known phi-select pattern.
1089 if (tryToUnfoldSelect(CondCmp, BB))
1090 return true;
1091 }
1092 }
1093
1094 if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
1095 if (tryToUnfoldSelect(SI, BB))
1096 return true;
1097
1098 // Check for some cases that are worth simplifying. Right now we want to look
1099 // for loads that are used by a switch or by the condition for the branch. If
1100 // we see one, check to see if it's partially redundant. If so, insert a PHI
1101 // which can then be used to thread the values.
1102 Value *SimplifyValue = CondWithoutFreeze;
1103
// For a comparison against a constant, look through the comparison to its
// first operand.
1104 if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1105 if (isa<Constant>(CondCmp->getOperand(1)))
1106 SimplifyValue = CondCmp->getOperand(0);
1107
1108 // TODO: There are other places where load PRE would be profitable, such as
1109 // more complex comparisons.
// NOTE(review): the inner guard (listing line 1111) is not visible in this
// extract.
1110 if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
1112 return true;
1113
1114 // Before threading, try to propagate profile data backwards:
// NOTE(review): the statement guarded by these conditions (listing line 1117)
// is not visible in this extract.
1115 if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1116 if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1118
1119 // Handle a variety of cases where we are branching on something derived from
1120 // a PHI node in the current block. If we can prove that any predecessors
1121 // compute a predictable value based on a PHI node, thread those predecessors.
1122 if (processThreadableEdges(CondInst, BB, Preference, Terminator))
1123 return true;
1124
1125 // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1126 // the current block, see if we can simplify.
1127 PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
1128 if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1129 return processBranchOnPHI(PN);
1130
1131 // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1132 if (CondInst->getOpcode() == Instruction::Xor &&
1133 CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1134 return processBranchOnXOR(cast<BinaryOperator>(CondInst));
1135
1136 // Search for a stronger dominating condition that can be used to simplify a
1137 // conditional branch leaving BB.
// NOTE(review): the guarding `if` (listing line 1138) is not visible in this
// extract.
1139 return true;
1140
1141 return false;
1142}
1143
// Tries to fold BB's conditional branch using a branch condition that is
// already known on the way into BB. Starting at BB, the code walks up the
// chain of single predecessors (for at most ImplicationSearchThreshold steps)
// and asks whether the predecessor's branch condition, together with the edge
// that was taken, implies BB's condition to be true or false. On success the
// branch in BB is replaced by an unconditional branch and true is returned;
// otherwise false is returned and nothing is changed.
// NOTE(review): the signature (listing line 1144) is not visible in this
// extract.
1145 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1146 if (!BI || !BI->isConditional())
1147 return false;
1148
1149 Value *Cond = BI->getCondition();
1150 // Assuming that predecessor's branch was taken, if pred's branch condition
1151 // (V) implies Cond, Cond can be either true, undef, or poison. In this case,
1152 // freeze(Cond) is either true or a nondeterministic value.
1153 // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
1154 // without affecting other instructions.
1155 auto *FICond = dyn_cast<FreezeInst>(Cond);
1156 if (FICond && FICond->hasOneUse())
1157 Cond = FICond->getOperand(0);
1158 else
1159 FICond = nullptr;
1160
1161 BasicBlock *CurrentBB = BB;
1162 BasicBlock *CurrentPred = BB->getSinglePredecessor();
1163 unsigned Iter = 0;
1164
1165 auto &DL = BB->getDataLayout();
1166
1167 while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
// Stop as soon as the chain is not made of two-way conditional branches
// that lead into the block we came from.
1168 auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1169 if (!PBI || !PBI->isConditional())
1170 return false;
1171 if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1172 return false;
1173
// Successor 0 is the edge taken when the predecessor's condition is true.
1174 bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1175 std::optional<bool> Implication =
1176 isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1177
1178 // If the branch condition of BB (a single-use freeze) and the branch
1179 // condition of CurrentPred are both freezes of the same operand, Cond can
// be folded to CondIsTrue.
1180 if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
1181 if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
1182 FICond->getOperand(0))
1183 Implication = CondIsTrue;
1184 }
1185
1186 if (Implication) {
// An implied-true condition keeps successor 0, implied-false keeps
// successor 1; the other edge is removed.
1187 BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1188 BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1189 RemoveSucc->removePredecessor(BB);
1190 BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI->getIterator());
1191 UncondBI->setDebugLoc(BI->getDebugLoc());
1192 ++NumFolds;
1193 BI->eraseFromParent();
// FICond is only non-null for a single-use freeze, whose one use was the
// branch erased above.
1194 if (FICond)
1195 FICond->eraseFromParent();
1196
1197 DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
1198 if (auto *BPI = getBPI())
1199 BPI->eraseBlock(BB);
1200 return true;
1201 }
// No implication from this predecessor; move one step further up the
// single-predecessor chain.
1202 CurrentBB = CurrentPred;
1203 CurrentPred = CurrentBB->getSinglePredecessor();
1204 }
1205
1206 return false;
1207}
1208
1209/// Return true if Op is an instruction defined in the given block. Values
/// that are not instructions always yield false.
1211 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1212 if (OpInst->getParent() == BB)
1213 return true;
1214 return false;
1215}
1216
1217/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1218/// redundant load instruction, eliminate it by replacing it with a PHI node.
1219/// This is an important optimization that encourages jump threading, and needs
1220/// to be run interlaced with other jump threading tasks.
///
/// Returns true if LoadI was erased, either because its value was already
/// available earlier in its own block or because it was replaced by a PHI of
/// values available in the predecessors; returns false if LoadI is left in
/// place.
1222 // Don't hack volatile and ordered loads.
1223 if (!LoadI->isUnordered()) return false;
1224
1225 // If the load is defined in a block with exactly one predecessor, it can't be
1226 // partially redundant.
1227 BasicBlock *LoadBB = LoadI->getParent();
1228 if (LoadBB->getSinglePredecessor())
1229 return false;
1230
1231 // If the load is defined in an EH pad, it can't be partially redundant,
1232 // because the edges between the invoke and the EH pad cannot have other
1233 // instructions between them.
1234 if (LoadBB->isEHPad())
1235 return false;
1236
1237 Value *LoadedPtr = LoadI->getOperand(0);
1238
1239 // If the loaded operand is defined in the LoadBB and it's not a phi,
1240 // it can't be available in predecessors.
1241 if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1242 return false;
1243
1244 // Scan a few instructions up from the load, to see if it is obviously live at
1245 // the entry to its block.
1246 BasicBlock::iterator BBIt(LoadI);
1247 bool IsLoadCSE;
1248 BatchAAResults BatchAA(*AA);
1249 // The dominator tree is updated lazily and may not be valid at this point.
1250 BatchAA.disableDominatorTree();
1251 if (Value *AvailableVal = FindAvailableLoadedValue(
1252 LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
1253 // If the value of the load is locally available within the block, just use
1254 // it. This frequently occurs for reg2mem'd allocas.
1255
// When the available value is itself a load, merge LoadI's metadata into it
// and drop any cached LVI facts about that load.
1256 if (IsLoadCSE) {
1257 LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1258 combineMetadataForCSE(NLoadI, LoadI, false);
1259 LVI->forgetValue(NLoadI);
1260 };
1261
1262 // If the returned value is the load itself, replace with poison. This can
1263 // only happen in dead loops.
1264 if (AvailableVal == LoadI)
1265 AvailableVal = PoisonValue::get(LoadI->getType());
// A value of a different type is cast to the load's type right before the
// load, reusing the load's debug location.
1266 if (AvailableVal->getType() != LoadI->getType()) {
1267 AvailableVal = CastInst::CreateBitOrPointerCast(
1268 AvailableVal, LoadI->getType(), "", LoadI->getIterator());
1269 cast<Instruction>(AvailableVal)->setDebugLoc(LoadI->getDebugLoc());
1270 }
1271 LoadI->replaceAllUsesWith(AvailableVal);
1272 LoadI->eraseFromParent();
1273 return true;
1274 }
1275
1276 // Otherwise, if we scanned the whole block and got to the top of the block,
1277 // we know the block is locally transparent to the load. If not, something
1278 // might clobber its value.
1279 if (BBIt != LoadBB->begin())
1280 return false;
1281
1282 // If all of the loads and stores that feed the value have the same AA tags,
1283 // then we can propagate them onto any newly inserted loads.
1284 AAMDNodes AATags = LoadI->getAAMetadata();
1285
1286 SmallPtrSet<BasicBlock*, 8> PredsScanned;
1287
1288 using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1289
// AvailablePreds pairs each predecessor with the value found for the load
// there; OneUnavailablePred remembers the last predecessor with no value.
// NOTE(review): the declaration of CSELoads (listing line 1292) is not visible
// in this extract.
1290 AvailablePredsTy AvailablePreds;
1291 BasicBlock *OneUnavailablePred = nullptr;
1293
1294 // If we got here, the loaded value is transparent through to the start of the
1295 // block. Check to see if it is available in any of the predecessor blocks.
1296 for (BasicBlock *PredBB : predecessors(LoadBB)) {
1297 // If we already scanned this predecessor, skip it.
1298 if (!PredsScanned.insert(PredBB).second)
1299 continue;
1300
1301 BBIt = PredBB->end();
1302 unsigned NumScanedInst = 0;
1303 Value *PredAvailable = nullptr;
1304 // NOTE: We don't CSE load that is volatile or anything stronger than
1305 // unordered, that should have been checked when we entered the function.
1306 assert(LoadI->isUnordered() &&
1307 "Attempting to CSE volatile or atomic loads");
1308 // If this is a load on a phi pointer, phi-translate it and search
1309 // for available load/store to the pointer in predecessors.
1310 Type *AccessTy = LoadI->getType();
1311 const auto &DL = LoadI->getDataLayout();
1312 MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
1313 LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
1314 AATags);
1315 PredAvailable = findAvailablePtrLoadStore(
1316 Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
1317 &BatchAA, &IsLoadCSE, &NumScanedInst);
1318
1319 // If PredBB has a single predecessor, continue scanning through the
1320 // single predecessor.
// The walk continues only while the scan reached the top of the current
// block and the overall instruction budget has not been used up.
1321 BasicBlock *SinglePredBB = PredBB;
1322 while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1323 NumScanedInst < DefMaxInstsToScan) {
1324 SinglePredBB = SinglePredBB->getSinglePredecessor();
1325 if (SinglePredBB) {
1326 BBIt = SinglePredBB->end();
1327 PredAvailable = findAvailablePtrLoadStore(
1328 Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
1329 (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
1330 &NumScanedInst);
1331 }
1332 }
1333
1334 if (!PredAvailable) {
1335 OneUnavailablePred = PredBB;
1336 continue;
1337 }
1338
1339 if (IsLoadCSE)
1340 CSELoads.push_back(cast<LoadInst>(PredAvailable));
1341
1342 // If so, this load is partially redundant. Remember this info so that we
1343 // can create a PHI node.
1344 AvailablePreds.emplace_back(PredBB, PredAvailable);
1345 }
1346
1347 // If the loaded value isn't available in any predecessor, it isn't partially
1348 // redundant.
1349 if (AvailablePreds.empty()) return false;
1350
1351 // Okay, the loaded value is available in at least one (and maybe all!)
1352 // predecessors. If the value is unavailable in more than one unique
1353 // predecessor, we want to insert a merge block for those common predecessors.
1354 // This ensures that we only have to insert one reload, thus not increasing
1355 // code size.
1356 BasicBlock *UnavailablePred = nullptr;
1357
1358 // If the value is unavailable in one of predecessors, we will end up
1359 // inserting a new instruction into them. It is only valid if all the
1360 // instructions before LoadI are guaranteed to pass execution to its
1361 // successor, or if LoadI is safe to speculate.
1362 // TODO: If this logic becomes more complex, and we will perform PRE insertion
1363 // farther than to a predecessor, we need to reuse the code from GVN's PRE.
1364 // It requires dominator tree analysis, so for this simple case it is
1365 // overkill.
// NOTE(review): the second half of this condition (listing line 1367) and the
// check inside the loop (listing line 1369) are not visible in this extract.
1366 if (PredsScanned.size() != AvailablePreds.size() &&
1368 for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
1370 return false;
1371
1372 // If there is exactly one predecessor where the value is unavailable, the
1373 // already computed 'OneUnavailablePred' block is it. If it ends in an
1374 // unconditional branch, we know that it isn't a critical edge.
1375 if (PredsScanned.size() == AvailablePreds.size()+1 &&
1376 OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1377 UnavailablePred = OneUnavailablePred;
1378 } else if (PredsScanned.size() != AvailablePreds.size()) {
1379 // Otherwise, we had multiple unavailable predecessors or we had a critical
1380 // edge from the one.
1381 SmallVector<BasicBlock*, 8> PredsToSplit;
1382 SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
1383
1384 for (const auto &AvailablePred : AvailablePreds)
1385 AvailablePredSet.insert(AvailablePred.first);
1386
1387 // Add all the unavailable predecessors to the PredsToSplit list.
1388 for (BasicBlock *P : predecessors(LoadBB)) {
1389 // If the predecessor is an indirect goto, we can't split the edge.
1390 if (isa<IndirectBrInst>(P->getTerminator()))
1391 return false;
1392
1393 if (!AvailablePredSet.count(P))
1394 PredsToSplit.push_back(P);
1395 }
1396
1397 // Split them out to their own block.
1398 UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1399 }
1400
1401 // If the value isn't available in all predecessors, then there will be
1402 // exactly one where it isn't available. Insert a load on that edge and add
1403 // it to the AvailablePreds list.
1404 if (UnavailablePred) {
1405 assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1406 "Can't handle critical edge here!");
// The new load copies LoadI's type, alignment, ordering and sync scope, is
// named "<LoadI name>.pr", and is placed before the terminator of
// UnavailablePred using the phi-translated pointer for that edge.
1407 LoadInst *NewVal = new LoadInst(
1408 LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1409 LoadI->getName() + ".pr", false, LoadI->getAlign(),
1410 LoadI->getOrdering(), LoadI->getSyncScopeID(),
1411 UnavailablePred->getTerminator()->getIterator());
1412 NewVal->setDebugLoc(LoadI->getDebugLoc());
1413 if (AATags)
1414 NewVal->setAAMetadata(AATags);
1415
1416 AvailablePreds.emplace_back(UnavailablePred, NewVal);
1417 }
1418
1419 // Now we know that each predecessor of this block has a value in
1420 // AvailablePreds, sort them for efficient access as we're walking the preds.
1421 array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1422
1423 // Create a PHI node at the start of the block for the PRE'd load value.
1424 PHINode *PN = PHINode::Create(LoadI->getType(), pred_size(LoadBB), "");
1425 PN->insertBefore(LoadBB->begin());
1426 PN->takeName(LoadI);
1427 PN->setDebugLoc(LoadI->getDebugLoc());
1428
1429 // Insert new entries into the PHI for each predecessor. A single block may
1430 // have multiple entries here.
1431 for (BasicBlock *P : predecessors(LoadBB)) {
// Binary search in the sorted list; the null second element makes the
// probe compare no greater than any entry for P.
1432 AvailablePredsTy::iterator I =
1433 llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
1434
1435 assert(I != AvailablePreds.end() && I->first == P &&
1436 "Didn't find entry for predecessor!");
1437
1438 // If we have an available predecessor but it requires casting, insert the
1439 // cast in the predecessor and use the cast. Note that we have to update the
1440 // AvailablePreds vector as we go so that all of the PHI entries for this
1441 // predecessor use the same bitcast.
// NOTE(review): the first line of the guarded statement (listing line 1444)
// is not visible in this extract.
1442 Value *&PredV = I->second;
1443 if (PredV->getType() != LoadI->getType())
1445 PredV, LoadI->getType(), "", P->getTerminator()->getIterator());
1446
1447 PN->addIncoming(PredV, I->first);
1448 }
1449
// Merge LoadI's metadata into every predecessor load that is being reused and
// drop any cached LVI facts about those loads.
1450 for (LoadInst *PredLoadI : CSELoads) {
1451 combineMetadataForCSE(PredLoadI, LoadI, true);
1452 LVI->forgetValue(PredLoadI);
1453 }
1454
1455 LoadI->replaceAllUsesWith(PN);
1456 LoadI->eraseFromParent();
1457
1458 return true;
1459}
1460
1461/// findMostPopularDest - The specified list contains multiple possible
1462/// threadable destinations. Pick the one that occurs the most frequently in
1463/// the list.
///
/// PredToDestList must not be empty. Entries with a null destination (undef)
/// are not counted; nullptr is returned only when no non-null destination
/// occurs in the list.
1464static BasicBlock *
1466 const SmallVectorImpl<std::pair<BasicBlock *,
1467 BasicBlock *>> &PredToDestList) {
1468 assert(!PredToDestList.empty());
1469
1470 // Determine popularity. If there are multiple possible destinations, we
1471 // explicitly choose to ignore 'undef' destinations. We prefer to thread
1472 // blocks with known and real destinations to threading undef. We'll handle
1473 // them later if interesting.
1474 MapVector<BasicBlock *, unsigned> DestPopularity;
1475
1476 // Populate DestPopularity with the successors in the order they appear in the
1477 // successor list. This way, we ensure determinism by iterating it in the
1478 // same order in llvm::max_element below. We map nullptr to 0 so that we can
1479 // return nullptr when PredToDestList contains nullptr only.
1480 DestPopularity[nullptr] = 0;
1481 for (auto *SuccBB : successors(BB))
1482 DestPopularity[SuccBB] = 0;
1483
// Count one vote per list entry with a known (non-null) destination.
1484 for (const auto &PredToDest : PredToDestList)
1485 if (PredToDest.second)
1486 DestPopularity[PredToDest.second]++;
1487
1488 // Find the most popular dest.
1489 auto MostPopular = llvm::max_element(DestPopularity, llvm::less_second());
1490
1491 // Okay, we have finally picked the most popular destination.
1492 return MostPopular->first;
1493}
1494
1495// Try to evaluate the value of V when the control flows from PredPredBB to
1496// BB->getSinglePredecessor() and then on to BB.
// Returns the constant V evaluates to on that path, or nullptr if it cannot
// be determined. BB is required to have a single predecessor.
1498 BasicBlock *PredPredBB,
1499 Value *V,
1500 const DataLayout &DL) {
1501 BasicBlock *PredBB = BB->getSinglePredecessor();
1502 assert(PredBB && "Expected a single predecessor");
1503
// A constant evaluates to itself on every path.
1504 if (Constant *Cst = dyn_cast<Constant>(V)) {
1505 return Cst;
1506 }
1507
1508 // Consult LVI if V is not an instruction in BB or PredBB.
1509 Instruction *I = dyn_cast<Instruction>(V);
1510 if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1511 return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1512 }
1513
1514 // Look into a PHI argument.
// Only a PHI in PredBB can be resolved: use its incoming value for the edge
// from PredPredBB if that value is a constant.
1515 if (PHINode *PHI = dyn_cast<PHINode>(V)) {
1516 if (PHI->getParent() == PredBB)
1517 return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1518 return nullptr;
1519 }
1520
1521 // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
// Only comparisons located in BB are handled; both operands must evaluate
// to constants on the path for the comparison to be folded.
1522 if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1523 if (CondCmp->getParent() == BB) {
1524 Constant *Op0 =
1525 evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0), DL);
1526 Constant *Op1 =
1527 evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1), DL);
1528 if (Op0 && Op1) {
1529 return ConstantFoldCompareInstOperands(CondCmp->getPredicate(), Op0,
1530 Op1, DL);
1531 }
1532 }
1533 return nullptr;
1534 }
1535
// Any other kind of instruction in BB or PredBB is not evaluated.
1536 return nullptr;
1537}
1538
1540 ConstantPreference Preference,
1541 Instruction *CxtI) {
// Uses the values of Cond that are known on individual incoming edges of BB to
// either fold BB's terminator (when every predecessor leads to the same
// successor) or thread the predecessors that share the most popular
// destination around BB. Returns true on the fold path; on the threading path
// the result is whatever tryThreadEdge returns.
1542 // If threading this would thread across a loop header, don't even try to
1543 // thread the edge.
1544 if (LoopHeaders.count(BB))
1545 return false;
1546
1547 PredValueInfoTy PredValues;
1548 if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1549 CxtI)) {
1550 // We don't have known values in predecessors. See if we can thread through
1551 // BB and its sole predecessor.
// NOTE(review): the statement for this case (listing line 1552) is not
// visible in this extract.
1553 }
1554
1555 assert(!PredValues.empty() &&
1556 "computeValueKnownInPredecessors returned true with no values");
1557
1558 LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1559 for (const auto &PredValue : PredValues) {
1560 dbgs() << " BB '" << BB->getName()
1561 << "': FOUND condition = " << *PredValue.first
1562 << " for pred '" << PredValue.second->getName() << "'.\n";
1563 });
1564
1565 // Decide what we want to thread through. Convert our list of known values to
1566 // a list of known destinations for each pred. This also discards duplicate
1567 // predecessors and keeps track of the undefined inputs (which are represented
1568 // as a null dest in the PredToDestList).
// NOTE(review): the declarations of SeenPreds and PredToDestList (listing
// lines 1569-1570) are not visible in this extract.
1571
// OnlyDest/OnlyVal track the single destination/value seen so far. The
// all-ones sentinel pointers mean "more than one distinct destination (or
// value) has been seen".
1572 BasicBlock *OnlyDest = nullptr;
1573 BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1574 Constant *OnlyVal = nullptr;
1575 Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1576
1577 for (const auto &PredValue : PredValues) {
1578 BasicBlock *Pred = PredValue.second;
1579 if (!SeenPreds.insert(Pred).second)
1580 continue; // Duplicate predecessor entry.
1581
1582 Constant *Val = PredValue.first;
1583
// Map the known value to the successor BB's terminator would jump to; an
// undef value has no fixed destination and is recorded as null.
1584 BasicBlock *DestBB;
1585 if (isa<UndefValue>(Val))
1586 DestBB = nullptr;
1587 else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1588 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
// isZero() converts to the successor index: a zero (false) condition selects
// successor 1, a non-zero (true) condition selects successor 0.
1589 DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1590 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1591 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1592 DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1593 } else {
1594 assert(isa<IndirectBrInst>(BB->getTerminator())
1595 && "Unexpected terminator");
1596 assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1597 DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1598 }
1599
1600 // If we have exactly one destination, remember it for efficiency below.
1601 if (PredToDestList.empty()) {
1602 OnlyDest = DestBB;
1603 OnlyVal = Val;
1604 } else {
1605 if (OnlyDest != DestBB)
1606 OnlyDest = MultipleDestSentinel;
1607 // It is possible that we have the same destination but a different value,
1608 // e.g. the default case in a switchinst.
1609 if (Val != OnlyVal)
1610 OnlyVal = MultipleVal;
1611 }
1612
1613 // If the predecessor ends with an indirect goto, we can't change its
1614 // destination.
1615 if (isa<IndirectBrInst>(Pred->getTerminator()))
1616 continue;
1617
1618 PredToDestList.emplace_back(Pred, DestBB);
1619 }
1620
1621 // If all edges were unthreadable, we fail.
1622 if (PredToDestList.empty())
1623 return false;
1624
1625 // If all the predecessors go to a single known successor, we want to fold,
1626 // not thread. By doing so, we do not need to duplicate the current block and
1627 // also miss potential opportunities in case we don't/can't duplicate.
1628 if (OnlyDest && OnlyDest != MultipleDestSentinel) {
// Folding is only done when the list covers every predecessor edge of BB.
1629 if (BB->hasNPredecessors(PredToDestList.size())) {
1630 bool SeenFirstBranchToOnlyDest = false;
1631 std::vector <DominatorTree::UpdateType> Updates;
1632 Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
// Keep exactly one edge to OnlyDest; every other successor edge (including
// further edges to OnlyDest) is removed.
1633 for (BasicBlock *SuccBB : successors(BB)) {
1634 if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1635 SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1636 } else {
1637 SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1638 Updates.push_back({DominatorTree::Delete, BB, SuccBB});
1639 }
1640 }
1641
1642 // Finally update the terminator.
1643 Instruction *Term = BB->getTerminator();
1644 Instruction *NewBI = BranchInst::Create(OnlyDest, Term->getIterator());
1645 NewBI->setDebugLoc(Term->getDebugLoc());
1646 ++NumFolds;
1647 Term->eraseFromParent();
1648 DTU->applyUpdatesPermissive(Updates);
1649 if (auto *BPI = getBPI())
1650 BPI->eraseBlock(BB);
1651
1652 // If the condition is now dead due to the removal of the old terminator,
1653 // erase it.
1654 if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1655 if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1656 CondInst->eraseFromParent();
1657 // We can safely replace *some* uses of the CondInst if it has
1658 // exactly one value as returned by LVI. RAUW is incorrect in the
1659 // presence of guards and assumes that have the `Cond` as a use. This
1660 // is because we use the guards/assumes to reason about the `Cond` value
1661 // at the end of the block, but RAUW unconditionally replaces all uses
1662 // including the guards/assumes themselves and the uses before the
1663 // guard/assume.
1664 else if (OnlyVal && OnlyVal != MultipleVal)
1665 replaceFoldableUses(CondInst, OnlyVal, BB);
1666 }
1667 return true;
1668 }
1669 }
1670
1671 // Determine which is the most common successor. If we have many inputs and
1672 // this block is a switch, we want to start by threading the batch that goes
1673 // to the most popular destination first. If we only know about one
1674 // threadable destination (the common case) we can avoid this.
1675 BasicBlock *MostPopularDest = OnlyDest;
1676
1677 if (MostPopularDest == MultipleDestSentinel) {
1678 // Remove any loop headers from the Dest list, threadEdge conservatively
1679 // won't process them, but we might have other destinations that are
1680 // eligible and we still want to process.
1681 erase_if(PredToDestList,
1682 [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1683 return LoopHeaders.contains(PredToDest.second);
1684 });
1685
1686 if (PredToDestList.empty())
1687 return false;
1688
1689 MostPopularDest = findMostPopularDest(BB, PredToDestList);
1690 }
1691
1692 // Now that we know what the most popular destination is, factor all
1693 // predecessors that will jump to it into a single predecessor.
1694 SmallVector<BasicBlock*, 16> PredsToFactor;
1695 for (const auto &PredToDest : PredToDestList)
1696 if (PredToDest.second == MostPopularDest) {
1697 BasicBlock *Pred = PredToDest.first;
1698
1699 // This predecessor may be a switch or something else that has multiple
1700 // edges to the block. Factor each of these edges by listing them
1701 // according to # occurrences in PredsToFactor.
1702 for (BasicBlock *Succ : successors(Pred))
1703 if (Succ == BB)
1704 PredsToFactor.push_back(Pred);
1705 }
1706
1707 // If the threadable edges are branching on an undefined value, we get to pick
1708 // the destination that these predecessors should get to.
1709 if (!MostPopularDest)
1710 MostPopularDest = BB->getTerminator()->
1711 getSuccessor(getBestDestForJumpOnUndef(BB));
1712
1713 // Ok, try to thread it!
1714 return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
1715}
1716
1717/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
1718/// a PHI node (or freeze PHI) in the current block. See if there are any
1719/// simplifications we can do based on inputs to the phi node.
///
/// Returns true as soon as the block has been duplicated into one predecessor,
/// and false if no incoming block qualified or every attempt failed.
1721 BasicBlock *BB = PN->getParent();
1722
1723 // TODO: We could make use of this to do it once for blocks with common PHI
1724 // values.
// PredBBs is a one-element list that is refilled with each candidate below.
// NOTE(review): its declaration (listing line 1725) is not visible in this
// extract.
1726 PredBBs.resize(1);
1727
1728 // If any of the predecessor blocks end in an unconditional branch, we can
1729 // *duplicate* the conditional branch into that block in order to further
1730 // encourage jump threading and to eliminate cases where we have branch on a
1731 // phi of an icmp (branch on icmp is much better).
1732 // This is still beneficial when a frozen phi is used as the branch condition
1733 // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
1734 // to br(icmp(freeze ...)).
1735 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1736 BasicBlock *PredBB = PN->getIncomingBlock(i);
1737 if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
1738 if (PredBr->isUnconditional()) {
1739 PredBBs[0] = PredBB;
1740 // Try to duplicate BB into PredBB.
1741 if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1742 return true;
1743 }
1744 }
1745
1746 return false;
1747}
1748
1749/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
1750/// a xor instruction in the current block. See if there are any
1751/// simplifications we can do based on inputs to the xor.
/// Returns true when the xor was simplified in place; otherwise returns the
/// result of duplicateCondBranchOnPHIIntoPred, or false when no transformation
/// applies.
// NOTE(review): the signature (listing line 1752) is absent from this extract;
// the body reads the xor through a pointer named BO.
1753 BasicBlock *BB = BO->getParent();
1754
1755 // If either the LHS or RHS of the xor is a constant, don't do this
1756 // optimization.
1757 if (isa<ConstantInt>(BO->getOperand(0)) ||
1758 isa<ConstantInt>(BO->getOperand(1)))
1759 return false;
1760
1761 // If the first instruction in BB isn't a phi, we won't be able to infer
1762 // anything special about any particular predecessor.
1763 if (!isa<PHINode>(BB->front()))
1764 return false;
1765
1766 // If this BB is a landing pad, we won't be able to split the edge into it.
1767 if (BB->isEHPad())
1768 return false;
1769
1770 // If we have a xor as the branch input to this block, and we know that the
1771 // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1772 // the condition into the predecessor and fix that value to true, saving some
1773 // logical ops on that path and encouraging other paths to simplify.
1774 //
1775 // This copies something like this:
1776 //
1777 // BB:
1778 // %X = phi i1 [1], [%X']
1779 // %Y = icmp eq i32 %A, %B
1780 // %Z = xor i1 %X, %Y
1781 // br i1 %Z, ...
1782 //
1783 // Into:
1784 // BB':
1785 // %Y = icmp ne i32 %A, %B
1786 // br i1 %Y, ...
1787
// XorOpValues collects (value, predecessor) pairs for one xor operand. isLHS
// records which operand they describe: operand 0 when true, operand 1 when
// false. Operand 1 is only analyzed if operand 0 yielded nothing.
1788 PredValueInfoTy XorOpValues;
1789 bool isLHS = true;
1790 if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1791 WantInteger, BO)) {
1792 assert(XorOpValues.empty());
1793 if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1794 WantInteger, BO))
1795 return false;
1796 isLHS = false;
1797 }
1798
1799 assert(!XorOpValues.empty() &&
1800 "computeValueKnownInPredecessors returned true with no values");
1801
1802 // Scan the information to see which is most popular: true or false. The
1803 // predecessors can be of the set true, false, or undef.
1804 unsigned NumTrue = 0, NumFalse = 0;
1805 for (const auto &XorOpValue : XorOpValues) {
1806 if (isa<UndefValue>(XorOpValue.first))
1807 // Ignore undefs for the count.
1808 continue;
1809 if (cast<ConstantInt>(XorOpValue.first)->isZero())
1810 ++NumFalse;
1811 else
1812 ++NumTrue;
1813 }
1814
1815 // Determine which value to split on, true, false, or undef if neither.
// A tie between true and false goes to false. SplitVal stays null only when
// every predecessor value was undef.
1816 ConstantInt *SplitVal = nullptr;
1817 if (NumTrue > NumFalse)
1818 SplitVal = ConstantInt::getTrue(BB->getContext());
1819 else if (NumTrue != 0 || NumFalse != 0)
1820 SplitVal = ConstantInt::getFalse(BB->getContext());
1821
1822 // Collect all of the blocks that this can be folded into so that we can
1823 // factor this once and clone it once.
// A predecessor qualifies when its value equals SplitVal or is undef.
1824 SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1825 for (const auto &XorOpValue : XorOpValues) {
1826 if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1827 continue;
1828
1829 BlocksToFoldInto.push_back(XorOpValue.second);
1830 }
1831
1832 // If we inferred a value for all of the predecessors, then duplication won't
1833 // help us. However, we can just replace the LHS or RHS with the constant.
// "All predecessors" is measured against the incoming-value count of the
// first PHI in BB.
1834 if (BlocksToFoldInto.size() ==
1835 cast<PHINode>(BB->front()).getNumIncomingValues()) {
1836 if (!SplitVal) {
1837 // If all preds provide undef, just nuke the xor, because it is undef too.
// NOTE(review): listing line 1838 is absent from this extract; presumably it
// replaces the uses of BO before the erase below -- confirm against the
// original file.
1839 BO->eraseFromParent();
1840 } else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) {
1841 // If all preds provide 0, replace the xor with the other input.
// getOperand(isLHS) uses the bool as an index, i.e. it selects the operand
// that was NOT analyzed above.
1842 BO->replaceAllUsesWith(BO->getOperand(isLHS));
1843 BO->eraseFromParent();
1844 } else {
1845 // If all preds provide 1, set the computed value to 1.
// setOperand(!isLHS, ...) overwrites the analyzed operand with SplitVal.
1846 BO->setOperand(!isLHS, SplitVal);
1847 }
1848
1849 return true;
1850 }
1851
1852 // If any of predecessors end with an indirect goto, we can't change its
1853 // destination.
1854 if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
1855 return isa<IndirectBrInst>(Pred->getTerminator());
1856 }))
1857 return false;
1858
1859 // Try to duplicate BB into PredBB.
1860 return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1861}
1862
1863/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1864/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1865/// NewPred using the entries from OldPred (suitably mapped).
/// Incoming values that are not instructions, or that have no entry in
/// ValueMap, are reused unchanged.
// NOTE(review): listing lines 1866 (start of the signature, including the
// PHIBB parameter) and 1869 (the ValueMap parameter) are absent from this
// extract.
1867 BasicBlock *OldPred,
1868 BasicBlock *NewPred,
1870 for (PHINode &PN : PHIBB->phis()) {
1871 // Ok, we have a PHI node. Figure out what the incoming value was for the
1872 // DestBlock.
1873 Value *IV = PN.getIncomingValueForBlock(OldPred);
1874
1875 // Remap the value if necessary.
1876 if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
// NOTE(review): listing line 1877 is absent from this extract; presumably it
// declares the iterator I by looking Inst up in ValueMap -- confirm.
1878 if (I != ValueMap.end())
1879 IV = I->second;
1880 }
1881
1882 PN.addIncoming(IV, NewPred);
1883 }
1884}
1885
1886/// Merge basic block BB into its sole predecessor if possible.
/// Returns true if the merge was performed, false if BB has no single
/// predecessor or one of the safety checks below rejects the merge.
// NOTE(review): the signature (listing line 1887) is absent from this extract.
1888 BasicBlock *SinglePred = BB->getSinglePredecessor();
1889 if (!SinglePred)
1890 return false;
1891
// Refuse to merge when the predecessor's terminator is a special terminator
// or has more than one successor, when BB is its own predecessor, or when
// BB's address is taken and used.
1892 const Instruction *TI = SinglePred->getTerminator();
1893 if (TI->isSpecialTerminator() || TI->getNumSuccessors() != 1 ||
1894 SinglePred == BB || hasAddressTakenAndUsed(BB))
1895 return false;
1896
1897 // If SinglePred was a loop header, BB becomes one.
1898 if (LoopHeaders.erase(SinglePred))
1899 LoopHeaders.insert(BB);
1900
// Drop the cached LVI data for SinglePred before it is merged into BB.
1901 LVI->eraseBlock(SinglePred);
1902 MergeBasicBlockIntoOnlyPred(BB, DTU.get());
1903
1904 // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
1905 // BB code within one basic block `BB`), we need to invalidate the LVI
1906 // information associated with BB, because the LVI information need not be
1907 // true for all of BB after the merge. For example,
1908 // Before the merge, LVI info and code is as follows:
1909 // SinglePred: <LVI info1 for %p val>
1910 // %y = use of %p
1911 // call @exit() // need not transfer execution to successor.
1912 // assume(%p) // from this point on %p is true
1913 // br label %BB
1914 // BB: <LVI info2 for %p val, i.e. %p is true>
1915 // %x = use of %p
1916 // br label exit
1917 //
1918 // Note that this LVI info for blocks BB and SinglePred is correct for %p
1919 // (info2 and info1 respectively). After the merge and the deletion of the
1920 // LVI info1 for SinglePred, we have the following code:
1921 // BB: <LVI info2 for %p val>
1922 // %y = use of %p
1923 // call @exit()
1924 // assume(%p)
1925 // %x = use of %p <-- LVI info2 is correct from here onwards.
1926 // br label exit
1927 // LVI info2 for BB is incorrect at the beginning of BB.
1928
1929 // Invalidate LVI information for BB if the LVI is not provably true for
1930 // all of BB.
// NOTE(review): listing line 1931 is absent from this extract; presumably it
// is the condition guarding the erase below -- confirm against the original.
1932 LVI->eraseBlock(BB);
1933 return true;
1934}
1935
1936/// Update the SSA form. NewBB contains instructions that are copied from BB.
1937/// ValueMapping maps old values in BB to new ones in NewBB.
// NOTE(review): the first signature line (listing line 1938) is absent from
// this extract; the body reads blocks named BB and NewBB.
1939 ValueToValueMapTy &ValueMapping) {
1940 // If there were values defined in BB that are used outside the block, then we
1941 // now have to update all uses of the value to use either the original value,
1942 // the cloned value, or some PHI derived value. This can require arbitrary
1943 // PHI insertion, which is delegated to SSAUpdater below.
1944 SSAUpdater SSAUpdate;
1945 SmallVector<Use *, 16> UsesToRename;
// NOTE(review): listing line 1946 is absent from this extract; presumably it
// declares DbgValues, the list of dbg.value intrinsics used below -- confirm.
1947 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1948
1949 for (Instruction &I : *BB) {
1950 // Scan all uses of this instruction to see if it is used outside of its
1951 // block, and if so, record them in UsesToRename.
1952 for (Use &U : I.uses()) {
1953 Instruction *User = cast<Instruction>(U.getUser());
// A PHI use counts as local when the value flows in along an edge from BB,
// regardless of which block the PHI itself lives in.
1954 if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1955 if (UserPN->getIncomingBlock(U) == BB)
1956 continue;
1957 } else if (User->getParent() == BB)
1958 continue;
1959
1960 UsesToRename.push_back(&U);
1961 }
1962
1963 // Find debug values outside of the block
// Both debug-info forms are gathered, then those located in BB are dropped.
1964 findDbgValues(DbgValues, &I, &DbgVariableRecords);
1965 llvm::erase_if(DbgValues, [&](const DbgValueInst *DbgVal) {
1966 return DbgVal->getParent() == BB;
1967 });
1968 llvm::erase_if(DbgVariableRecords, [&](const DbgVariableRecord *DbgVarRec) {
1969 return DbgVarRec->getParent() == BB;
1970 });
1971
1972 // If there are no uses outside the block, we're done with this instruction.
1973 if (UsesToRename.empty() && DbgValues.empty() && DbgVariableRecords.empty())
1974 continue;
1975 LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
1976
1977 // We found a use of I outside of BB. Rename all uses of I that are outside
1978 // its block to be uses of the appropriate PHI node etc. Seed the updater
1979 // with the two values we know: I in BB and its clone in NewBB.
1980 SSAUpdate.Initialize(I.getType(), I.getName());
1981 SSAUpdate.AddAvailableValue(BB, &I);
1982 SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
1983
// Draining UsesToRename here leaves it empty for the next instruction.
1984 while (!UsesToRename.empty())
1985 SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
1986 if (!DbgValues.empty() || !DbgVariableRecords.empty()) {
1987 SSAUpdate.UpdateDebugValues(&I, DbgValues);
1988 SSAUpdate.UpdateDebugValues(&I, DbgVariableRecords);
1989 DbgValues.clear();
1990 DbgVariableRecords.clear();
1991 }
1992
1993 LLVM_DEBUG(dbgs() << "\n");
1994 }
1995}
1996
1997/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
1998/// arguments that come from PredBB. The map from the variables in the source
1999/// basic block to the variables in NewBB is recorded in ValueMapping.
2000
// NOTE(review): the first signature lines (listing lines 2001-2003) are absent
// from this extract; the body reads ValueMapping and the iterators BI and BE.
2004 BasicBlock *NewBB,
2005 BasicBlock *PredBB) {
2006 // We are going to have to map operands from the source basic block to the new
2007 // copy of the block 'NewBB'. If there are PHI nodes in the source basic
2008 // block, evaluate them to account for entry from PredBB.
2009
2010 // Retargets llvm.dbg.value to any renamed variables.
// Returns true for every DbgValueInst (whether or not an operand was
// remapped) and false for any other instruction.
2011 auto RetargetDbgValueIfPossible = [&](Instruction *NewInst) -> bool {
2012 auto DbgInstruction = dyn_cast<DbgValueInst>(NewInst);
2013 if (!DbgInstruction)
2014 return false;
2015
// Collect the replacements first and apply them afterwards, so the operand
// list is not modified while it is being iterated.
2016 SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
2017 for (auto DbgOperand : DbgInstruction->location_ops()) {
2018 auto DbgOperandInstruction = dyn_cast<Instruction>(DbgOperand);
2019 if (!DbgOperandInstruction)
2020 continue;
2021
2022 auto I = ValueMapping.find(DbgOperandInstruction);
2023 if (I != ValueMapping.end()) {
2024 OperandsToRemap.insert(
2025 std::pair<Value *, Value *>(DbgOperand, I->second));
2026 }
2027 }
2028
2029 for (auto &[OldOp, MappedOp] : OperandsToRemap)
2030 DbgInstruction->replaceVariableLocationOp(OldOp, MappedOp);
2031 return true;
2032 };
2033
2034 // Duplicate implementation of the above dbg.value code, using
2035 // DbgVariableRecords instead.
2036 auto RetargetDbgVariableRecordIfPossible = [&](DbgVariableRecord *DVR) {
2037 SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
2038 for (auto *Op : DVR->location_ops()) {
2039 Instruction *OpInst = dyn_cast<Instruction>(Op);
2040 if (!OpInst)
2041 continue;
2042
2043 auto I = ValueMapping.find(OpInst);
2044 if (I != ValueMapping.end())
2045 OperandsToRemap.insert({OpInst, I->second});
2046 }
2047
2048 for (auto &[OldOp, MappedOp] : OperandsToRemap)
2049 DVR->replaceVariableLocationOp(OldOp, MappedOp);
2050 };
2051
// Remember the source block now; BI is advanced by the loops below.
2052 BasicBlock *RangeBB = BI->getParent();
2053
2054 // Clone the phi nodes of the source basic block into NewBB. The resulting
2055 // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
2056 // might need to rewrite the operand of the cloned phi.
2057 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2058 PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
2059 NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
2060 ValueMapping[PN] = NewPN;
2061 }
2062
2063 // Clone noalias scope declarations in the threaded block. When threading a
2064 // loop exit, we would otherwise end up with two identical scope declarations
2065 // visible at the same time.
2066 SmallVector<MDNode *> NoAliasScopes;
2067 DenseMap<MDNode *, MDNode *> ClonedScopes;
2068 LLVMContext &Context = PredBB->getContext();
2069 identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
2070 cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
2071
// Copies the debug records attached to From onto NewInst and retargets the
// variable records among them through ValueMapping.
2072 auto CloneAndRemapDbgInfo = [&](Instruction *NewInst, Instruction *From) {
2073 auto DVRRange = NewInst->cloneDebugInfoFrom(From);
2074 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
2075 RetargetDbgVariableRecordIfPossible(&DVR);
2076 };
2077
2078 // Clone the non-phi instructions of the source basic block into NewBB,
2079 // keeping track of the mapping and using it to remap operands in the cloned
2080 // instructions.
2081 for (; BI != BE; ++BI) {
2082 Instruction *New = BI->clone();
2083 New->setName(BI->getName());
2084 New->insertInto(NewBB, NewBB->end());
2085 ValueMapping[&*BI] = New;
2086 adaptNoAliasScopes(New, ClonedScopes, Context);
2087
2088 CloneAndRemapDbgInfo(New, &*BI);
2089
// dbg.value intrinsics are fully handled by the lambda, so the generic
// operand remapping below is skipped for them.
2090 if (RetargetDbgValueIfPossible(New))
2091 continue;
2092
2093 // Remap operands to patch up intra-block references.
2094 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2095 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2096 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
2097 if (I != ValueMapping.end())
2098 New->setOperand(i, I->second);
2099 }
2100 }
2101
2102 // There may be DbgVariableRecords on the terminator, clone directly from
2103 // marker to marker as there isn't an instruction there.
2104 if (BE != RangeBB->end() && BE->hasDbgRecords()) {
2105 // Dump them at the end.
2106 DbgMarker *Marker = RangeBB->getMarker(BE);
2107 DbgMarker *EndMarker = NewBB->createMarker(NewBB->end());
2108 auto DVRRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);
2109 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
2110 RetargetDbgVariableRecordIfPossible(&DVR);
2111 }
2112}
2113
2114/// Attempt to thread through two successive basic blocks.
/// Returns true if threadThroughTwoBasicBlocks was invoked, false if any of
/// the legality or cost checks below rejected the transformation.
// NOTE(review): the first signature line (listing line 2115) is absent from
// this extract; the body reads a block named BB.
2116 Value *Cond) {
2117 // Consider:
2118 //
2119 // PredBB:
2120 // %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
2121 // %tobool = icmp eq i32 %cond, 0
2122 // br i1 %tobool, label %BB, label ...
2123 //
2124 // BB:
2125 // %cmp = icmp eq i32* %var, null
2126 // br i1 %cmp, label ..., label ...
2127 //
2128 // We don't know the value of %var at BB even if we know which incoming edge
2129 // we take to BB. However, once we duplicate PredBB for each of its incoming
2130 // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
2131 // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
2132
2133 // Require that BB end with a Branch for simplicity.
2134 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2135 if (!CondBr)
2136 return false;
2137
2138 // BB must have exactly one predecessor.
2139 BasicBlock *PredBB = BB->getSinglePredecessor();
2140 if (!PredBB)
2141 return false;
2142
2143 // Require that PredBB end with a conditional Branch. If PredBB ends with an
2144 // unconditional branch, we should be merging PredBB and BB instead. For
2145 // simplicity, we don't deal with a switch.
2146 BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2147 if (!PredBBBranch || PredBBBranch->isUnconditional())
2148 return false;
2149
2150 // If PredBB has exactly one incoming edge, we don't gain anything by copying
2151 // PredBB.
2152 if (PredBB->getSinglePredecessor())
2153 return false;
2154
2155 // Don't thread through PredBB if it contains a successor edge to itself, in
2156 // which case we would infinite loop. Suppose we are threading an edge from
2157 // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
2158 // successor edge to itself. If we allowed jump threading in this case, we
2159 // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
2160 // PredBB.thread has a successor edge to PredBB, we would immediately come up
2161 // with another jump threading opportunity from PredBB.thread through PredBB
2162 // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
2163 // would keep peeling one iteration from PredBB.
2164 if (llvm::is_contained(successors(PredBB), PredBB))
2165 return false;
2166
2167 // Don't thread across a loop header.
2168 if (LoopHeaders.count(PredBB))
2169 return false;
2170
2171 // Avoid complication with duplicating EH pads.
2172 if (PredBB->isEHPad())
2173 return false;
2174
2175 // Find a predecessor that we can thread. For simplicity, we only consider a
2176 // successor edge out of BB to which we thread exactly one incoming edge into
2177 // PredBB.
// ZeroCount/OneCount count the predecessors of PredBB for which the evaluated
// constant is zero/one; ZeroPred/OnePred remember the last such predecessor.
2178 unsigned ZeroCount = 0;
2179 unsigned OneCount = 0;
2180 BasicBlock *ZeroPred = nullptr;
2181 BasicBlock *OnePred = nullptr;
2182 const DataLayout &DL = BB->getDataLayout();
2183 for (BasicBlock *P : predecessors(PredBB)) {
2184 // If PredPred ends with IndirectBrInst, we can't handle it.
2185 if (isa<IndirectBrInst>(P->getTerminator()))
2186 continue;
2187 if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
// NOTE(review): listing line 2188 (the argument of dyn_cast_or_null and the
// end of this condition) is absent from this extract.
2189 if (CI->isZero()) {
2190 ZeroCount++;
2191 ZeroPred = P;
2192 } else if (CI->isOne()) {
2193 OneCount++;
2194 OnePred = P;
2195 }
2196 }
2197 }
2198
2199 // Disregard complicated cases where we have to thread multiple edges.
// The zero case is preferred when both counts are exactly one.
2200 BasicBlock *PredPredBB;
2201 if (ZeroCount == 1) {
2202 PredPredBB = ZeroPred;
2203 } else if (OneCount == 1) {
2204 PredPredBB = OnePred;
2205 } else {
2206 return false;
2207 }
2208
// The bool is used as a successor index: 1 when the zero predecessor was
// chosen, 0 otherwise.
2209 BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
2210
2211 // If threading to the same block as we come from, we would infinite loop.
2212 if (SuccBB == BB) {
2213 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2214 << "' - would thread to self!\n");
2215 return false;
2216 }
2217
2218 // If threading this would thread across a loop header, don't thread the edge.
2219 // See the comments above findLoopHeaders for justifications and caveats.
2220 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2221 LLVM_DEBUG({
2222 bool BBIsHeader = LoopHeaders.count(BB);
2223 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2224 dbgs() << " Not threading across "
2225 << (BBIsHeader ? "loop header BB '" : "block BB '")
2226 << BB->getName() << "' to dest "
2227 << (SuccIsHeader ? "loop header BB '" : "block BB '")
2228 << SuccBB->getName()
2229 << "' - it might create an irreducible loop!\n";
2230 });
2231 return false;
2232 }
2233
2234 // Compute the cost of duplicating BB and PredBB.
2235 unsigned BBCost = getJumpThreadDuplicationCost(
2236 TTI, BB, BB->getTerminator(), BBDupThreshold);
2237 unsigned PredBBCost = getJumpThreadDuplicationCost(
2238 TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
2239
2240 // Give up if costs are too high. We need to check BBCost and PredBBCost
2241 // individually before checking their sum because getJumpThreadDuplicationCost
2242 // returns (unsigned)~0 for those basic blocks that cannot be duplicated, and
// adding to that value would wrap around.
2243 if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
2244 BBCost + PredBBCost > BBDupThreshold) {
2245 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2246 << "' - Cost is too high: " << PredBBCost
2247 << " for PredBB, " << BBCost << "for BB\n");
2248 return false;
2249 }
2250
2251 // Now we are ready to duplicate PredBB.
2252 threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
2253 return true;
2254}
2255
// Duplicates PredBB into a new block that PredPredBB branches to instead, then
// threads the edge from that copy across BB to SuccBB via threadEdge.
// NOTE(review): the first signature line (listing line 2256, which declares
// PredPredBB) is absent from this extract.
2257 BasicBlock *PredBB,
2258 BasicBlock *BB,
2259 BasicBlock *SuccBB) {
2260 LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
2261 << BB->getName() << "'\n");
2262
2263 // Build BPI/BFI before any changes are made to IR.
2264 bool HasProfile = doesBlockHaveProfileData(BB);
2265 auto *BFI = getOrCreateBFI(HasProfile);
2266 auto *BPI = getOrCreateBPI(BFI != nullptr);
2267
2268 BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
2269 BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
2270
// Create the copy of PredBB and place it directly after PredBB.
2271 BasicBlock *NewBB =
2272 BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
2273 PredBB->getParent(), PredBB);
2274 NewBB->moveAfter(PredBB);
2275
2276 // Set the block frequency of NewBB.
// It is the frequency of the edge PredPredBB -> PredBB that is being moved.
2277 if (BFI) {
2278 assert(BPI && "It's expected BPI to exist along with BFI");
2279 auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
2280 BPI->getEdgeProbability(PredPredBB, PredBB);
2281 BFI->setBlockFreq(NewBB, NewBBFreq);
2282 }
2283
2284 // We are going to have to map operands from the original BB block to the new
2285 // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
2286 // to account for entry from PredPredBB.
// The whole of PredBB is cloned, including its terminator.
2287 ValueToValueMapTy ValueMapping;
2288 cloneInstructions(ValueMapping, PredBB->begin(), PredBB->end(), NewBB,
2289 PredPredBB);
2290
2291 // Copy the edge probabilities from PredBB to NewBB.
2292 if (BPI)
2293 BPI->copyEdgeProbabilities(PredBB, NewBB);
2294
2295 // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
2296 // This eliminates predecessors from PredPredBB, which requires us to simplify
2297 // any PHI nodes in PredBB.
2298 Instruction *PredPredTerm = PredPredBB->getTerminator();
2299 for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
2300 if (PredPredTerm->getSuccessor(i) == PredBB) {
2301 PredBB->removePredecessor(PredPredBB, true);
2302 PredPredTerm->setSuccessor(i, NewBB);
2303 }
2304
// NewBB ends in a clone of PredBB's branch, so both of that branch's
// successors gain NewBB as a predecessor and need matching PHI entries.
2305 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
2306 ValueMapping);
2307 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
2308 ValueMapping);
2309
2310 DTU->applyUpdatesPermissive(
2311 {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
2312 {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
2313 {DominatorTree::Insert, PredPredBB, NewBB},
2314 {DominatorTree::Delete, PredPredBB, PredBB}});
2315
2316 updateSSA(PredBB, NewBB, ValueMapping);
2317
2318 // Clean up things like PHI nodes with single operands, dead instructions,
2319 // etc.
2320 SimplifyInstructionsInBlock(NewBB, TLI);
2321 SimplifyInstructionsInBlock(PredBB, TLI);
2322
// Finally thread the single edge NewBB -> BB across BB to SuccBB.
2323 SmallVector<BasicBlock *, 1> PredsToFactor;
2324 PredsToFactor.push_back(NewBB);
2325 threadEdge(BB, PredsToFactor, SuccBB);
2326}
2327
2328/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
/// Returns true if threadEdge was invoked, false if threading would target BB
/// itself, would cross a loop header, or would exceed the duplication
/// threshold.
// NOTE(review): the first signature line (listing line 2329) is absent from
// this extract.
2330 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
2331 BasicBlock *SuccBB) {
2332 // If threading to the same block as we come from, we would infinite loop.
2333 if (SuccBB == BB) {
2334 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2335 << "' - would thread to self!\n");
2336 return false;
2337 }
2338
2339 // If threading this would thread across a loop header, don't thread the edge.
2340 // See the comments above findLoopHeaders for justifications and caveats.
2341 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2342 LLVM_DEBUG({
2343 bool BBIsHeader = LoopHeaders.count(BB);
2344 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2345 dbgs() << " Not threading across "
2346 << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
2347 << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
2348 << SuccBB->getName() << "' - it might create an irreducible loop!\n";
2349 });
2350 return false;
2351 }
2352
// Profitability: the cost of duplicating BB must not exceed BBDupThreshold.
2353 unsigned JumpThreadCost = getJumpThreadDuplicationCost(
2354 TTI, BB, BB->getTerminator(), BBDupThreshold);
2355 if (JumpThreadCost > BBDupThreshold) {
2356 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2357 << "' - Cost is too high: " << JumpThreadCost << "\n");
2358 return false;
2359 }
2360
2361 threadEdge(BB, PredBBs, SuccBB);
2362 return true;
2363}
2364
2365/// threadEdge - We have decided that it is safe and profitable to factor the
2366/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
2367/// across BB. Transform the IR to reflect this change.
/// Callers must have ruled out SuccBB == BB and loop headers; both conditions
/// are only asserted here.
// NOTE(review): the first signature line (listing line 2368) is absent from
// this extract; the body reads a block named BB.
2369 const SmallVectorImpl<BasicBlock *> &PredBBs,
2370 BasicBlock *SuccBB) {
2371 assert(SuccBB != BB && "Don't create an infinite loop");
2372
2373 assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
2374 "Don't thread across loop headers");
2375
2376 // Build BPI/BFI before any changes are made to IR.
2377 bool HasProfile = doesBlockHaveProfileData(BB);
2378 auto *BFI = getOrCreateBFI(HasProfile);
2379 auto *BPI = getOrCreateBPI(BFI != nullptr);
2380
2381 // Start by factoring the predecessors into a single block if needed.
2382 BasicBlock *PredBB;
2383 if (PredBBs.size() == 1)
2384 PredBB = PredBBs[0];
2385 else {
2386 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2387 << " common predecessors.\n");
2388 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2389 }
2390
2391 // And finally, do it!
2392 LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
2393 << "' to '" << SuccBB->getName()
2394 << ", across block:\n " << *BB << "\n");
2395
2396 LVI->threadEdge(PredBB, BB, SuccBB);
2397
// NOTE(review): listing line 2398 is absent from this extract; presumably it
// begins the declaration of NewBB via BasicBlock::Create -- confirm.
2399 BB->getName()+".thread",
2400 BB->getParent(), BB);
2401 NewBB->moveAfter(PredBB);
2402
2403 // Set the block frequency of NewBB.
// It is the frequency of the edge PredBB -> BB that is being redirected.
2404 if (BFI) {
2405 assert(BPI && "It's expected BPI to exist along with BFI");
2406 auto NewBBFreq =
2407 BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
2408 BFI->setBlockFreq(NewBB, NewBBFreq);
2409 }
2410
2411 // Copy all the instructions from BB to NewBB except the terminator.
2412 ValueToValueMapTy ValueMapping;
2413 cloneInstructions(ValueMapping, BB->begin(), std::prev(BB->end()), NewBB,
2414 PredBB);
2415
2416 // We didn't copy the terminator from BB over to NewBB, because there is now
2417 // an unconditional jump to SuccBB. Insert the unconditional jump.
2418 BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
2419 NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
2420
2421 // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
2422 // PHI nodes for NewBB now.
2423 addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
2424
2425 // Update the terminator of PredBB to jump to NewBB instead of BB. This
2426 // eliminates predecessors from BB, which requires us to simplify any PHI
2427 // nodes in BB.
2428 Instruction *PredTerm = PredBB->getTerminator();
2429 for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
2430 if (PredTerm->getSuccessor(i) == BB) {
2431 BB->removePredecessor(PredBB, true);
2432 PredTerm->setSuccessor(i, NewBB);
2433 }
2434
2435 // Enqueue required DT updates.
2436 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
2437 {DominatorTree::Insert, PredBB, NewBB},
2438 {DominatorTree::Delete, PredBB, BB}});
2439
2440 updateSSA(BB, NewBB, ValueMapping);
2441
2442 // At this point, the IR is fully up to date and consistent. Do a quick scan
2443 // over the new instructions and zap any that are constants or dead. This
2444 // frequently happens because of phi translation.
2445 SimplifyInstructionsInBlock(NewBB, TLI);
2446
2447 // Update the edge weight from BB to SuccBB, which should be less than before.
2448 updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
2449
2450 // Threaded an edge!
2451 ++NumThreads;
2452}
2453
2454/// Create a new basic block that will be the predecessor of BB and successor of
2455/// all blocks in Preds. When profile data is available, update the frequency of
2456/// this new block.
/// Returns the first block created; for a landing-pad BB a second block is
/// created as well but not returned.
2457BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
// NOTE(review): listing line 2458 (the Preds parameter) is absent from this
// extract.
2459 const char *Suffix) {
// NOTE(review): listing line 2460 is absent from this extract; presumably it
// declares NewBBs, the list of created blocks used below -- confirm.
2461
2462 // Collect the frequencies of all predecessors of BB, which will be used to
2463 // update the edge weight of the result of splitting predecessors.
// NOTE(review): listing line 2464 is absent from this extract; presumably it
// declares FreqMap, which maps each predecessor to its edge frequency into
// BB -- confirm.
2465 auto *BFI = getBFI();
// Frequencies are only tracked when getBFI() returns a non-null analysis.
2466 if (BFI) {
2467 auto *BPI = getOrCreateBPI(true);
2468 for (auto *Pred : Preds)
2469 FreqMap.insert(std::make_pair(
2470 Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
2471 }
2472
2473 // In the case when BB is a LandingPad block we create 2 new predecessors
2474 // instead of just one.
2475 if (BB->isLandingPad()) {
2476 std::string NewName = std::string(Suffix) + ".split-lp";
2477 SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
2478 } else {
2479 NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
2480 }
2481
// Build the dominator-tree updates: each new block gains an edge to BB, and
// every predecessor of a new block has its edge to BB replaced by an edge to
// that new block.
2482 std::vector<DominatorTree::UpdateType> Updates;
2483 Updates.reserve((2 * Preds.size()) + NewBBs.size());
2484 for (auto *NewBB : NewBBs) {
2485 BlockFrequency NewBBFreq(0);
2486 Updates.push_back({DominatorTree::Insert, NewBB, BB});
2487 for (auto *Pred : predecessors(NewBB)) {
2488 Updates.push_back({DominatorTree::Delete, Pred, BB});
2489 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
2490 if (BFI) // Update frequencies between Pred -> NewBB.
2491 NewBBFreq += FreqMap.lookup(Pred);
2492 }
2493 if (BFI) // Apply the summed frequency to NewBB.
2494 BFI->setBlockFreq(NewBB, NewBBFreq);
2495 }
2496
2497 DTU->applyUpdatesPermissive(Updates);
2498 return NewBBs[0];
2499}
2500
2501bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2502 const Instruction *TI = BB->getTerminator();
2503 if (!TI || TI->getNumSuccessors() < 2)
2504 return false;
2505
2506 return hasValidBranchWeightMD(*TI);
2507}
2508
2509/// Update the block frequency of BB and branch weight and the metadata on the
2510/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2511/// Freq(PredBB->BB) / Freq(BB->SuccBB).
2512void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2513 BasicBlock *BB,
2514 BasicBlock *NewBB,
2515 BasicBlock *SuccBB,
2516 BlockFrequencyInfo *BFI,
2518 bool HasProfile) {
2519 assert(((BFI && BPI) || (!BFI && !BFI)) &&
2520 "Both BFI & BPI should either be set or unset");
2521
2522 if (!BFI) {
2523 assert(!HasProfile &&
2524 "It's expected to have BFI/BPI when profile info exists");
2525 return;
2526 }
2527
2528 // As the edge from PredBB to BB is deleted, we have to update the block
2529 // frequency of BB.
2530 auto BBOrigFreq = BFI->getBlockFreq(BB);
2531 auto NewBBFreq = BFI->getBlockFreq(NewBB);
2532 auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
2533 auto BBNewFreq = BBOrigFreq - NewBBFreq;
2534 BFI->setBlockFreq(BB, BBNewFreq);
2535
2536 // Collect updated outgoing edges' frequencies from BB and use them to update
2537 // edge probabilities.
2538 SmallVector<uint64_t, 4> BBSuccFreq;
2539 for (BasicBlock *Succ : successors(BB)) {
2540 auto SuccFreq = (Succ == SuccBB)
2541 ? BB2SuccBBFreq - NewBBFreq
2542 : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
2543 BBSuccFreq.push_back(SuccFreq.getFrequency());
2544 }
2545
2546 uint64_t MaxBBSuccFreq = *llvm::max_element(BBSuccFreq);
2547
2549 if (MaxBBSuccFreq == 0)
2550 BBSuccProbs.assign(BBSuccFreq.size(),
2551 {1, static_cast<unsigned>(BBSuccFreq.size())});
2552 else {
2553 for (uint64_t Freq : BBSuccFreq)
2554 BBSuccProbs.push_back(
2555 BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2556 // Normalize edge probabilities so that they sum up to one.
2558 BBSuccProbs.end());
2559 }
2560
2561 // Update edge probabilities in BPI.
2562 BPI->setEdgeProbability(BB, BBSuccProbs);
2563
2564 // Update the profile metadata as well.
2565 //
2566 // Don't do this if the profile of the transformed blocks was statically
2567 // estimated. (This could occur despite the function having an entry
2568 // frequency in completely cold parts of the CFG.)
2569 //
2570 // In this case we don't want to suggest to subsequent passes that the
2571 // calculated weights are fully consistent. Consider this graph:
2572 //
2573 // check_1
2574 // 50% / |
2575 // eq_1 | 50%
2576 // \ |
2577 // check_2
2578 // 50% / |
2579 // eq_2 | 50%
2580 // \ |
2581 // check_3
2582 // 50% / |
2583 // eq_3 | 50%
2584 // \ |
2585 //
2586 // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2587 // the overall probabilities are inconsistent; the total probability that the
2588 // value is either 1, 2 or 3 is 150%.
2589 //
2590 // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2591 // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2592 // the loop exit edge. Then based solely on static estimation we would assume
2593 // the loop was extremely hot.
2594 //
2595 // FIXME this locally as well so that BPI and BFI are consistent as well. We
2596 // shouldn't make edges extremely likely or unlikely based solely on static
2597 // estimation.
2598 if (BBSuccProbs.size() >= 2 && HasProfile) {
2600 for (auto Prob : BBSuccProbs)
2601 Weights.push_back(Prob.getNumerator());
2602
2603 auto TI = BB->getTerminator();
2604 setBranchWeights(*TI, Weights, hasBranchWeightOrigin(*TI));
2605 }
2606}
2607
2608/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2609/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2610/// If we can duplicate the contents of BB up into PredBB do so now, this
2611/// improves the odds that the branch will be on an analyzable instruction like
2612/// a compare.
///
/// Returns false, after only emitting a debug message, when BB is in
/// LoopHeaders or when the computed duplication cost exceeds BBDupThreshold.
/// Otherwise the duplication is performed, NumDupes is incremented and true
/// is returned.
// NOTE(review): listing line 2613 (the first line of the signature) is absent
// from this excerpt; only the parameter list below is visible.
2614 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2615 assert(!PredBBs.empty() && "Can't handle an empty set");
2616
2617 // If BB is a loop header, then duplicating this block outside the loop would
2618 // cause us to transform this into an irreducible loop, don't do this.
2619 // See the comments above findLoopHeaders for justifications and caveats.
2620 if (LoopHeaders.count(BB)) {
2621 LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2622 << "' into predecessor block '" << PredBBs[0]->getName()
2623 << "' - it might create an irreducible loop!\n");
2624 return false;
2625 }
2626
// Cost is measured over BB up to its terminator and compared against the same
// threshold value that is passed in as the limit.
2627 unsigned DuplicationCost = getJumpThreadDuplicationCost(
2628 TTI, BB, BB->getTerminator(), BBDupThreshold);
2629 if (DuplicationCost > BBDupThreshold) {
2630 LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2631 << "' - Cost is too high: " << DuplicationCost << "\n");
2632 return false;
2633 }
2634
2635 // And finally, do it! Start by factoring the predecessors if needed.
// Dominator-tree edge changes are collected in Updates and applied in one
// batch at the end of the function.
2636 std::vector<DominatorTree::UpdateType> Updates;
2637 BasicBlock *PredBB;
2638 if (PredBBs.size() == 1)
2639 PredBB = PredBBs[0];
2640 else {
2641 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2642 << " common predecessors.\n");
2643 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2644 }
// The PredBB -> BB edge disappears below (removePredecessor + erasing the
// old branch), so record its deletion up front.
2645 Updates.push_back({DominatorTree::Delete, PredBB, BB});
2646
2647 // Okay, we decided to do this! Clone all the instructions in BB onto the end
2648 // of PredBB.
2649 LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2650 << "' into end of '" << PredBB->getName()
2651 << "' to eliminate branch on phi. Cost: "
2652 << DuplicationCost << " block is:" << *BB << "\n");
2653
2654 // Unless PredBB ends with an unconditional branch, split the edge so that we
2655 // can just clone the bits from BB into the end of the new PredBB.
2656 BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2657
2658 if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2659 BasicBlock *OldPredBB = PredBB;
2660 PredBB = SplitEdge(OldPredBB, BB);
// From here on PredBB names the block returned by SplitEdge; the three
// updates describe OldPredBB -> PredBB -> BB replacing OldPredBB -> BB.
2661 Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
2662 Updates.push_back({DominatorTree::Insert, PredBB, BB});
2663 Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
2664 OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2665 }
2666
2667 // We are going to have to map operands from the original BB block into the
2668 // PredBB block. Evaluate PHI nodes in BB.
2669 ValueToValueMapTy ValueMapping;
2670
// BI is deliberately shared by the two loops: the first stops at the first
// non-PHI instruction, the second continues from there to the end of BB.
2671 BasicBlock::iterator BI = BB->begin();
2672 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2673 ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2674 // Clone the non-phi instructions of BB into PredBB, keeping track of the
2675 // mapping and using it to remap operands in the cloned instructions.
2676 for (; BI != BB->end(); ++BI) {
2677 Instruction *New = BI->clone();
2678 New->insertInto(PredBB, OldPredBranch->getIterator());
2679
2680 // Remap operands to patch up intra-block references.
2681 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2682 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2683 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
2684 if (I != ValueMapping.end())
2685 New->setOperand(i, I->second);
2686 }
2687
2688 // Remap debug variable operands.
2689 remapDebugVariable(ValueMapping, New);
2690
2691 // If this instruction can be simplified after the operands are updated,
2692 // just use the simplified value instead. This frequently happens due to
2693 // phi translation.
// NOTE(review): listing line 2694 is absent here. Judging by the uses below
// it opens the condition that binds the simplified value `IV` for `New`;
// confirm against the real source.
2695 New,
2696 {BB->getDataLayout(), TLI, nullptr, nullptr, New})) {
2697 ValueMapping[&*BI] = IV;
// The clone is only dropped when it has no side effects; otherwise it stays
// in PredBB even though its value is mapped to IV.
2698 if (!New->mayHaveSideEffects()) {
2699 New->eraseFromParent();
2700 New = nullptr;
2701 // Clone debug-info on the elided instruction to the destination
2702 // position.
2703 OldPredBranch->cloneDebugInfoFrom(&*BI, std::nullopt, true);
2704 }
2705 } else {
2706 ValueMapping[&*BI] = New;
2707 }
2708 if (New) {
2709 // Otherwise, insert the new instruction into the block.
2710 New->setName(BI->getName());
2711 // Clone across any debug-info attached to the old instruction.
2712 New->cloneDebugInfoFrom(&*BI);
2713 // Update Dominance from simplified New instruction operands.
2714 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2715 if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2716 Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
2717 }
2718 }
2719
2720 // Check to see if the targets of the branch had PHI nodes. If so, we need to
2721 // add entries to the PHI nodes for branch from PredBB now.
// Successors 0 and 1 are both read, so BB's terminator is expected to be a
// two-way branch, matching the function comment.
2722 BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2723 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2724 ValueMapping);
2725 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2726 ValueMapping);
2727
2728 updateSSA(BB, PredBB, ValueMapping);
2729
2730 // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2731 // that we nuked.
2732 BB->removePredecessor(PredBB, true);
2733
2734 // Remove the unconditional branch at the end of the PredBB block.
2735 OldPredBranch->eraseFromParent();
// BPI is only touched when getBPI() yields a non-null instance.
2736 if (auto *BPI = getBPI())
2737 BPI->copyEdgeProbabilities(BB, PredBB);
2738 DTU->applyUpdatesPermissive(Updates);
2739
2740 ++NumDupes;
2741 return true;
2742}
2743
2744// Pred is a predecessor of BB with an unconditional branch to BB. SI is
2745// a Select instruction in Pred. BB has other predecessors and SI is used in
2746// a PHI node in BB. SI has no other use.
2747// A new basic block, NewBB, is created and SI is converted to compare and
2748// conditional branch. SI is erased from parent.
//
// SIUse is the PHI in BB that uses SI and Idx is the index of the incoming
// entry of SIUse that gets rewritten (see the setIncomingValue call below).
// NOTE(review): listing line 2749 (the first line of the signature) is absent
// from this excerpt; Pred and BB are used below as parameters.
2750 SelectInst *SI, PHINode *SIUse,
2751 unsigned Idx) {
2752 // Expand the select.
2753 //
2754 // Pred --
2755 // | v
2756 // | NewBB
2757 // | |
2758 // |-----
2759 // v
2760 // BB
2761 BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
2762 BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2763 BB->getParent(), BB);
2764 // Move the unconditional branch to NewBB.
2765 PredTerm->removeFromParent();
2766 PredTerm->insertInto(NewBB, NewBB->end());
2767 // Create a conditional branch and update PHI nodes.
2768 auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
2769 BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2770 BI->copyMetadata(*SI, {LLVMContext::MD_prof});
// The existing entry at Idx now carries the false value; the true value
// arrives through the new NewBB entry.
2771 SIUse->setIncomingValue(Idx, SI->getFalseValue());
2772 SIUse->addIncoming(SI->getTrueValue(), NewBB);
2773
// 1:1 is the fallback used further down when no branch weights are extracted
// from SI.
2774 uint64_t TrueWeight = 1;
2775 uint64_t FalseWeight = 1;
2776 // Copy probabilities from 'SI' to created conditional branch in 'Pred'.
2777 if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
2778 (TrueWeight + FalseWeight) != 0) {
// NOTE(review): listing lines 2779, 2780 and 2782 are absent here. `BP`,
// used below, is not declared in the visible text; presumably those lines
// declare it and append the two probabilities whose arguments remain visible.
// Confirm against the real source.
2781 TrueWeight, TrueWeight + FalseWeight));
2783 FalseWeight, TrueWeight + FalseWeight));
2784 // Update BPI if exists.
2785 if (auto *BPI = getBPI())
2786 BPI->setEdgeProbability(Pred, BP);
2787 }
2788 // Set the block frequency of NewBB.
2789 if (auto *BFI = getBFI()) {
// Guard against a zero denominator in the probability computed below.
2790 if ((TrueWeight + FalseWeight) == 0) {
2791 TrueWeight = 1;
2792 FalseWeight = 1;
2793 }
// NOTE(review): listing line 2794 is absent here; `PredToNewBBProb` is used
// below but its declaration is not visible.
2795 TrueWeight, TrueWeight + FalseWeight);
2796 auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
2797 BFI->setBlockFreq(NewBB, NewBBFreq);
2798 }
2799
2800 // The select is now dead.
2801 SI->eraseFromParent();
2802 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
2803 {DominatorTree::Insert, Pred, NewBB}});
2804
2805 // Update any other PHI nodes in BB.
// Every PHI other than SIUse receives, for NewBB, the same value it already
// has for Pred.
2806 for (BasicBlock::iterator BI = BB->begin();
2807 PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2808 if (Phi != SIUse)
2809 Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2810}
2811
// Looks at the PHI located in BB that provides SI's condition. For the first
// incoming value that is a single-use select living in its own incoming block,
// where that block ends in an unconditional branch, the select is expanded via
// unfoldSelectInstr and true is returned. Returns false when the condition is
// not such a PHI or no incoming value qualifies.
// NOTE(review): listing line 2812 (the signature) is absent from this
// excerpt; SI and BB are used below as parameters.
2813 PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2814
2815 if (!CondPHI || CondPHI->getParent() != BB)
2816 return false;
2817
2818 for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2819 BasicBlock *Pred = CondPHI->getIncomingBlock(I);
2820 SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
2821
2822 // The second and third condition can be potentially relaxed. Currently
2823 // the conditions help to simplify the code and allow us to reuse existing
2824 // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
2825 if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
2826 continue;
2827
// unfoldSelectInstr casts Pred's terminator to a BranchInst and moves it, so
// only predecessors ending in an unconditional branch are accepted.
2828 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2829 if (!PredTerm || !PredTerm->isUnconditional())
2830 continue;
2831
// Only one select is unfolded per call.
2832 unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
2833 return true;
2834 }
2835 return false;
2836}
2837
2838/// tryToUnfoldSelect - Look for blocks of the form
2839/// bb1:
2840/// %a = select
2841/// br bb2
2842///
2843/// bb2:
2844/// %p = phi [%a, %bb1] ...
2845/// %c = icmp %p
2846/// br i1 %c
2847///
2848/// And expand the select into a branch structure if one of its arms allows %c
2849/// to be folded. This later enables threading from bb1 over bb2.
///
/// Returns true after unfolding the first qualifying select, false if BB does
/// not end in a conditional branch, the compare's first operand is not a PHI
/// in BB, or no incoming select qualifies.
// NOTE(review): listing line 2850 (the signature) is absent from this
// excerpt; CondCmp and BB are used below as parameters.
2851 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2852 PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
// cast<> (not dyn_cast<>): operand 1 of the compare is required to be a
// Constant here.
2853 Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2854
2855 if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2856 CondLHS->getParent() != BB)
2857 return false;
2858
2859 for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2860 BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2861 SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2862
2863 // Look if one of the incoming values is a select in the corresponding
2864 // predecessor.
2865 if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2866 continue;
2867
2868 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2869 if (!PredTerm || !PredTerm->isUnconditional())
2870 continue;
2871
2872 // Now check if one of the select values would allow us to constant fold the
2873 // terminator in BB. We don't do the transform if both sides fold, those
2874 // cases will be threaded in any case.
// The same predicate query is issued twice on the Pred -> BB edge, once per
// value operand of the select (operands 1 and 2).
2875 Constant *LHSRes =
2876 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2877 CondRHS, Pred, BB, CondCmp);
2878 Constant *RHSRes =
2879 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2880 CondRHS, Pred, BB, CondCmp);
// Unfold when at least one query produced a result and the two results are
// not the same pointer.
2881 if ((LHSRes || RHSRes) && LHSRes != RHSRes) {
2882 unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
2883 return true;
2884 }
2885 }
2886 return false;
2887}
2888
2889/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2890/// same BB in the form
2891/// bb:
2892/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2893/// %s = select %p, trueval, falseval
2894///
2895/// or
2896///
2897/// bb:
2898/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2899/// %c = cmp %p, 0
2900/// %s = select %c, trueval, falseval
2901///
2902/// And expand the select into a branch structure. This later enables
2903/// jump-threading over bb in this pass.
2904///
2905/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2906/// select if the associated PHI has at least one constant. If the unfolded
2907/// select is not jump-threaded, it will be folded again in the later
2908/// optimizations.
///
/// Returns true after unfolding one select (at most one per call), false if
/// the function is marked SanitizeMemory, BB is in LoopHeaders, or no PHI in
/// BB leads to a candidate select.
// NOTE(review): listing line 2909 (the signature) is absent from this
// excerpt; BB is used below as a parameter.
2910 // This transform would reduce the quality of msan diagnostics.
2911 // Disable this transform under MemorySanitizer.
2912 if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
2913 return false;
2914
2915 // If threading this would thread across a loop header, don't thread the edge.
2916 // See the comments above findLoopHeaders for justifications and caveats.
2917 if (LoopHeaders.count(BB))
2918 return false;
2919
2920 for (BasicBlock::iterator BI = BB->begin();
2921 PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2922 // Look for a Phi having at least one constant incoming value.
2923 if (llvm::all_of(PN->incoming_values(),
2924 [](Value *V) { return !isa<ConstantInt>(V); }))
2925 continue;
2926
// A select qualifies when it lives in BB, its condition is exactly V, the
// condition has type i1, and the select does not match the logical and/or
// patterns.
2927 auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2928 using namespace PatternMatch;
2929
2930 // Check if SI is in BB and use V as condition.
2931 if (SI->getParent() != BB)
2932 return false;
2933 Value *Cond = SI->getCondition();
2934 bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
2935 return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
2936 };
2937
// The first qualifying user of PN wins; the search stops at the first match.
2938 SelectInst *SI = nullptr;
2939 for (Use &U : PN->uses()) {
2940 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2941 // Look for a ICmp in BB that compares PN with a constant and is the
2942 // condition of a Select.
// `1 - U.getOperandNo()` selects the compare operand that is not this use
// of PN.
2943 if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2944 isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2945 if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2946 if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2947 SI = SelectI;
2948 break;
2949 }
2950 } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2951 // Look for a Select in BB that uses PN as condition.
2952 if (isUnfoldCandidate(SelectI, U.get())) {
2953 SI = SelectI;
2954 break;
2955 }
2956 }
2957 }
2958
2959 if (!SI)
2960 continue;
2961 // Expand the select.
2962 Value *Cond = SI->getCondition();
// The select condition is about to become a branch condition; freeze it
// unless it is known not to be undef or poison at SI.
2963 if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))
2964 Cond = new FreezeInst(Cond, "cond.fr", SI->getIterator());
2965 MDNode *BranchWeights = getBranchWeightMDNode(*SI);
2966 Instruction *Term =
2967 SplitBlockAndInsertIfThen(Cond, SI, false, BranchWeights);
// Blocks as they stand after the split: SplitBB is the block now holding SI,
// NewBB is the block holding the terminator returned by the split.
2968 BasicBlock *SplitBB = SI->getParent();
2969 BasicBlock *NewBB = Term->getParent();
// The new PHI takes the true value when arriving from NewBB and the false
// value when arriving directly from BB, then replaces SI.
2970 PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI->getIterator());
2971 NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2972 NewPN->addIncoming(SI->getFalseValue(), BB);
2973 NewPN->setDebugLoc(SI->getDebugLoc());
2974 SI->replaceAllUsesWith(NewPN);
2975 SI->eraseFromParent();
2976 // NewBB and SplitBB are newly created blocks which require insertion.
2977 std::vector<DominatorTree::UpdateType> Updates;
// Capacity: three fixed insertions plus a delete/insert pair per successor
// of SplitBB.
2978 Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
2979 Updates.push_back({DominatorTree::Insert, BB, SplitBB});
2980 Updates.push_back({DominatorTree::Insert, BB, NewBB});
2981 Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
2982 // BB's successors were moved to SplitBB, update DTU accordingly.
2983 for (auto *Succ : successors(SplitBB)) {
2984 Updates.push_back({DominatorTree::Delete, BB, Succ});
2985 Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
2986 }
2987 DTU->applyUpdatesPermissive(Updates);
2988 return true;
2989 }
2990 return false;
2991}
2992
2993/// Try to propagate a guard from the current BB into one of its predecessors
2994/// in case if another branch of execution implies that the condition of this
2995/// guard is always true. Currently we only process the simplest case that
2996/// looks like:
2997///
2998/// Start:
2999/// %cond = ...
3000/// br i1 %cond, label %T1, label %F1
3001/// T1:
3002/// br label %Merge
3003/// F1:
3004/// br label %Merge
3005/// Merge:
3006/// %condGuard = ...
3007/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
3008///
3009/// And cond either implies condGuard or !condGuard. In this case all the
3010/// instructions before the guard can be duplicated in both branches, and the
3011/// guard is then threaded to one of them.
///
/// Returns true as soon as threadGuard succeeds for one guard in BB, false
/// otherwise.
// NOTE(review): listing line 3012 (the signature) is absent from this
// excerpt; BB is used below as a parameter.
3013 using namespace PatternMatch;
3014
3015 // We only want to deal with two predecessors.
// The iterator is advanced by hand so that zero, one, or more than two
// predecessor entries are all rejected.
3016 BasicBlock *Pred1, *Pred2;
3017 auto PI = pred_begin(BB), PE = pred_end(BB);
3018 if (PI == PE)
3019 return false;
3020 Pred1 = *PI++;
3021 if (PI == PE)
3022 return false;
3023 Pred2 = *PI++;
3024 if (PI != PE)
3025 return false;
// Two predecessor entries naming the same block are rejected as well.
3026 if (Pred1 == Pred2)
3027 return false;
3028
3029 // Try to thread one of the guards of the block.
3030 // TODO: Look up deeper than to immediate predecessor?
// Both predecessors must have the same single predecessor (the top of the
// diamond).
3031 auto *Parent = Pred1->getSinglePredecessor();
3032 if (!Parent || Parent != Pred2->getSinglePredecessor())
3033 return false;
3034
3035 if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
3036 for (auto &I : *BB)
3037 if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
3038 return true;
3039
3040 return false;
3041}
3042
3043/// Try to propagate the guard from BB which is the lower block of a diamond
3044/// to one of its branches, in case if diamond's condition implies guard's
3045/// condition.
///
/// Returns false when neither branch direction implies the guard condition or
/// when the duplication cost exceeds BBDupThreshold; returns true after the
/// instructions up to and including the guard have been moved out of BB.
// NOTE(review): listing line 3046 (the first line of the signature) is absent
// from this excerpt; BB and Guard are used below as parameters.
3047 BranchInst *BI) {
3048 assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
3049 assert(BI->isConditional() && "Unconditional branch has 2 successors?");
3050 Value *GuardCond = Guard->getArgOperand(0);
3051 Value *BranchCond = BI->getCondition();
3052 BasicBlock *TrueDest = BI->getSuccessor(0);
3053 BasicBlock *FalseDest = BI->getSuccessor(1);
3054
3055 auto &DL = BB->getDataLayout();
3056 bool TrueDestIsSafe = false;
3057 bool FalseDestIsSafe = false;
3058
3059 // True dest is safe if BranchCond => GuardCond.
3060 auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
3061 if (Impl && *Impl)
3062 TrueDestIsSafe = true;
3063 else {
3064 // False dest is safe if !BranchCond => GuardCond.
3065 Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
3066 if (Impl && *Impl)
3067 FalseDestIsSafe = true;
3068 }
3069
3070 if (!TrueDestIsSafe && !FalseDestIsSafe)
3071 return false;
3072
// At most one of the two flags is set at this point, so the two selections
// below always name different successors: the "unguarded" one is the safe
// side, the "guarded" one is the other side.
3073 BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3074 BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3075
3076 ValueToValueMapTy UnguardedMapping, GuardedMapping;
3077 Instruction *AfterGuard = Guard->getNextNode();
3078 unsigned Cost =
3079 getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
3080 if (Cost > BBDupThreshold)
3081 return false;
3082 // Duplicate all instructions before the guard and the guard itself to the
3083 // branch where implication is not proved.
// NOTE(review): listing line 3084 is absent here; `GuardedBlock` is used
// below but its declaration and the name of the called helper are not visible.
3085 BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3086 assert(GuardedBlock && "Could not create the guarded block?");
3087 // Duplicate all instructions before the guard in the unguarded branch.
3088 // Since we have successfully duplicated the guarded block and this block
3089 // has fewer instructions, we expect it to succeed.
// NOTE(review): listing line 3090 is absent here; `UnguardedBlock` is used
// below but its declaration is not visible.
3091 BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3092 assert(UnguardedBlock && "Could not create the unguarded block?");
3093 LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3094 << GuardedBlock->getName() << "\n");
3095 // Some instructions before the guard may still have uses. For them, we need
3096 // to create Phi nodes merging their copies in both guarded and unguarded
3097 // branches. Those instructions that have no uses can be just removed.
// NOTE(review): listing line 3098 is absent here; `ToRemove` is used below
// but its declaration is not visible.
// Collect every non-PHI instruction from the start of BB up to, but not
// including, AfterGuard (so the guard itself is collected).
3099 for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3100 if (!isa<PHINode>(&*BI))
3101 ToRemove.push_back(&*BI);
3102
// NOTE(review): listing line 3103 is absent here; `InsertionPoint` is
// asserted on below but its declaration is not visible.
3104 assert(InsertionPoint != BB->end() && "Empty block?");
3105 // Substitute with Phis & remove.
// Reverse order: later instructions are erased before the earlier ones they
// may use.
3106 for (auto *Inst : reverse(ToRemove)) {
3107 if (!Inst->use_empty()) {
3108 PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
3109 NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3110 NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
3111 NewPN->setDebugLoc(Inst->getDebugLoc());
// NOTE(review): listing line 3112 is absent here; NewPN is created without
// an insertion position above, so presumably the missing line inserts it
// (at InsertionPoint?). Confirm against the real source.
3113 Inst->replaceAllUsesWith(NewPN);
3114 }
3115 Inst->dropDbgRecords();
3116 Inst->eraseFromParent();
3117 }
3118 return true;
3119}
3120
// Returns the set of analyses this pass reports as preserved. Per the TODOs
// below, BPI/BFI are not yet part of that set.
3121PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {
// NOTE(review): listing lines 3122-3124 are absent here; `PA`, returned
// below, is declared and populated in that missing span.
3125
3126 // TODO: We would like to preserve BPI/BFI. Enable once all paths update them.
3127 // TODO: Would be nice to verify BPI/BFI consistency as well.
3128 return PA;
3129}
3130
// Runs analysis AnalysisT on *F through FAM and returns a pointer to its
// result. When the IR changed since the previous call, everything not
// reported as preserved (plus BPI/BFI, which are kept explicitly) is
// invalidated first, pending dominator-tree updates are flushed, and the
// TTI/TLI/AA members are refreshed afterwards.
3131template <typename AnalysisT>
3132typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
3133 assert(FAM && "Can't run external analysis without FunctionAnalysisManager");
3134
3135 // If there were no changes since last call to 'runExternalAnalysis' then all
3136 // analysis is either up to date or explicitly invalidated. Just go ahead and
3137 // run the "external" analysis.
3138 if (!ChangedSinceLastAnalysisUpdate) {
3139 assert(!DTU->hasPendingUpdates() &&
3140 "Lost update of 'ChangedSinceLastAnalysisUpdate'?");
3141 // Run the "external" analysis.
3142 return &FAM->getResult<AnalysisT>(*F);
3143 }
// Reset the flag before invalidating so the next call can take the fast path
// above unless new changes are recorded.
3144 ChangedSinceLastAnalysisUpdate = false;
3145
3146 auto PA = getPreservedAnalysis();
3147 // TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI
3148 // as preserved.
3149 PA.preserve<BranchProbabilityAnalysis>();
3150 PA.preserve<BlockFrequencyAnalysis>();
3151 // Report everything except explicitly preserved as invalid.
3152 FAM->invalidate(*F, PA);
3153 // Update DT/PDT.
3154 DTU->flush();
3155 // Make sure DT/PDT are valid before running "external" analysis.
3156 assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
3157 assert((!DTU->hasPostDomTree() ||
3158 DTU->getPostDomTree().verify(
// NOTE(review): listing line 3159 is absent here; it closes the assert that
// starts on the two lines above.
3160 // Run the "external" analysis.
3161 auto *Result = &FAM->getResult<AnalysisT>(*F);
3162 // Update analysis JumpThreading depends on and not explicitly preserved.
3163 TTI = &FAM->getResult<TargetIRAnalysis>(*F);
3164 TLI = &FAM->getResult<TargetLibraryAnalysis>(*F);
3165 AA = &FAM->getResult<AAManager>(*F);
3166
3167 return Result;
3168}
3169
// Returns the value held in the BPI member, filling the member first when it
// is still empty. Callers in this file test the returned pointer for null.
3170BranchProbabilityInfo *JumpThreadingPass::getBPI() {
3171 if (!BPI) {
3172 assert(FAM && "Can't create BPI without FunctionAnalysisManager");
// NOTE(review): listing line 3173 is absent here; it presumably assigns BPI
// from FAM (the assert above requires FAM). Confirm against the real source.
3174 }
3175 return *BPI;
3176}
3177
// Returns the value held in the BFI member, filling the member first when it
// is still empty. Callers in this file test the returned pointer for null.
3178BlockFrequencyInfo *JumpThreadingPass::getBFI() {
3179 if (!BFI) {
3180 assert(FAM && "Can't create BFI without FunctionAnalysisManager");
// NOTE(review): listing line 3181 is absent here; it presumably assigns BFI
// from FAM (the assert above requires FAM). Confirm against the real source.
3182 }
3183 return *BFI;
3184}
3185
3186// Important note on validity of BPI/BFI. JumpThreading tries to preserve
3187// BPI/BFI as it goes. Thus if cached instance exists it will be updated.
3188// Otherwise, new instance of BPI/BFI is created (up to date by definition).
3189BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) {
3190 auto *Res = getBPI();
3191 if (Res)
3192 return Res;
3193
3194 if (Force)
3195 BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
3196
3197 return *BPI;
3198}
3199
3200BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) {
3201 auto *Res = getBFI();
3202 if (Res)
3203 return Res;
3204
3205 if (Force)
3206 BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
3207
3208 return *BFI;
3209}
Rewrite undef for PHI
ReachingDefAnalysis InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
BlockVerifier::State From
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
uint64_t Size
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static unsigned getBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForBranchOnUndef - If we determine that the specified block ends in an undefined jump,...
static cl::opt< unsigned > PhiDuplicateThreshold("jump-threading-phi-threshold", cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), cl::Hidden)
static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, BasicBlock *KnownAtEndOfBB)
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
static cl::opt< bool > ThreadAcrossLoopHeaders("jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), cl::init(false), cl::Hidden)
static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, ValueToValueMapTy &ValueMap)
addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new predecessor to the PHIBB block.
static BasicBlock * findMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &PredToDestList)
findMostPopularDest - The specified list contains multiple possible threadable destinations.
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
static bool hasAddressTakenAndUsed(BasicBlock *BB)
See the comments on JumpThreadingPass.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:533
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define P(N)
ppc ctr loops verify
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:429
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
DbgMarker * createMarker(Instruction *I)
Attach a DbgMarker to the given instruction.
Definition: BasicBlock.cpp:52
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:658
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:178
const Instruction & front() const
Definition: BasicBlock.h:471
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
Definition: BasicBlock.cpp:287
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:481
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
DbgMarker * getMarker(InstListType::iterator It)
Return the DbgMarker for the position given by It, so that DbgRecords can be inserted there.
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:677
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:675
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:516
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
void disableDominatorTree()
Disable the use of the dominator tree during alias analysis queries.
The address of a basic block.
Definition: Constants.h:893
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1897
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
uint32_t getNumerator() const
BranchProbability getCompl() const
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1294
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2631
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:214
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:866
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:208
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:873
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
This is an important base class in LLVM.
Definition: Constant.h:42
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:739
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
Per-instruction record of debug-info.
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(DbgMarker *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere, bool InsertAtHead=false)
Clone all DbgMarkers from From into this marker.
const BasicBlock * getParent() const
This represents the llvm.dbg.value instruction.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
This class represents a freeze function that returns a random concrete value if an operand is either a ...
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
Indirect Branch Instruction.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:80
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1764
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1750
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
bool isSpecialTerminator() const
Definition: Instruction.h:285
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
bool simplifyPartiallyRedundantLoad(LoadInst *LI)
simplifyPartiallyRedundantLoad - If LoadI is an obviously partially redundant load instruction,...
bool processBranchOnXOR(BinaryOperator *BO)
processBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
bool processGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case another branch o...
void updateSSA(BasicBlock *BB, BasicBlock *NewBB, ValueToValueMapTy &ValueMapping)
Update the SSA form.
bool computeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
void findLoopHeaders(Function &F)
findLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB)
Merge basic block BB into its sole predecessor if possible.
void cloneInstructions(ValueToValueMapTy &ValueMapping, BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB)
Clone instructions in range [BI, BE) to NewBB.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, FunctionAnalysisManager *FAM, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, LazyValueInfo *LVI, AAResults *AA, std::unique_ptr< DomTreeUpdater > DTU, std::optional< BlockFrequencyInfo * > BFI, std::optional< BranchProbabilityInfo * > BPI)
Constant * evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *cond, const DataLayout &DL)
bool processBranchOnPHI(PHINode *PN)
processBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node (or freeze PH...
bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond)
Attempt to thread through two successive basic blocks.
bool computeValueKnownInPredecessorsImpl(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, SmallPtrSet< Value *, 4 > &RecursionSet, Instruction *CxtI=nullptr)
computeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx)
DomTreeUpdater * getDomTreeUpdater() const
bool processThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
bool processBlock(BasicBlock *BB)
processBlock - If there are any predecessors whose control can be threaded through to a successor,...
bool processImpliedCondition(BasicBlock *BB)
bool duplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs)
duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB)
bool tryThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
tryThreadEdge - Thread an edge if it's safe and profitable to do so.
bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
tryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2
bool tryToUnfoldSelectInCurrBB(BasicBlock *BB)
tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
void threadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
threadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches,...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Analysis to compute lazy value information.
This pass computes, caches, and vends lazy value constraint information.
Definition: LazyValueInfo.h:32
void eraseBlock(BasicBlock *BB)
Inform the analysis cache that we have erased a block.
void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc)
Inform the analysis cache that we have threaded an edge from PredBB to OldSucc to be from PredBB to N...
Constant * getPredicateOnEdge(CmpInst::Predicate Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value comparison with a constant is known to be true or false on the ...
Constant * getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value is known to be a constant on the specified edge.
ConstantRange getConstantRangeOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Return the ConstantRange constraint that is known to hold for the specified value on the specified edg...
Constant * getConstant(Value *V, Instruction *CxtI)
Determine whether the specified value is known to be a constant at the specified instruction.
void forgetValue(Value *V)
Remove information related to this value from the cache.
Constant * getPredicateAt(CmpInst::Predicate Pred, Value *V, Constant *C, Instruction *CxtI, bool UseBlockValue)
Determine whether the specified value comparison with a constant is known to be true or false at the ...
An instruction for reading from memory.
Definition: Instructions.h:176
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:220
bool isUnordered() const
Definition: Instructions.h:249
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:230
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
static LocationSize precise(uint64_t Value)
Metadata node.
Definition: Metadata.h:1069
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
Representation for a specific memory location.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:40
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
Definition: SSAUpdater.cpp:187
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Definition: SSAUpdater.cpp:52
void UpdateDebugValues(Instruction *I)
Rewrite debug value intrinsics to conform to a new SSA form.
Definition: SSAUpdater.cpp:199
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Definition: SSAUpdater.cpp:69
This class represents the LLVM 'select' instruction.
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:704
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
'undef' values are things that do not have specified contents.
Definition: Constants.h:1412
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1859
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:233
Value * getOperand(unsigned i) const
Definition: User.h:228
iterator find(const KeyT &Val)
Definition: ValueMap.h:155
iterator end()
Definition: ValueMap.h:135
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:1067
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)
This version supports overloaded intrinsics.
Definition: Intrinsics.cpp:746
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< CmpInst > m_Cmp()
Match any compare instruction and ignore it.
Definition: PatternMatch.h:105
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:136
auto pred_end(const MachineBasicBlock *BB)
unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
Definition: Local.cpp:3566
auto successors(const MachineBasicBlock *BB)
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
Value * findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
Definition: Loads.cpp:621
void remapDebugVariable(ValueToValueMapTy &Mapping, Instruction *Inst)
Remap the operands of the debug records attached to Inst, and the operands of Inst itself if it's a d...
Definition: Local.cpp:3788
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
auto pred_size(const MachineBasicBlock *BB)
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:737
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:492
bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: DebugInfo.cpp:155
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:406
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1156
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
bool hasValidBranchWeightMD(const Instruction &I)
Checks if an instruction has valid Branch Weight Metadata.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void cloneNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, DenseMap< MDNode *, MDNode * > &ClonedScopes, StringRef Ext, LLVMContext &Context)
Duplicate the specified list of noalias decl scopes.
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3426
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!...
Definition: Local.cpp:777
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1978
void adaptNoAliasScopes(llvm::Instruction *I, const DenseMap< MDNode *, MDNode * > &ClonedScopes, LLVMContext &Context)
Adapt the metadata for the specified instruction according to the provided mapping.
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2014
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1624
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock * > > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them.
Definition: CFG.cpp:34
std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
Function object to check whether the second component of a container supported by std::get (like std:...
Definition: STLExtras.h:1476