LLVM 20.0.0git
JumpThreading.cpp
Go to the documentation of this file.
1//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Jump Threading pass.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/CFG.h"
30#include "llvm/Analysis/Loads.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Dominators.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/InstrTypes.h"
47#include "llvm/IR/Instruction.h"
50#include "llvm/IR/Intrinsics.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/MDBuilder.h"
53#include "llvm/IR/Metadata.h"
54#include "llvm/IR/Module.h"
55#include "llvm/IR/PassManager.h"
58#include "llvm/IR/Type.h"
59#include "llvm/IR/Use.h"
60#include "llvm/IR/Value.h"
65#include "llvm/Support/Debug.h"
72#include <algorithm>
73#include <cassert>
74#include <cstdint>
75#include <iterator>
76#include <memory>
77#include <utility>
78
79using namespace llvm;
80using namespace jumpthreading;
81
82#define DEBUG_TYPE "jump-threading"
83
84STATISTIC(NumThreads, "Number of jumps threaded");
85STATISTIC(NumFolds, "Number of terminators folded");
86STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
87
89BBDuplicateThreshold("jump-threading-threshold",
90 cl::desc("Max block size to duplicate for jump threading"),
92
95 "jump-threading-implication-search-threshold",
96 cl::desc("The number of predecessors to search for a stronger "
97 "condition to use to thread over a weaker condition"),
99
101 "jump-threading-phi-threshold",
102 cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
103 cl::Hidden);
104
106 "jump-threading-across-loop-headers",
107 cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
108 cl::init(false), cl::Hidden);
109
111 DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
112}
113
114// Update branch probability information according to conditional
115// branch probability. This is usually made possible for cloned branches
116// in inline instances by the context specific profile in the caller.
117// For instance,
118//
119// [Block PredBB]
120// [Branch PredBr]
121// if (t) {
122// Block A;
123// } else {
124// Block B;
125// }
126//
127// [Block BB]
128// cond = PN([true, %A], [..., %B]); // PHI node
129// [Branch CondBr]
130// if (cond) {
131// ... // P(cond == true) = 1%
132// }
133//
134// Here we know that when block A is taken, cond must be true, which means
135// P(cond == true | A) = 1
136//
137// Given that P(cond == true) = P(cond == true | A) * P(A) +
138// P(cond == true | B) * P(B)
139// we get:
140// P(cond == true ) = P(A) + P(cond == true | B) * P(B)
141//
142// which gives us:
143// P(A) is no greater than P(cond == true), i.e.
144// P(t == true) <= P(cond == true)
145//
146// In other words, if we know P(cond == true) is unlikely, we know
147// that P(t == true) is also unlikely.
148//
150 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
151 if (!CondBr)
152 return;
153
154 uint64_t TrueWeight, FalseWeight;
155 if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))
156 return;
157
158 if (TrueWeight + FalseWeight == 0)
159 // Zero branch_weights do not give a hint for getting branch probabilities.
160 // Technically it would result in division by zero denominator, which is
161 // TrueWeight + FalseWeight.
162 return;
163
164 // Returns the outgoing edge of the dominating predecessor block
165 // that leads to the PhiNode's incoming block:
166 auto GetPredOutEdge =
167 [](BasicBlock *IncomingBB,
168 BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
169 auto *PredBB = IncomingBB;
170 auto *SuccBB = PhiBB;
172 while (true) {
173 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
174 if (PredBr && PredBr->isConditional())
175 return {PredBB, SuccBB};
176 Visited.insert(PredBB);
177 auto *SinglePredBB = PredBB->getSinglePredecessor();
178 if (!SinglePredBB)
179 return {nullptr, nullptr};
180
181 // Stop searching when SinglePredBB has been visited. It means we see
182 // an unreachable loop.
183 if (Visited.count(SinglePredBB))
184 return {nullptr, nullptr};
185
186 SuccBB = PredBB;
187 PredBB = SinglePredBB;
188 }
189 };
190
191 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
192 Value *PhiOpnd = PN->getIncomingValue(i);
193 ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
194
195 if (!CI || !CI->getType()->isIntegerTy(1))
196 continue;
197
200 TrueWeight, TrueWeight + FalseWeight)
202 FalseWeight, TrueWeight + FalseWeight));
203
204 auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
205 if (!PredOutEdge.first)
206 return;
207
208 BasicBlock *PredBB = PredOutEdge.first;
209 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
210 if (!PredBr)
211 return;
212
213 uint64_t PredTrueWeight, PredFalseWeight;
214 // FIXME: We currently only set the profile data when it is missing.
215 // With PGO, this can be used to refine even existing profile data with
216 // context information. This needs to be done after more performance
217 // testing.
218 if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))
219 continue;
220
221 // We can not infer anything useful when BP >= 50%, because BP is the
222 // upper bound probability value.
223 if (BP >= BranchProbability(50, 100))
224 continue;
225
226 uint32_t Weights[2];
227 if (PredBr->getSuccessor(0) == PredOutEdge.second) {
228 Weights[0] = BP.getNumerator();
229 Weights[1] = BP.getCompl().getNumerator();
230 } else {
231 Weights[0] = BP.getCompl().getNumerator();
232 Weights[1] = BP.getNumerator();
233 }
234 setBranchWeights(*PredBr, Weights, hasBranchWeightOrigin(*PredBr));
235 }
236}
237
240 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
241 // Jump threading makes no sense for targets with divergent control flow.
243 return PreservedAnalyses::all();
244 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
245 auto &LVI = AM.getResult<LazyValueAnalysis>(F);
246 auto &AA = AM.getResult<AAManager>(F);
247 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
248
249 bool Changed =
250 runImpl(F, &AM, &TLI, &TTI, &LVI, &AA,
251 std::make_unique<DomTreeUpdater>(
252 &DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy),
253 std::nullopt, std::nullopt);
254
255 if (!Changed)
256 return PreservedAnalyses::all();
257
258
260
261#if defined(EXPENSIVE_CHECKS)
262 assert(getDomTreeUpdater()->getDomTree().verify(
263 DominatorTree::VerificationLevel::Full) &&
264 "DT broken after JumpThreading");
265 assert((!getDomTreeUpdater()->hasPostDomTree() ||
266 getDomTreeUpdater()->getPostDomTree().verify(
268 "PDT broken after JumpThreading");
269#else
270 assert(getDomTreeUpdater()->getDomTree().verify(
271 DominatorTree::VerificationLevel::Fast) &&
272 "DT broken after JumpThreading");
273 assert((!getDomTreeUpdater()->hasPostDomTree() ||
274 getDomTreeUpdater()->getPostDomTree().verify(
276 "PDT broken after JumpThreading");
277#endif
278
279 return getPreservedAnalysis();
280}
281
283 TargetLibraryInfo *TLI_,
285 AliasAnalysis *AA_,
286 std::unique_ptr<DomTreeUpdater> DTU_,
287 std::optional<BlockFrequencyInfo *> BFI_,
288 std::optional<BranchProbabilityInfo *> BPI_) {
289 LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");
290 F = &F_;
291 FAM = FAM_;
292 TLI = TLI_;
293 TTI = TTI_;
294 LVI = LVI_;
295 AA = AA_;
296 DTU = std::move(DTU_);
297 BFI = BFI_;
298 BPI = BPI_;
299 auto *GuardDecl = F->getParent()->getFunction(
300 Intrinsic::getName(Intrinsic::experimental_guard));
301 HasGuards = GuardDecl && !GuardDecl->use_empty();
302
303 // Reduce the number of instructions duplicated when optimizing strictly for
304 // size.
305 if (BBDuplicateThreshold.getNumOccurrences())
306 BBDupThreshold = BBDuplicateThreshold;
307 else if (F->hasFnAttribute(Attribute::MinSize))
308 BBDupThreshold = 3;
309 else
310 BBDupThreshold = DefaultBBDupThreshold;
311
312 // JumpThreading must not process blocks unreachable from entry. It's a
313 // waste of compute time and can potentially lead to hangs.
315 assert(DTU && "DTU isn't passed into JumpThreading before using it.");
316 assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
317 DominatorTree &DT = DTU->getDomTree();
318 for (auto &BB : *F)
319 if (!DT.isReachableFromEntry(&BB))
320 Unreachable.insert(&BB);
321
324
325 bool EverChanged = false;
326 bool Changed;
327 do {
328 Changed = false;
329 for (auto &BB : *F) {
330 if (Unreachable.count(&BB))
331 continue;
332 while (processBlock(&BB)) // Thread all of the branches we can over BB.
333 Changed = ChangedSinceLastAnalysisUpdate = true;
334
335 // Jump threading may have introduced redundant debug values into BB
336 // which should be removed.
337 if (Changed)
339
340 // Stop processing BB if it's the entry or is now deleted. The following
341 // routines attempt to eliminate BB and locating a suitable replacement
342 // for the entry is non-trivial.
343 if (&BB == &F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))
344 continue;
345
346 if (pred_empty(&BB)) {
347 // When processBlock makes BB unreachable it doesn't bother to fix up
348 // the instructions in it. We must remove BB to prevent invalid IR.
349 LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
350 << "' with terminator: " << *BB.getTerminator()
351 << '\n');
352 LoopHeaders.erase(&BB);
353 LVI->eraseBlock(&BB);
354 DeleteDeadBlock(&BB, DTU.get());
355 Changed = ChangedSinceLastAnalysisUpdate = true;
356 continue;
357 }
358
359 // processBlock doesn't thread BBs with unconditional TIs. However, if BB
360 // is "almost empty", we attempt to merge BB with its sole successor.
361 auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
362 if (BI && BI->isUnconditional()) {
363 BasicBlock *Succ = BI->getSuccessor(0);
364 if (
365 // The terminator must be the only non-phi instruction in BB.
366 BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
367 // Don't alter Loop headers and latches to ensure another pass can
368 // detect and transform nested loops later.
369 !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
372 // BB is valid for cleanup here because we passed in DTU. F remains
373 // BB's parent until a DTU->getDomTree() event.
374 LVI->eraseBlock(&BB);
375 Changed = ChangedSinceLastAnalysisUpdate = true;
376 }
377 }
378 }
379 EverChanged |= Changed;
380 } while (Changed);
381
382 LoopHeaders.clear();
383 return EverChanged;
384}
385
386// Replace uses of Cond with ToVal when safe to do so. If all uses are
387// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
388// because we may incorrectly replace uses when guards/assumes are uses of
389// `Cond` and we used the guards/assumes to reason about the `Cond` value
390// at the end of block. RAUW unconditionally replaces all uses
391// including the guards/assumes themselves and the uses before the
392// guard/assume.
394 BasicBlock *KnownAtEndOfBB) {
395 bool Changed = false;
396 assert(Cond->getType() == ToVal->getType());
397 // We can unconditionally replace all uses in non-local blocks (i.e. uses
398 // strictly dominated by BB), since LVI information is true from the
399 // terminator of BB.
400 if (Cond->getParent() == KnownAtEndOfBB)
401 Changed |= replaceNonLocalUsesWith(Cond, ToVal);
402 for (Instruction &I : reverse(*KnownAtEndOfBB)) {
403 // Replace any debug-info record users of Cond with ToVal.
404 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
405 DVR.replaceVariableLocationOp(Cond, ToVal, true);
406
407 // Reached the Cond whose uses we are trying to replace, so there are no
408 // more uses.
409 if (&I == Cond)
410 break;
411 // We only replace uses in instructions that are guaranteed to reach the end
412 // of BB, where we know Cond is ToVal.
414 break;
415 Changed |= I.replaceUsesOfWith(Cond, ToVal);
416 }
417 if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
418 Cond->eraseFromParent();
419 Changed = true;
420 }
421 return Changed;
422}
423
424/// Return the cost of duplicating the part of this block from the first
425/// non-phi up to (not including) StopAt to thread across it. Stop scanning
426/// when exceeding the threshold. If duplication is impossible, returns ~0U.
428 BasicBlock *BB,
429 Instruction *StopAt,
430 unsigned Threshold) {
431 assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
432
433 // Do not duplicate the BB if it has a lot of PHI nodes.
434 // If a threadable chain is too long then the number of PHI nodes can add up,
435 // leading to a substantial increase in compile time when rewriting the SSA.
436 unsigned PhiCount = 0;
437 Instruction *FirstNonPHI = nullptr;
438 for (Instruction &I : *BB) {
439 if (!isa<PHINode>(&I)) {
440 FirstNonPHI = &I;
441 break;
442 }
443 if (++PhiCount > PhiDuplicateThreshold)
444 return ~0U;
445 }
446
447 /// Ignore PHI nodes, these will be flattened when duplication happens.
448 BasicBlock::const_iterator I(FirstNonPHI);
449
450 // FIXME: THREADING will delete values that are just used to compute the
451 // branch, so they shouldn't count against the duplication cost.
452
453 unsigned Bonus = 0;
454 if (BB->getTerminator() == StopAt) {
455 // Threading through a switch statement is particularly profitable. If this
456 // block ends in a switch, decrease its cost to make it more likely to
457 // happen.
458 if (isa<SwitchInst>(StopAt))
459 Bonus = 6;
460
461 // The same holds for indirect branches, but slightly more so.
462 if (isa<IndirectBrInst>(StopAt))
463 Bonus = 8;
464 }
465
466 // Bump the threshold up so the early exit from the loop doesn't skip the
467 // terminator-based Size adjustment at the end.
468 Threshold += Bonus;
469
470 // Sum up the cost of each instruction until we get to the terminator. Don't
471 // include the terminator because the copy won't include it.
472 unsigned Size = 0;
473 for (; &*I != StopAt; ++I) {
474
475 // Stop scanning the block if we've exceeded the threshold.
476 if (Size > Threshold)
477 return Size;
478
479 // Bail out if this instruction gives back a token type, it is not possible
480 // to duplicate it if it is used outside this BB.
481 if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
482 return ~0U;
483
484 // Blocks containing noduplicate or convergent calls are modelled as having
485 // infinite cost, so they are never duplicated.
486 if (const CallInst *CI = dyn_cast<CallInst>(I))
487 if (CI->cannotDuplicate() || CI->isConvergent())
488 return ~0U;
489
492 continue;
493
494 // All other instructions count for at least one unit.
495 ++Size;
496
497 // Calls are more expensive. If they are non-intrinsic calls, we model them
498 // as having cost of 4. If they are a non-vector intrinsic, we model them
499 // as having cost of 2 total, and if they are a vector intrinsic, we model
500 // them as having cost 1.
501 if (const CallInst *CI = dyn_cast<CallInst>(I)) {
502 if (!isa<IntrinsicInst>(CI))
503 Size += 3;
504 else if (!CI->getType()->isVectorTy())
505 Size += 1;
506 }
507 }
508
509 return Size > Bonus ? Size - Bonus : 0;
510}
511
512/// findLoopHeaders - We do not want jump threading to turn proper loop
513/// structures into irreducible loops. Doing this breaks up the loop nesting
514/// hierarchy and pessimizes later transformations. To prevent this from
515/// happening, we first have to find the loop headers. Here we approximate this
516/// by finding targets of backedges in the CFG.
517///
518/// Note that there definitely are cases when we want to allow threading of
519/// edges across a loop header. For example, threading a jump from outside the
520/// loop (the preheader) to an exit block of the loop is definitely profitable.
521/// It is also almost always profitable to thread backedges from within the loop
522/// to exit blocks, and is often profitable to thread backedges to other blocks
523/// within the loop (forming a nested loop). This simple analysis is not rich
524/// enough to track all of these properties and keep it up-to-date as the CFG
525/// mutates, so we don't allow any of these transformations.
528 FindFunctionBackedges(F, Edges);
529
530 for (const auto &Edge : Edges)
531 LoopHeaders.insert(Edge.second);
532}
533
534/// getKnownConstant - Helper method to determine if we can thread over a
535/// terminator with the given value as its condition, and if so what value to
536/// use for that. What kind of value this is depends on whether we want an
537/// integer or a block address, but an undef is always accepted.
538/// Returns null if Val is null or not an appropriate constant.
540 if (!Val)
541 return nullptr;
542
543 // Undef is "known" enough.
544 if (UndefValue *U = dyn_cast<UndefValue>(Val))
545 return U;
546
547 if (Preference == WantBlockAddress)
548 return dyn_cast<BlockAddress>(Val->stripPointerCasts());
549
550 return dyn_cast<ConstantInt>(Val);
551}
552
553/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
554/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
555/// in any of our predecessors. If so, return the known list of value and pred
556/// BB in the result vector.
557///
558/// This returns true if there were any known values.
560 Value *V, BasicBlock *BB, PredValueInfo &Result,
561 ConstantPreference Preference, SmallPtrSet<Value *, 4> &RecursionSet,
562 Instruction *CxtI) {
563 const DataLayout &DL = BB->getDataLayout();
564
565 // This method walks up use-def chains recursively. Because of this, we could
566 // get into an infinite loop going around loops in the use-def chain. To
567 // prevent this, keep track of the values we've already visited and terminate
568 // the search if we loop back to one of them.
569 if (!RecursionSet.insert(V).second)
570 return false;
571
572 // If V is a constant, then it is known in all predecessors.
573 if (Constant *KC = getKnownConstant(V, Preference)) {
574 for (BasicBlock *Pred : predecessors(BB))
575 Result.emplace_back(KC, Pred);
576
577 return !Result.empty();
578 }
579
580 // If V is a non-instruction value, or an instruction in a different block,
581 // then it can't be derived from a PHI.
582 Instruction *I = dyn_cast<Instruction>(V);
583 if (!I || I->getParent() != BB) {
584
585 // Okay, if this is a live-in value, see if it has a known value on any
586 // edge from our predecessors.
587 for (BasicBlock *P : predecessors(BB)) {
588 using namespace PatternMatch;
589 // If the value is known by LazyValueInfo to be a constant in a
590 // predecessor, use that information to try to thread this block.
591 Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
592 // If V is a non-local compare-with-constant instruction, use the richer
593 // 'getPredicateOnEdge' method. This would be able to handle value
594 // inequalities better, for example if the compare is "X < 4" and "X < 3"
595 // is known true but "X < 4" itself is not available.
597 Value *Val;
598 Constant *Cst;
599 if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst))))
600 PredCst = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
601 if (Constant *KC = getKnownConstant(PredCst, Preference))
602 Result.emplace_back(KC, P);
603 }
604
605 return !Result.empty();
606 }
607
608 /// If I is a PHI node, then we know the incoming values for any constants.
609 if (PHINode *PN = dyn_cast<PHINode>(I)) {
610 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
611 Value *InVal = PN->getIncomingValue(i);
612 if (Constant *KC = getKnownConstant(InVal, Preference)) {
613 Result.emplace_back(KC, PN->getIncomingBlock(i));
614 } else {
615 Constant *CI = LVI->getConstantOnEdge(InVal,
616 PN->getIncomingBlock(i),
617 BB, CxtI);
618 if (Constant *KC = getKnownConstant(CI, Preference))
619 Result.emplace_back(KC, PN->getIncomingBlock(i));
620 }
621 }
622
623 return !Result.empty();
624 }
625
626 // Handle Cast instructions.
627 if (CastInst *CI = dyn_cast<CastInst>(I)) {
628 Value *Source = CI->getOperand(0);
629 PredValueInfoTy Vals;
630 computeValueKnownInPredecessorsImpl(Source, BB, Vals, Preference,
631 RecursionSet, CxtI);
632 if (Vals.empty())
633 return false;
634
635 // Convert the known values.
636 for (auto &Val : Vals)
637 if (Constant *Folded = ConstantFoldCastOperand(CI->getOpcode(), Val.first,
638 CI->getType(), DL))
639 Result.emplace_back(Folded, Val.second);
640
641 return !Result.empty();
642 }
643
644 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
645 Value *Source = FI->getOperand(0);
646 computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
647 RecursionSet, CxtI);
648
649 erase_if(Result, [](auto &Pair) {
650 return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
651 });
652
653 return !Result.empty();
654 }
655
656 // Handle some boolean conditions.
657 if (I->getType()->getPrimitiveSizeInBits() == 1) {
658 using namespace PatternMatch;
659 if (Preference != WantInteger)
660 return false;
661 // X | true -> true
662 // X & false -> false
663 Value *Op0, *Op1;
664 if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
665 match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
666 PredValueInfoTy LHSVals, RHSVals;
667
669 RecursionSet, CxtI);
671 RecursionSet, CxtI);
672
673 if (LHSVals.empty() && RHSVals.empty())
674 return false;
675
676 ConstantInt *InterestingVal;
677 if (match(I, m_LogicalOr()))
678 InterestingVal = ConstantInt::getTrue(I->getContext());
679 else
680 InterestingVal = ConstantInt::getFalse(I->getContext());
681
682 SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
683
684 // Scan for the sentinel. If we find an undef, force it to the
685 // interesting value: x|undef -> true and x&undef -> false.
686 for (const auto &LHSVal : LHSVals)
687 if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
688 Result.emplace_back(InterestingVal, LHSVal.second);
689 LHSKnownBBs.insert(LHSVal.second);
690 }
691 for (const auto &RHSVal : RHSVals)
692 if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
693 // If we already inferred a value for this block on the LHS, don't
694 // re-add it.
695 if (!LHSKnownBBs.count(RHSVal.second))
696 Result.emplace_back(InterestingVal, RHSVal.second);
697 }
698
699 return !Result.empty();
700 }
701
702 // Handle the NOT form of XOR.
703 if (I->getOpcode() == Instruction::Xor &&
704 isa<ConstantInt>(I->getOperand(1)) &&
705 cast<ConstantInt>(I->getOperand(1))->isOne()) {
706 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
707 WantInteger, RecursionSet, CxtI);
708 if (Result.empty())
709 return false;
710
711 // Invert the known values.
712 for (auto &R : Result)
713 R.first = ConstantExpr::getNot(R.first);
714
715 return true;
716 }
717
718 // Try to simplify some other binary operator values.
719 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
720 if (Preference != WantInteger)
721 return false;
722 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
723 PredValueInfoTy LHSVals;
724 computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
725 WantInteger, RecursionSet, CxtI);
726
727 // Try to use constant folding to simplify the binary operator.
728 for (const auto &LHSVal : LHSVals) {
729 Constant *V = LHSVal.first;
730 Constant *Folded =
731 ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
732
733 if (Constant *KC = getKnownConstant(Folded, WantInteger))
734 Result.emplace_back(KC, LHSVal.second);
735 }
736 }
737
738 return !Result.empty();
739 }
740
741 // Handle compare with phi operand, where the PHI is defined in this block.
742 if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
743 if (Preference != WantInteger)
744 return false;
745 Type *CmpType = Cmp->getType();
746 Value *CmpLHS = Cmp->getOperand(0);
747 Value *CmpRHS = Cmp->getOperand(1);
748 CmpInst::Predicate Pred = Cmp->getPredicate();
749
750 PHINode *PN = dyn_cast<PHINode>(CmpLHS);
751 if (!PN)
752 PN = dyn_cast<PHINode>(CmpRHS);
753 // Do not perform phi translation across a loop header phi, because this
754 // may result in comparison of values from two different loop iterations.
755 // FIXME: This check is broken if LoopHeaders is not populated.
756 if (PN && PN->getParent() == BB && !LoopHeaders.contains(BB)) {
757 const DataLayout &DL = PN->getDataLayout();
758 // We can do this simplification if any comparisons fold to true or false.
759 // See if any do.
760 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
761 BasicBlock *PredBB = PN->getIncomingBlock(i);
762 Value *LHS, *RHS;
763 if (PN == CmpLHS) {
764 LHS = PN->getIncomingValue(i);
765 RHS = CmpRHS->DoPHITranslation(BB, PredBB);
766 } else {
767 LHS = CmpLHS->DoPHITranslation(BB, PredBB);
768 RHS = PN->getIncomingValue(i);
769 }
770 Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
771 if (!Res) {
772 if (!isa<Constant>(RHS))
773 continue;
774
775 // getPredicateOnEdge call will make no sense if LHS is defined in BB.
776 auto LHSInst = dyn_cast<Instruction>(LHS);
777 if (LHSInst && LHSInst->getParent() == BB)
778 continue;
779
780 Res = LVI->getPredicateOnEdge(Pred, LHS, cast<Constant>(RHS), PredBB,
781 BB, CxtI ? CxtI : Cmp);
782 }
783
784 if (Constant *KC = getKnownConstant(Res, WantInteger))
785 Result.emplace_back(KC, PredBB);
786 }
787
788 return !Result.empty();
789 }
790
791 // If comparing a live-in value against a constant, see if we know the
792 // live-in value on any predecessors.
793 if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
794 Constant *CmpConst = cast<Constant>(CmpRHS);
795
796 if (!isa<Instruction>(CmpLHS) ||
797 cast<Instruction>(CmpLHS)->getParent() != BB) {
798 for (BasicBlock *P : predecessors(BB)) {
799 // If the value is known by LazyValueInfo to be a constant in a
800 // predecessor, use that information to try to thread this block.
801 Constant *Res = LVI->getPredicateOnEdge(Pred, CmpLHS, CmpConst, P, BB,
802 CxtI ? CxtI : Cmp);
803 if (Constant *KC = getKnownConstant(Res, WantInteger))
804 Result.emplace_back(KC, P);
805 }
806
807 return !Result.empty();
808 }
809
810 // InstCombine can fold some forms of constant range checks into
811 // (icmp (add (x, C1)), C2). See if we have such a thing with
812 // x as a live-in.
813 {
814 using namespace PatternMatch;
815
816 Value *AddLHS;
817 ConstantInt *AddConst;
818 if (isa<ConstantInt>(CmpConst) &&
819 match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
820 if (!isa<Instruction>(AddLHS) ||
821 cast<Instruction>(AddLHS)->getParent() != BB) {
822 for (BasicBlock *P : predecessors(BB)) {
823 // If the value is known by LazyValueInfo to be a ConstantRange in
824 // a predecessor, use that information to try to thread this
825 // block.
827 AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
828 // Propagate the range through the addition.
829 CR = CR.add(AddConst->getValue());
830
831 // Get the range where the compare returns true.
833 Pred, cast<ConstantInt>(CmpConst)->getValue());
834
835 Constant *ResC;
836 if (CmpRange.contains(CR))
837 ResC = ConstantInt::getTrue(CmpType);
838 else if (CmpRange.inverse().contains(CR))
839 ResC = ConstantInt::getFalse(CmpType);
840 else
841 continue;
842
843 Result.emplace_back(ResC, P);
844 }
845
846 return !Result.empty();
847 }
848 }
849 }
850
851 // Try to find a constant value for the LHS of a comparison,
852 // and evaluate it statically if we can.
853 PredValueInfoTy LHSVals;
854 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
855 WantInteger, RecursionSet, CxtI);
856
857 for (const auto &LHSVal : LHSVals) {
858 Constant *V = LHSVal.first;
859 Constant *Folded =
860 ConstantFoldCompareInstOperands(Pred, V, CmpConst, DL);
861 if (Constant *KC = getKnownConstant(Folded, WantInteger))
862 Result.emplace_back(KC, LHSVal.second);
863 }
864
865 return !Result.empty();
866 }
867 }
868
869 if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
870 // Handle select instructions where at least one operand is a known constant
871 // and we can figure out the condition value for any predecessor block.
872 Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
873 Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
874 PredValueInfoTy Conds;
875 if ((TrueVal || FalseVal) &&
876 computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
877 WantInteger, RecursionSet, CxtI)) {
878 for (auto &C : Conds) {
879 Constant *Cond = C.first;
880
881 // Figure out what value to use for the condition.
882 bool KnownCond;
883 if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
884 // A known boolean.
885 KnownCond = CI->isOne();
886 } else {
887 assert(isa<UndefValue>(Cond) && "Unexpected condition value");
888 // Either operand will do, so be sure to pick the one that's a known
889 // constant.
890 // FIXME: Do this more cleverly if both values are known constants?
891 KnownCond = (TrueVal != nullptr);
892 }
893
894 // See if the select has a known constant value for this predecessor.
895 if (Constant *Val = KnownCond ? TrueVal : FalseVal)
896 Result.emplace_back(Val, C.second);
897 }
898
899 return !Result.empty();
900 }
901 }
902
903 // If all else fails, see if LVI can figure out a constant value for us.
904 assert(CxtI->getParent() == BB && "CxtI should be in BB");
905 Constant *CI = LVI->getConstant(V, CxtI);
906 if (Constant *KC = getKnownConstant(CI, Preference)) {
907 for (BasicBlock *Pred : predecessors(BB))
908 Result.emplace_back(KC, Pred);
909 }
910
911 return !Result.empty();
912}
913
914/// getBestDestForJumpOnUndef - If we determine that the specified block ends
915/// in an undefined jump, decide which block is best to revector to.
916///
917/// Since we can pick an arbitrary destination, we pick the successor with the
918/// fewest predecessors. This should reduce the in-degree of the others.
920 Instruction *BBTerm = BB->getTerminator();
921 unsigned MinSucc = 0;
922 BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
923 // Compute the successor with the minimum number of predecessors.
924 unsigned MinNumPreds = pred_size(TestBB);
925 for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
926 TestBB = BBTerm->getSuccessor(i);
927 unsigned NumPreds = pred_size(TestBB);
928 if (NumPreds < MinNumPreds) {
929 MinSucc = i;
930 MinNumPreds = NumPreds;
931 }
932 }
933
934 return MinSucc;
935}
936
938 if (!BB->hasAddressTaken()) return false;
939
940 // If the block has its address taken, there may be a tree of dead constants
941 // hanging off of it. These shouldn't keep the block alive.
944 return !BA->use_empty();
945}
946
947 /// processBlock - If there are any predecessors whose control can be threaded
948 /// through to a successor, transform them now.
/// Returns true when one of the transformations below fires. For a condition
/// that is not an instruction, a failed threading attempt returns whether the
/// condition was constant folded during this call.
// NOTE(review): listing line 949 (the function signature) is missing from this
// extract.
950 // If the block is trivially dead, just return and let the caller nuke it.
951 // This simplifies other transformations.
952 if (DTU->isBBPendingDeletion(BB) ||
953 (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
954 return false;
955
956 // If this block has a single predecessor, and if that pred has a single
957 // successor, merge the blocks. This encourages recursive jump threading
958 // because now the condition in this block can be threaded through
959 // predecessors of our predecessor block.
// NOTE(review): listing line 960 (the condition guarding this return) is
// missing from this extract.
961 return true;
962
// NOTE(review): listing line 963 (the condition guarding this return) is
// missing from this extract.
964 return true;
965
966 // Look if we can propagate guards to predecessors.
967 if (HasGuards && processGuards(BB))
968 return true;
969
970 // What kind of constant we're looking for.
971 ConstantPreference Preference = WantInteger;
972
973 // Look to see if the terminator is a conditional branch, switch or indirect
974 // branch, if not we can't thread it.
975 Value *Condition;
976 Instruction *Terminator = BB->getTerminator();
977 if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
978 // Can't thread an unconditional jump.
979 if (BI->isUnconditional()) return false;
980 Condition = BI->getCondition();
981 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
982 Condition = SI->getCondition();
983 } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
984 // Can't thread indirect branch with no successors.
985 if (IB->getNumSuccessors() == 0) return false;
986 Condition = IB->getAddress()->stripPointerCasts();
987 Preference = WantBlockAddress;
988 } else {
989 return false; // Must be an invoke or callbr.
990 }
991
992 // Keep track if we constant folded the condition in this invocation.
993 bool ConstantFolded = false;
994
995 // Run constant folding to see if we can reduce the condition to a simple
996 // constant.
997 if (Instruction *I = dyn_cast<Instruction>(Condition)) {
998 Value *SimpleVal =
// NOTE(review): listing line 999 (the initializer of SimpleVal) is missing
// from this extract.
1000 if (SimpleVal) {
1001 I->replaceAllUsesWith(SimpleVal);
1002 if (isInstructionTriviallyDead(I, TLI))
1003 I->eraseFromParent();
1004 Condition = SimpleVal;
1005 ConstantFolded = true;
1006 }
1007 }
1008
1009 // If the terminator is branching on an undef or freeze undef, we can pick any
1010 // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
// The freeze is erased below once the terminator is gone, so the fold is only
// attempted when the freeze has a single use.
1011 auto *FI = dyn_cast<FreezeInst>(Condition);
1012 if (isa<UndefValue>(Condition) ||
1013 (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1014 unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1015 std::vector<DominatorTree::UpdateType> Updates;
1016
1017 // Fold the branch/switch.
1018 Instruction *BBTerm = BB->getTerminator();
1019 Updates.reserve(BBTerm->getNumSuccessors());
1020 for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1021 if (i == BestSucc) continue;
1022 BasicBlock *Succ = BBTerm->getSuccessor(i);
1023 Succ->removePredecessor(BB, true);
1024 Updates.push_back({DominatorTree::Delete, BB, Succ});
1025 }
1026
1027 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1028 << "' folding undef terminator: " << *BBTerm << '\n');
1029 Instruction *NewBI = BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm->getIterator());
1030 NewBI->setDebugLoc(BBTerm->getDebugLoc());
1031 ++NumFolds;
1032 BBTerm->eraseFromParent();
1033 DTU->applyUpdatesPermissive(Updates);
1034 if (FI)
1035 FI->eraseFromParent();
1036 return true;
1037 }
1038
1039 // If the terminator of this block is branching on a constant, simplify the
1040 // terminator to an unconditional branch. This can occur due to threading in
1041 // other blocks.
1042 if (getKnownConstant(Condition, Preference)) {
1043 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1044 << "' folding terminator: " << *BB->getTerminator()
1045 << '\n');
1046 ++NumFolds;
1047 ConstantFoldTerminator(BB, true, nullptr, DTU.get());
1048 if (auto *BPI = getBPI())
1049 BPI->eraseBlock(BB);
1050 return true;
1051 }
1052
1053 Instruction *CondInst = dyn_cast<Instruction>(Condition);
1054
1055 // All the rest of our checks depend on the condition being an instruction.
1056 if (!CondInst) {
1057 // FIXME: Unify this with code below.
1058 if (processThreadableEdges(Condition, BB, Preference, Terminator))
1059 return true;
1060 return ConstantFolded;
1061 }
1062
1063 // Some of the following optimization can safely work on the unfrozen cond.
1064 Value *CondWithoutFreeze = CondInst;
1065 if (auto *FI = dyn_cast<FreezeInst>(CondInst))
1066 CondWithoutFreeze = FI->getOperand(0);
1067
1068 if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
1069 // If we're branching on a conditional, LVI might be able to determine
1070 // its value at the branch instruction. We only handle comparisons
1071 // against a constant at this time.
1072 if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
1073 Constant *Res =
1074 LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1075 CondConst, BB->getTerminator(),
1076 /*UseBlockValue=*/false);
1077 if (Res) {
1078 // We can safely replace *some* uses of the CondInst if it has
1079 // exactly one value as returned by LVI. RAUW is incorrect in the
1080 // presence of guards and assumes, that have the `Cond` as the use. This
1081 // is because we use the guards/assume to reason about the `Cond` value
1082 // at the end of block, but RAUW unconditionally replaces all uses
1083 // including the guards/assumes themselves and the uses before the
1084 // guard/assume.
1085 if (replaceFoldableUses(CondCmp, Res, BB))
1086 return true;
1087 }
1088
1089 // We did not manage to simplify this branch, try to see whether
1090 // CondCmp depends on a known phi-select pattern.
1091 if (tryToUnfoldSelect(CondCmp, BB))
1092 return true;
1093 }
1094 }
1095
1096 if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
1097 if (tryToUnfoldSelect(SI, BB))
1098 return true;
1099
1100 // Check for some cases that are worth simplifying. Right now we want to look
1101 // for loads that are used by a switch or by the condition for the branch. If
1102 // we see one, check to see if it's partially redundant. If so, insert a PHI
1103 // which can then be used to thread the values.
1104 Value *SimplifyValue = CondWithoutFreeze;
1105
1106 if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1107 if (isa<Constant>(CondCmp->getOperand(1)))
1108 SimplifyValue = CondCmp->getOperand(0);
1109
1110 // TODO: There are other places where load PRE would be profitable, such as
1111 // more complex comparisons.
1112 if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
// NOTE(review): listing line 1113 (the nested condition guarding this return)
// is missing from this extract.
1114 return true;
1115
1116 // Before threading, try to propagate profile data backwards:
1117 if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1118 if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
// NOTE(review): listing line 1119 (the statement controlled by the two ifs
// above) is missing from this extract.
1120
1121 // Handle a variety of cases where we are branching on something derived from
1122 // a PHI node in the current block. If we can prove that any predecessors
1123 // compute a predictable value based on a PHI node, thread those predecessors.
1124 if (processThreadableEdges(CondInst, BB, Preference, Terminator))
1125 return true;
1126
1127 // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1128 // the current block, see if we can simplify.
1129 PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
1130 if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1131 return processBranchOnPHI(PN);
1132
1133 // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1134 if (CondInst->getOpcode() == Instruction::Xor &&
1135 CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1136 return processBranchOnXOR(cast<BinaryOperator>(CondInst));
1137
1138 // Search for a stronger dominating condition that can be used to simplify a
1139 // conditional branch leaving BB.
// NOTE(review): listing line 1140 (the condition guarding this return) is
// missing from this extract.
1141 return true;
1142
// NOTE(review): ConstantFolded is not consulted on this path, so a condition
// that was simplified to another instruction above still yields false here —
// confirm that this is intended.
1143 return false;
1144}
1145
// Tries to fold BB's conditional branch using the branch conditions found on
// the chain of single predecessors above BB. At most ImplicationSearchThreshold
// predecessors are inspected. When a predecessor's condition, together with
// the edge taken towards BB, decides BB's condition, the branch is replaced by
// an unconditional branch to the surviving successor and true is returned.
// NOTE(review): listing line 1146 (the function signature) is missing from this
// extract.
1147 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1148 if (!BI || !BI->isConditional())
1149 return false;
1150
1151 Value *Cond = BI->getCondition();
1152 // Assuming that predecessor's branch was taken, if pred's branch condition
1153 // (V) implies Cond, Cond can be either true, undef, or poison. In this case,
1154 // freeze(Cond) is either true or a nondeterministic value.
1155 // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
1156 // without affecting other instructions.
1157 auto *FICond = dyn_cast<FreezeInst>(Cond);
1158 if (FICond && FICond->hasOneUse())
1159 Cond = FICond->getOperand(0);
1160 else
1161 FICond = nullptr;
1162
1163 BasicBlock *CurrentBB = BB;
1164 BasicBlock *CurrentPred = BB->getSinglePredecessor();
1165 unsigned Iter = 0;
1166
1167 auto &DL = BB->getDataLayout();
1168
1169 while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1170 auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1171 if (!PBI || !PBI->isConditional())
1172 return false;
1173 if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1174 return false;
1175
// CondIsTrue records whether CurrentBB is reached through the predecessor's
// first (true) successor.
1176 bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1177 std::optional<bool> Implication =
1178 isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1179
1180 // If the branch condition of BB (which is Cond) and CurrentPred are
1181 // exactly the same freeze instruction, Cond can be folded into CondIsTrue.
1182 if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
1183 if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
1184 FICond->getOperand(0))
1185 Implication = CondIsTrue;
1186 }
1187
1188 if (Implication) {
// Keep the successor selected by the implied value and drop the other edge.
1189 BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1190 BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1191 RemoveSucc->removePredecessor(BB);
1192 BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI->getIterator());
1193 UncondBI->setDebugLoc(BI->getDebugLoc());
1194 ++NumFolds;
1195 BI->eraseFromParent();
1196 if (FICond)
1197 FICond->eraseFromParent();
1198
1199 DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
1200 if (auto *BPI = getBPI())
1201 BPI->eraseBlock(BB);
1202 return true;
1203 }
// No implication from this predecessor; move one step further up the chain.
1204 CurrentBB = CurrentPred;
1205 CurrentPred = CurrentBB->getSinglePredecessor();
1206 }
1207
1208 return false;
1209}
1210
1211 /// Return true if Op is an instruction defined in the given block.
/// Values that are not instructions always yield false.
// NOTE(review): listing line 1212 (the function signature) is missing from this
// extract.
1213 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1214 if (OpInst->getParent() == BB)
1215 return true;
1216 return false;
1217}
1218
1219 /// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1220 /// redundant load instruction, eliminate it by replacing it with a PHI node.
1221 /// This is an important optimization that encourages jump threading, and needs
1222 /// to be run interlaced with other jump threading tasks.
/// Returns true when LoadI was replaced (either by a value already available in
/// its own block or by a new PHI of per-predecessor values) and erased.
// NOTE(review): listing line 1223 (the function signature) is missing from this
// extract.
1224 // Don't hack volatile and ordered loads.
1225 if (!LoadI->isUnordered()) return false;
1226
1227 // If the load is defined in a block with exactly one predecessor, it can't be
1228 // partially redundant.
1229 BasicBlock *LoadBB = LoadI->getParent();
1230 if (LoadBB->getSinglePredecessor())
1231 return false;
1232
1233 // If the load is defined in an EH pad, it can't be partially redundant,
1234 // because the edges between the invoke and the EH pad cannot have other
1235 // instructions between them.
1236 if (LoadBB->isEHPad())
1237 return false;
1238
1239 Value *LoadedPtr = LoadI->getOperand(0);
1240
1241 // If the loaded operand is defined in the LoadBB and it's not a phi,
1242 // it can't be available in predecessors.
1243 if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1244 return false;
1245
1246 // Scan a few instructions up from the load, to see if it is obviously live at
1247 // the entry to its block.
1248 BasicBlock::iterator BBIt(LoadI);
1249 bool IsLoadCSE;
1250 BatchAAResults BatchAA(*AA);
1251 // The dominator tree is updated lazily and may not be valid at this point.
1252 BatchAA.disableDominatorTree();
1253 if (Value *AvailableVal = FindAvailableLoadedValue(
1254 LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
1255 // If the value of the load is locally available within the block, just use
1256 // it. This frequently occurs for reg2mem'd allocas.
1257
1258 if (IsLoadCSE) {
1259 LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1260 combineMetadataForCSE(NLoadI, LoadI, false);
1261 LVI->forgetValue(NLoadI);
1262 };
1263
1264 // If the returned value is the load itself, replace with poison. This can
1265 // only happen in dead loops.
1266 if (AvailableVal == LoadI)
1267 AvailableVal = PoisonValue::get(LoadI->getType());
1268 if (AvailableVal->getType() != LoadI->getType()) {
1269 AvailableVal = CastInst::CreateBitOrPointerCast(
1270 AvailableVal, LoadI->getType(), "", LoadI->getIterator());
1271 cast<Instruction>(AvailableVal)->setDebugLoc(LoadI->getDebugLoc());
1272 }
1273 LoadI->replaceAllUsesWith(AvailableVal);
1274 LoadI->eraseFromParent();
1275 return true;
1276 }
1277
1278 // Otherwise, if we scanned the whole block and got to the top of the block,
1279 // we know the block is locally transparent to the load. If not, something
1280 // might clobber its value.
1281 if (BBIt != LoadBB->begin())
1282 return false;
1283
1284 // If all of the loads and stores that feed the value have the same AA tags,
1285 // then we can propagate them onto any newly inserted loads.
1286 AAMDNodes AATags = LoadI->getAAMetadata();
1287
1288 SmallPtrSet<BasicBlock*, 8> PredsScanned;
1289
1290 using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1291
1292 AvailablePredsTy AvailablePreds;
1293 BasicBlock *OneUnavailablePred = nullptr;
// NOTE(review): listing line 1294 is missing from this extract; presumably it
// declares the CSELoads container used below — confirm against the full file.
1295
1296 // If we got here, the loaded value is transparent through to the start of the
1297 // block. Check to see if it is available in any of the predecessor blocks.
1298 for (BasicBlock *PredBB : predecessors(LoadBB)) {
1299 // If we already scanned this predecessor, skip it.
1300 if (!PredsScanned.insert(PredBB).second)
1301 continue;
1302
1303 BBIt = PredBB->end();
1304 unsigned NumScanedInst = 0;
1305 Value *PredAvailable = nullptr;
1306 // NOTE: We don't CSE load that is volatile or anything stronger than
1307 // unordered, that should have been checked when we entered the function.
1308 assert(LoadI->isUnordered() &&
1309 "Attempting to CSE volatile or atomic loads");
1310 // If this is a load on a phi pointer, phi-translate it and search
1311 // for available load/store to the pointer in predecessors.
1312 Type *AccessTy = LoadI->getType();
1313 const auto &DL = LoadI->getDataLayout();
1314 MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
1315 LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
1316 AATags);
1317 PredAvailable = findAvailablePtrLoadStore(
1318 Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
1319 &BatchAA, &IsLoadCSE, &NumScanedInst);
1320
1321 // If PredBB has a single predecessor, continue scanning through the
1322 // single predecessor.
// The scan budget is shared across the chain: each further block only gets
// what remains of DefMaxInstsToScan.
1323 BasicBlock *SinglePredBB = PredBB;
1324 while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1325 NumScanedInst < DefMaxInstsToScan) {
1326 SinglePredBB = SinglePredBB->getSinglePredecessor();
1327 if (SinglePredBB) {
1328 BBIt = SinglePredBB->end();
1329 PredAvailable = findAvailablePtrLoadStore(
1330 Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
1331 (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
1332 &NumScanedInst);
1333 }
1334 }
1335
1336 if (!PredAvailable) {
1337 OneUnavailablePred = PredBB;
1338 continue;
1339 }
1340
1341 if (IsLoadCSE)
1342 CSELoads.push_back(cast<LoadInst>(PredAvailable));
1343
1344 // If so, this load is partially redundant. Remember this info so that we
1345 // can create a PHI node.
1346 AvailablePreds.emplace_back(PredBB, PredAvailable);
1347 }
1348
1349 // If the loaded value isn't available in any predecessor, it isn't partially
1350 // redundant.
1351 if (AvailablePreds.empty()) return false;
1352
1353 // Okay, the loaded value is available in at least one (and maybe all!)
1354 // predecessors. If the value is unavailable in more than one unique
1355 // predecessor, we want to insert a merge block for those common predecessors.
1356 // This ensures that we only have to insert one reload, thus not increasing
1357 // code size.
1358 BasicBlock *UnavailablePred = nullptr;
1359
1360 // If the value is unavailable in one of predecessors, we will end up
1361 // inserting a new instruction into them. It is only valid if all the
1362 // instructions before LoadI are guaranteed to pass execution to its
1363 // successor, or if LoadI is safe to speculate.
1364 // TODO: If this logic becomes more complex, and we will perform PRE insertion
1365 // farther than to a predecessor, we need to reuse the code from GVN's PRE.
1366 // It requires domination tree analysis, so for this simple case it is an
1367 // overkill.
1368 if (PredsScanned.size() != AvailablePreds.size() &&
// NOTE(review): listing lines 1369 and 1371 (the rest of this condition and the
// per-instruction check inside the loop) are missing from this extract.
1370 for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
1372 return false;
1373
1374 // If there is exactly one predecessor where the value is unavailable, the
1375 // already computed 'OneUnavailablePred' block is it. If it ends in an
1376 // unconditional branch, we know that it isn't a critical edge.
1377 if (PredsScanned.size() == AvailablePreds.size()+1 &&
1378 OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1379 UnavailablePred = OneUnavailablePred;
1380 } else if (PredsScanned.size() != AvailablePreds.size()) {
1381 // Otherwise, we had multiple unavailable predecessors or we had a critical
1382 // edge from the one.
1383 SmallVector<BasicBlock*, 8> PredsToSplit;
1384 SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
1385
1386 for (const auto &AvailablePred : AvailablePreds)
1387 AvailablePredSet.insert(AvailablePred.first);
1388
1389 // Add all the unavailable predecessors to the PredsToSplit list.
1390 for (BasicBlock *P : predecessors(LoadBB)) {
1391 // If the predecessor is an indirect goto, we can't split the edge.
1392 if (isa<IndirectBrInst>(P->getTerminator()))
1393 return false;
1394
1395 if (!AvailablePredSet.count(P))
1396 PredsToSplit.push_back(P);
1397 }
1398
1399 // Split them out to their own block.
1400 UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1401 }
1402
1403 // If the value isn't available in all predecessors, then there will be
1404 // exactly one where it isn't available. Insert a load on that edge and add
1405 // it to the AvailablePreds list.
1406 if (UnavailablePred) {
1407 assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1408 "Can't handle critical edge here!");
1409 LoadInst *NewVal = new LoadInst(
1410 LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1411 LoadI->getName() + ".pr", false, LoadI->getAlign(),
1412 LoadI->getOrdering(), LoadI->getSyncScopeID(),
1413 UnavailablePred->getTerminator()->getIterator());
1414 NewVal->setDebugLoc(LoadI->getDebugLoc());
1415 if (AATags)
1416 NewVal->setAAMetadata(AATags);
1417
1418 AvailablePreds.emplace_back(UnavailablePred, NewVal);
1419 }
1420
1421 // Now we know that each predecessor of this block has a value in
1422 // AvailablePreds, sort them for efficient access as we're walking the preds.
1423 array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1424
1425 // Create a PHI node at the start of the block for the PRE'd load value.
1426 PHINode *PN = PHINode::Create(LoadI->getType(), pred_size(LoadBB), "");
1427 PN->insertBefore(LoadBB->begin());
1428 PN->takeName(LoadI);
1429 PN->setDebugLoc(LoadI->getDebugLoc());
1430
1431 // Insert new entries into the PHI for each predecessor. A single block may
1432 // have multiple entries here.
1433 for (BasicBlock *P : predecessors(LoadBB)) {
1434 AvailablePredsTy::iterator I =
1435 llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
1436
1437 assert(I != AvailablePreds.end() && I->first == P &&
1438 "Didn't find entry for predecessor!");
1439
1440 // If we have an available predecessor but it requires casting, insert the
1441 // cast in the predecessor and use the cast. Note that we have to update the
1442 // AvailablePreds vector as we go so that all of the PHI entries for this
1443 // predecessor use the same bitcast.
1444 Value *&PredV = I->second;
1445 if (PredV->getType() != LoadI->getType())
// NOTE(review): listing line 1446 (the start of the cast statement whose
// arguments follow) is missing from this extract.
1447 PredV, LoadI->getType(), "", P->getTerminator()->getIterator());
1448
1449 PN->addIncoming(PredV, I->first);
1450 }
1451
1452 for (LoadInst *PredLoadI : CSELoads) {
1453 combineMetadataForCSE(PredLoadI, LoadI, true);
1454 LVI->forgetValue(PredLoadI);
1455 }
1456
1457 LoadI->replaceAllUsesWith(PN);
1458 LoadI->eraseFromParent();
1459
1460 return true;
1461}
1462
1463 /// findMostPopularDest - The specified list contains multiple possible
1464 /// threadable destinations. Pick the one that occurs the most frequently in
1465 /// the list.
/// PredToDestList must not be empty. Entries with a null destination are not
/// counted, and nullptr is returned when the list contains only such entries.
1466 static BasicBlock *
// NOTE(review): listing line 1467 (the function name and the parameter that
// introduces BB) is missing from this extract.
1468 const SmallVectorImpl<std::pair<BasicBlock *,
1469 BasicBlock *>> &PredToDestList) {
1470 assert(!PredToDestList.empty());
1471
1472 // Determine popularity. If there are multiple possible destinations, we
1473 // explicitly choose to ignore 'undef' destinations. We prefer to thread
1474 // blocks with known and real destinations to threading undef. We'll handle
1475 // them later if interesting.
1476 MapVector<BasicBlock *, unsigned> DestPopularity;
1477
1478 // Populate DestPopularity with the successors in the order they appear in the
1479 // successor list. This way, we ensure determinism by iterating it in the
1480 // same order in llvm::max_element below. We map nullptr to 0 so that we can
1481 // return nullptr when PredToDestList contains nullptr only.
1482 DestPopularity[nullptr] = 0;
1483 for (auto *SuccBB : successors(BB))
1484 DestPopularity[SuccBB] = 0;
1485
1486 for (const auto &PredToDest : PredToDestList)
1487 if (PredToDest.second)
1488 DestPopularity[PredToDest.second]++;
1489
1490 // Find the most popular dest.
1491 auto MostPopular = llvm::max_element(DestPopularity, llvm::less_second());
1492
1493 // Okay, we have finally picked the most popular destination.
1494 return MostPopular->first;
1495}
1496
1497 // Try to evaluate the value of V when the control flows from PredPredBB to
1498 // BB->getSinglePredecessor() and then on to BB.
// Returns the constant V evaluates to on that path, or nullptr when it cannot
// be determined. BB must have a single predecessor.
// NOTE(review): listing line 1499 (the start of the function signature) is
// missing from this extract.
1500 BasicBlock *PredPredBB,
1501 Value *V,
1502 const DataLayout &DL) {
1503 BasicBlock *PredBB = BB->getSinglePredecessor();
1504 assert(PredBB && "Expected a single predecessor");
1505
1506 if (Constant *Cst = dyn_cast<Constant>(V)) {
1507 return Cst;
1508 }
1509
1510 // Consult LVI if V is not an instruction in BB or PredBB.
1511 Instruction *I = dyn_cast<Instruction>(V);
1512 if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1513 return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1514 }
1515
1516 // Look into a PHI argument.
// Only PHIs in PredBB are resolved, using the value incoming from PredPredBB.
1517 if (PHINode *PHI = dyn_cast<PHINode>(V)) {
1518 if (PHI->getParent() == PredBB)
1519 return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1520 return nullptr;
1521 }
1522
1523 // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
// Only compares located in BB are folded; both operands are evaluated
// recursively and must resolve to constants.
1524 if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1525 if (CondCmp->getParent() == BB) {
1526 Constant *Op0 =
1527 evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0), DL);
1528 Constant *Op1 =
1529 evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1), DL);
1530 if (Op0 && Op1) {
1531 return ConstantFoldCompareInstOperands(CondCmp->getPredicate(), Op0,
1532 Op1, DL);
1533 }
1534 }
1535 return nullptr;
1536 }
1537
1538 return nullptr;
1539}
1540
// Uses the values of Cond that are known in BB's predecessors to either fold
// BB's terminator (when every predecessor leads to the same successor) or to
// thread the predecessors that agree on the most popular destination.
// Returns false for loop headers and when no predecessor edge can be threaded.
// NOTE(review): listing line 1541 (the start of the function signature) is
// missing from this extract.
1542 ConstantPreference Preference,
1543 Instruction *CxtI) {
1544 // If threading this would thread across a loop header, don't even try to
1545 // thread the edge.
1546 if (LoopHeaders.count(BB))
1547 return false;
1548
1549 PredValueInfoTy PredValues;
1550 if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1551 CxtI)) {
1552 // We don't have known values in predecessors. See if we can thread through
1553 // BB and its sole predecessor.
// NOTE(review): listing line 1554 (the statement that performs this fallback)
// is missing from this extract.
1555 }
1556
1557 assert(!PredValues.empty() &&
1558 "computeValueKnownInPredecessors returned true with no values");
1559
1560 LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1561 for (const auto &PredValue : PredValues) {
1562 dbgs() << " BB '" << BB->getName()
1563 << "': FOUND condition = " << *PredValue.first
1564 << " for pred '" << PredValue.second->getName() << "'.\n";
1565 });
1566
1567 // Decide what we want to thread through. Convert our list of known values to
1568 // a list of known destinations for each pred. This also discards duplicate
1569 // predecessors and keeps track of the undefined inputs (which are represented
1570 // as a null dest in the PredToDestList).
// NOTE(review): listing lines 1571-1572 are missing from this extract;
// presumably they declare SeenPreds and PredToDestList, which are used below —
// confirm against the full file.
1573
// The all-ones pointers below are sentinels meaning "more than one distinct
// destination / value was seen".
1574 BasicBlock *OnlyDest = nullptr;
1575 BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1576 Constant *OnlyVal = nullptr;
1577 Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1578
1579 for (const auto &PredValue : PredValues) {
1580 BasicBlock *Pred = PredValue.second;
1581 if (!SeenPreds.insert(Pred).second)
1582 continue; // Duplicate predecessor entry.
1583
1584 Constant *Val = PredValue.first;
1585
1586 BasicBlock *DestBB;
1587 if (isa<UndefValue>(Val))
1588 DestBB = nullptr;
1589 else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1590 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
// isZero() yields 1 for a false condition, which selects successor 1.
1591 DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1592 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1593 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1594 DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1595 } else {
1596 assert(isa<IndirectBrInst>(BB->getTerminator())
1597 && "Unexpected terminator");
1598 assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1599 DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1600 }
1601
1602 // If we have exactly one destination, remember it for efficiency below.
1603 if (PredToDestList.empty()) {
1604 OnlyDest = DestBB;
1605 OnlyVal = Val;
1606 } else {
1607 if (OnlyDest != DestBB)
1608 OnlyDest = MultipleDestSentinel;
1609 // It is possible to have the same destination but a different value, e.g.
1610 // the default case in a switch instruction.
1611 if (Val != OnlyVal)
1612 OnlyVal = MultipleVal;
1613 }
1614
1615 // If the predecessor ends with an indirect goto, we can't change its
1616 // destination.
1617 if (isa<IndirectBrInst>(Pred->getTerminator()))
1618 continue;
1619
1620 PredToDestList.emplace_back(Pred, DestBB);
1621 }
1622
1623 // If all edges were unthreadable, we fail.
1624 if (PredToDestList.empty())
1625 return false;
1626
1627 // If all the predecessors go to a single known successor, we want to fold,
1628 // not thread. By doing so, we do not need to duplicate the current block, and
1629 // we do not miss opportunities in case we don't/can't duplicate it.
1630 if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1631 if (BB->hasNPredecessors(PredToDestList.size())) {
1632 bool SeenFirstBranchToOnlyDest = false;
1633 std::vector <DominatorTree::UpdateType> Updates;
1634 Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
1635 for (BasicBlock *SuccBB : successors(BB)) {
1636 if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1637 SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1638 } else {
1639 SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1640 Updates.push_back({DominatorTree::Delete, BB, SuccBB});
1641 }
1642 }
1643
1644 // Finally update the terminator.
1645 Instruction *Term = BB->getTerminator();
1646 Instruction *NewBI = BranchInst::Create(OnlyDest, Term->getIterator());
1647 NewBI->setDebugLoc(Term->getDebugLoc());
1648 ++NumFolds;
1649 Term->eraseFromParent();
1650 DTU->applyUpdatesPermissive(Updates);
1651 if (auto *BPI = getBPI())
1652 BPI->eraseBlock(BB);
1653
1654 // If the condition is now dead due to the removal of the old terminator,
1655 // erase it.
1656 if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1657 if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1658 CondInst->eraseFromParent();
1659 // We can safely replace *some* uses of the CondInst if it has
1660 // exactly one value as returned by LVI. RAUW is incorrect in the
1661 // presence of guards and assumes, that have the `Cond` as the use. This
1662 // is because we use the guards/assume to reason about the `Cond` value
1663 // at the end of block, but RAUW unconditionally replaces all uses
1664 // including the guards/assumes themselves and the uses before the
1665 // guard/assume.
1666 else if (OnlyVal && OnlyVal != MultipleVal)
1667 replaceFoldableUses(CondInst, OnlyVal, BB);
1668 }
1669 return true;
1670 }
1671 }
1672
1673 // Determine which is the most common successor. If we have many inputs and
1674 // this block is a switch, we want to start by threading the batch that goes
1675 // to the most popular destination first. If we only know about one
1676 // threadable destination (the common case) we can avoid this.
1677 BasicBlock *MostPopularDest = OnlyDest;
1678
1679 if (MostPopularDest == MultipleDestSentinel) {
1680 // Remove any loop headers from the Dest list, threadEdge conservatively
1681 // won't process them, but we might have other destination that are eligible
1682 // and we still want to process.
1683 erase_if(PredToDestList,
1684 [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1685 return LoopHeaders.contains(PredToDest.second);
1686 });
1687
1688 if (PredToDestList.empty())
1689 return false;
1690
1691 MostPopularDest = findMostPopularDest(BB, PredToDestList);
1692 }
1693
1694 // Now that we know what the most popular destination is, factor all
1695 // predecessors that will jump to it into a single predecessor.
1696 SmallVector<BasicBlock*, 16> PredsToFactor;
1697 for (const auto &PredToDest : PredToDestList)
1698 if (PredToDest.second == MostPopularDest) {
1699 BasicBlock *Pred = PredToDest.first;
1700
1701 // This predecessor may be a switch or something else that has multiple
1702 // edges to the block. Factor each of these edges by listing them
1703 // according to # occurrences in PredsToFactor.
1704 for (BasicBlock *Succ : successors(Pred))
1705 if (Succ == BB)
1706 PredsToFactor.push_back(Pred);
1707 }
1708
1709 // If the threadable edges are branching on an undefined value, we get to pick
1710 // the destination that these predecessors should get to.
1711 if (!MostPopularDest)
1712 MostPopularDest = BB->getTerminator()->
1713 getSuccessor(getBestDestForJumpOnUndef(BB));
1714
1715 // Ok, try to thread it!
1716 return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
1717}
1718
1719 /// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
1720 /// a PHI node (or freeze PHI) in the current block. See if there are any
1721 /// simplifications we can do based on inputs to the phi node.
/// Returns true as soon as the block was duplicated into one predecessor, and
/// false when no predecessor qualified or every attempt failed.
// NOTE(review): listing line 1722 (the function signature) is missing from this
// extract.
1723 BasicBlock *BB = PN->getParent();
1724
1725 // TODO: We could make use of this to do it once for blocks with common PHI
1726 // values.
// NOTE(review): listing line 1727 is missing from this extract; presumably it
// declares the PredBBs container used below — confirm against the full file.
1728 PredBBs.resize(1);
1729
1730 // If any of the predecessor blocks end in an unconditional branch, we can
1731 // *duplicate* the conditional branch into that block in order to further
1732 // encourage jump threading and to eliminate cases where we have branch on a
1733 // phi of an icmp (branch on icmp is much better).
1734 // This is still beneficial when a frozen phi is used as the branch condition
1735 // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
1736 // to br(icmp(freeze ...)).
1737 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1738 BasicBlock *PredBB = PN->getIncomingBlock(i);
1739 if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
1740 if (PredBr->isUnconditional()) {
// PredBBs holds exactly one entry, overwritten for each candidate.
1741 PredBBs[0] = PredBB;
1742 // Try to duplicate BB into PredBB.
1743 if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1744 return true;
1745 }
1746 }
1747
1748 return false;
1749}
1750
1751/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
1752/// a xor instruction in the current block. See if there are any
1753/// simplifications we can do based on inputs to the xor.
1755 BasicBlock *BB = BO->getParent();
1756
1757 // If either the LHS or RHS of the xor is a constant, don't do this
1758 // optimization.
1759 if (isa<ConstantInt>(BO->getOperand(0)) ||
1760 isa<ConstantInt>(BO->getOperand(1)))
1761 return false;
1762
1763 // If the first instruction in BB isn't a phi, we won't be able to infer
1764 // anything special about any particular predecessor.
1765 if (!isa<PHINode>(BB->front()))
1766 return false;
1767
1768 // If this BB is a landing pad, we won't be able to split the edge into it.
1769 if (BB->isEHPad())
1770 return false;
1771
1772 // If we have a xor as the branch input to this block, and we know that the
1773 // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1774 // the condition into the predecessor and fix that value to true, saving some
1775 // logical ops on that path and encouraging other paths to simplify.
1776 //
1777 // This copies something like this:
1778 //
1779 // BB:
1780 // %X = phi i1 [1], [%X']
1781 // %Y = icmp eq i32 %A, %B
1782 // %Z = xor i1 %X, %Y
1783 // br i1 %Z, ...
1784 //
1785 // Into:
1786 // BB':
1787 // %Y = icmp ne i32 %A, %B
1788 // br i1 %Y, ...
1789
1790 PredValueInfoTy XorOpValues;
1791 bool isLHS = true;
1792 if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1793 WantInteger, BO)) {
1794 assert(XorOpValues.empty());
1795 if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1796 WantInteger, BO))
1797 return false;
1798 isLHS = false;
1799 }
1800
1801 assert(!XorOpValues.empty() &&
1802 "computeValueKnownInPredecessors returned true with no values");
1803
1804 // Scan the information to see which is most popular: true or false. The
1805 // predecessors can be of the set true, false, or undef.
1806 unsigned NumTrue = 0, NumFalse = 0;
1807 for (const auto &XorOpValue : XorOpValues) {
1808 if (isa<UndefValue>(XorOpValue.first))
1809 // Ignore undefs for the count.
1810 continue;
1811 if (cast<ConstantInt>(XorOpValue.first)->isZero())
1812 ++NumFalse;
1813 else
1814 ++NumTrue;
1815 }
1816
1817 // Determine which value to split on, true, false, or undef if neither.
1818 ConstantInt *SplitVal = nullptr;
1819 if (NumTrue > NumFalse)
1820 SplitVal = ConstantInt::getTrue(BB->getContext());
1821 else if (NumTrue != 0 || NumFalse != 0)
1822 SplitVal = ConstantInt::getFalse(BB->getContext());
1823
1824 // Collect all of the blocks that this can be folded into so that we can
1825 // factor this once and clone it once.
1826 SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1827 for (const auto &XorOpValue : XorOpValues) {
1828 if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1829 continue;
1830
1831 BlocksToFoldInto.push_back(XorOpValue.second);
1832 }
1833
1834 // If we inferred a value for all of the predecessors, then duplication won't
1835 // help us. However, we can just replace the LHS or RHS with the constant.
1836 if (BlocksToFoldInto.size() ==
1837 cast<PHINode>(BB->front()).getNumIncomingValues()) {
1838 if (!SplitVal) {
1839 // If all preds provide undef, just nuke the xor, because it is undef too.
1841 BO->eraseFromParent();
1842 } else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) {
1843 // If all preds provide 0, replace the xor with the other input.
1844 BO->replaceAllUsesWith(BO->getOperand(isLHS));
1845 BO->eraseFromParent();
1846 } else {
1847 // If all preds provide 1, set the computed value to 1.
1848 BO->setOperand(!isLHS, SplitVal);
1849 }
1850
1851 return true;
1852 }
1853
1854 // If any of predecessors end with an indirect goto, we can't change its
1855 // destination.
1856 if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
1857 return isa<IndirectBrInst>(Pred->getTerminator());
1858 }))
1859 return false;
1860
1861 // Try to duplicate BB into PredBB.
1862 return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1863}
1864
1865/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1866/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1867/// NewPred using the entries from OldPred (suitably mapped).
1869 BasicBlock *OldPred,
1870 BasicBlock *NewPred,
1872 for (PHINode &PN : PHIBB->phis()) {
1873 // Ok, we have a PHI node. Figure out what the incoming value was for
1874 // OldPred.
1875 Value *IV = PN.getIncomingValueForBlock(OldPred);
1876
1877 // Remap the value if necessary.
1878 if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
1880 if (I != ValueMap.end())
1881 IV = I->second;
1882 }
1883
1884 PN.addIncoming(IV, NewPred);
1885 }
1886}
1887
1888/// Merge basic block BB into its sole predecessor if possible.
1890 BasicBlock *SinglePred = BB->getSinglePredecessor();
1891 if (!SinglePred)
1892 return false;
1893
1894 const Instruction *TI = SinglePred->getTerminator();
1895 if (TI->isSpecialTerminator() || TI->getNumSuccessors() != 1 ||
1896 SinglePred == BB || hasAddressTakenAndUsed(BB))
1897 return false;
1898
1899 // If SinglePred was a loop header, BB becomes one.
1900 if (LoopHeaders.erase(SinglePred))
1901 LoopHeaders.insert(BB);
1902
1903 LVI->eraseBlock(SinglePred);
1904 MergeBasicBlockIntoOnlyPred(BB, DTU.get());
1905
1906 // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
1907 // BB code within one basic block `BB`), we need to invalidate the LVI
1908 // information associated with BB, because the LVI information need not be
1909 // true for all of BB after the merge. For example,
1910 // Before the merge, LVI info and code is as follows:
1911 // SinglePred: <LVI info1 for %p val>
1912 // %y = use of %p
1913 // call @exit() // need not transfer execution to successor.
1914 // assume(%p) // from this point on %p is true
1915 // br label %BB
1916 // BB: <LVI info2 for %p val, i.e. %p is true>
1917 // %x = use of %p
1918 // br label exit
1919 //
1920 // Note that this LVI info for blocks BB and SinglePred is correct for %p
1921 // (info2 and info1 respectively). After the merge and the deletion of the
1922 // LVI info1 for SinglePred, we have the following code:
1923 // BB: <LVI info2 for %p val>
1924 // %y = use of %p
1925 // call @exit()
1926 // assume(%p)
1927 // %x = use of %p <-- LVI info2 is correct from here onwards.
1928 // br label exit
1929 // LVI info2 for BB is incorrect at the beginning of BB.
1930
1931 // Invalidate LVI information for BB if the LVI is not provably true for
1932 // all of BB.
1934 LVI->eraseBlock(BB);
1935 return true;
1936}
1937
1938/// Update the SSA form. NewBB contains instructions that are copied from BB.
1939/// ValueMapping maps old values in BB to new ones in NewBB.
1941 ValueToValueMapTy &ValueMapping) {
1942 // If there were values defined in BB that are used outside the block, then we
1943 // now have to update all uses of the value to use either the original value,
1944 // the cloned value, or some PHI derived value. This can require arbitrary
1945 // PHI insertion, which we are prepared to do; clean these up now.
1946 SSAUpdater SSAUpdate;
1947 SmallVector<Use *, 16> UsesToRename;
1949 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1950
1951 for (Instruction &I : *BB) {
1952 // Scan all uses of this instruction to see if it is used outside of its
1953 // block, and if so, record them in UsesToRename.
1954 for (Use &U : I.uses()) {
1955 Instruction *User = cast<Instruction>(U.getUser());
1956 if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1957 if (UserPN->getIncomingBlock(U) == BB)
1958 continue;
1959 } else if (User->getParent() == BB)
1960 continue;
1961
1962 UsesToRename.push_back(&U);
1963 }
1964
1965 // Find debug values outside of the block
1966 findDbgValues(DbgValues, &I, &DbgVariableRecords);
1967 llvm::erase_if(DbgValues, [&](const DbgValueInst *DbgVal) {
1968 return DbgVal->getParent() == BB;
1969 });
1970 llvm::erase_if(DbgVariableRecords, [&](const DbgVariableRecord *DbgVarRec) {
1971 return DbgVarRec->getParent() == BB;
1972 });
1973
1974 // If there are no uses outside the block, we're done with this instruction.
1975 if (UsesToRename.empty() && DbgValues.empty() && DbgVariableRecords.empty())
1976 continue;
1977 LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
1978
1979 // We found a use of I outside of BB. Rename all uses of I that are outside
1980 // its block to be uses of the appropriate PHI node etc. Seed SSAUpdate
1981 // with the two values we know.
1982 SSAUpdate.Initialize(I.getType(), I.getName());
1983 SSAUpdate.AddAvailableValue(BB, &I);
1984 SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
1985
1986 while (!UsesToRename.empty())
1987 SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
1988 if (!DbgValues.empty() || !DbgVariableRecords.empty()) {
1989 SSAUpdate.UpdateDebugValues(&I, DbgValues);
1990 SSAUpdate.UpdateDebugValues(&I, DbgVariableRecords);
1991 DbgValues.clear();
1992 DbgVariableRecords.clear();
1993 }
1994
1995 LLVM_DEBUG(dbgs() << "\n");
1996 }
1997}
1998
1999/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
2000/// arguments that come from PredBB. Return the map from the variables in the
2001/// source basic block to the variables in the newly created basic block.
2002
2006 BasicBlock *NewBB,
2007 BasicBlock *PredBB) {
2008 // We are going to have to map operands from the source basic block to the new
2009 // copy of the block 'NewBB'. If there are PHI nodes in the source basic
2010 // block, evaluate them to account for entry from PredBB.
2011
2012 // Retargets llvm.dbg.value to any renamed variables.
2013 auto RetargetDbgValueIfPossible = [&](Instruction *NewInst) -> bool {
2014 auto DbgInstruction = dyn_cast<DbgValueInst>(NewInst);
2015 if (!DbgInstruction)
2016 return false;
2017
2018 SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
2019 for (auto DbgOperand : DbgInstruction->location_ops()) {
2020 auto DbgOperandInstruction = dyn_cast<Instruction>(DbgOperand);
2021 if (!DbgOperandInstruction)
2022 continue;
2023
2024 auto I = ValueMapping.find(DbgOperandInstruction);
2025 if (I != ValueMapping.end()) {
2026 OperandsToRemap.insert(
2027 std::pair<Value *, Value *>(DbgOperand, I->second));
2028 }
2029 }
2030
2031 for (auto &[OldOp, MappedOp] : OperandsToRemap)
2032 DbgInstruction->replaceVariableLocationOp(OldOp, MappedOp);
2033 return true;
2034 };
2035
2036 // Duplicate implementation of the above dbg.value code, using
2037 // DbgVariableRecords instead.
2038 auto RetargetDbgVariableRecordIfPossible = [&](DbgVariableRecord *DVR) {
2039 SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
2040 for (auto *Op : DVR->location_ops()) {
2041 Instruction *OpInst = dyn_cast<Instruction>(Op);
2042 if (!OpInst)
2043 continue;
2044
2045 auto I = ValueMapping.find(OpInst);
2046 if (I != ValueMapping.end())
2047 OperandsToRemap.insert({OpInst, I->second});
2048 }
2049
2050 for (auto &[OldOp, MappedOp] : OperandsToRemap)
2051 DVR->replaceVariableLocationOp(OldOp, MappedOp);
2052 };
2053
2054 BasicBlock *RangeBB = BI->getParent();
2055
2056 // Clone the phi nodes of the source basic block into NewBB. The resulting
2057 // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
2058 // might need to rewrite the operand of the cloned phi.
2059 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2060 PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
2061 NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
2062 ValueMapping[PN] = NewPN;
2063 }
2064
2065 // Clone noalias scope declarations in the threaded block. When threading a
2066 // loop exit, we would otherwise end up with two identical scope declarations
2067 // visible at the same time.
2068 SmallVector<MDNode *> NoAliasScopes;
2069 DenseMap<MDNode *, MDNode *> ClonedScopes;
2070 LLVMContext &Context = PredBB->getContext();
2071 identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
2072 cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
2073
2074 auto CloneAndRemapDbgInfo = [&](Instruction *NewInst, Instruction *From) {
2075 auto DVRRange = NewInst->cloneDebugInfoFrom(From);
2076 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
2077 RetargetDbgVariableRecordIfPossible(&DVR);
2078 };
2079
2080 // Clone the non-phi instructions of the source basic block into NewBB,
2081 // keeping track of the mapping and using it to remap operands in the cloned
2082 // instructions.
2083 for (; BI != BE; ++BI) {
2084 Instruction *New = BI->clone();
2085 New->setName(BI->getName());
2086 New->insertInto(NewBB, NewBB->end());
2087 ValueMapping[&*BI] = New;
2088 adaptNoAliasScopes(New, ClonedScopes, Context);
2089
2090 CloneAndRemapDbgInfo(New, &*BI);
2091
2092 if (RetargetDbgValueIfPossible(New))
2093 continue;
2094
2095 // Remap operands to patch up intra-block references.
2096 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2097 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2098 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
2099 if (I != ValueMapping.end())
2100 New->setOperand(i, I->second);
2101 }
2102 }
2103
2104 // There may be DbgVariableRecords on the terminator, clone directly from
2105 // marker to marker as there isn't an instruction there.
2106 if (BE != RangeBB->end() && BE->hasDbgRecords()) {
2107 // Dump them at the end.
2108 DbgMarker *Marker = RangeBB->getMarker(BE);
2109 DbgMarker *EndMarker = NewBB->createMarker(NewBB->end());
2110 auto DVRRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);
2111 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
2112 RetargetDbgVariableRecordIfPossible(&DVR);
2113 }
2114
2115 return;
2116}
2117
2118/// Attempt to thread through two successive basic blocks.
2120 Value *Cond) {
2121 // Consider:
2122 //
2123 // PredBB:
2124 // %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
2125 // %tobool = icmp eq i32 %cond, 0
2126 // br i1 %tobool, label %BB, label ...
2127 //
2128 // BB:
2129 // %cmp = icmp eq i32* %var, null
2130 // br i1 %cmp, label ..., label ...
2131 //
2132 // We don't know the value of %var at BB even if we know which incoming edge
2133 // we take to BB. However, once we duplicate PredBB for each of its incoming
2134 // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
2135 // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
2136
2137 // Require that BB end with a Branch for simplicity.
2138 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2139 if (!CondBr)
2140 return false;
2141
2142 // BB must have exactly one predecessor.
2143 BasicBlock *PredBB = BB->getSinglePredecessor();
2144 if (!PredBB)
2145 return false;
2146
2147 // Require that PredBB end with a conditional Branch. If PredBB ends with an
2148 // unconditional branch, we should be merging PredBB and BB instead. For
2149 // simplicity, we don't deal with a switch.
2150 BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2151 if (!PredBBBranch || PredBBBranch->isUnconditional())
2152 return false;
2153
2154 // If PredBB has exactly one incoming edge, we don't gain anything by copying
2155 // PredBB.
2156 if (PredBB->getSinglePredecessor())
2157 return false;
2158
2159 // Don't thread through PredBB if it contains a successor edge to itself, in
2160 // which case we would infinite loop. Suppose we are threading an edge from
2161 // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
2162 // successor edge to itself. If we allowed jump threading in this case, we
2163 // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
2164 // PredBB.thread has a successor edge to PredBB, we would immediately come up
2165 // with another jump threading opportunity from PredBB.thread through PredBB
2166 // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
2167 // would keep peeling one iteration from PredBB.
2168 if (llvm::is_contained(successors(PredBB), PredBB))
2169 return false;
2170
2171 // Don't thread across a loop header.
2172 if (LoopHeaders.count(PredBB))
2173 return false;
2174
2175 // Avoid complication with duplicating EH pads.
2176 if (PredBB->isEHPad())
2177 return false;
2178
2179 // Find a predecessor that we can thread. For simplicity, we only consider a
2180 // successor edge out of BB to which we thread exactly one incoming edge into
2181 // PredBB.
2182 unsigned ZeroCount = 0;
2183 unsigned OneCount = 0;
2184 BasicBlock *ZeroPred = nullptr;
2185 BasicBlock *OnePred = nullptr;
2186 const DataLayout &DL = BB->getDataLayout();
2187 for (BasicBlock *P : predecessors(PredBB)) {
2188 // If PredPred ends with IndirectBrInst, we can't handle it.
2189 if (isa<IndirectBrInst>(P->getTerminator()))
2190 continue;
2191 if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
2193 if (CI->isZero()) {
2194 ZeroCount++;
2195 ZeroPred = P;
2196 } else if (CI->isOne()) {
2197 OneCount++;
2198 OnePred = P;
2199 }
2200 }
2201 }
2202
2203 // Disregard complicated cases where we have to thread multiple edges.
2204 BasicBlock *PredPredBB;
2205 if (ZeroCount == 1) {
2206 PredPredBB = ZeroPred;
2207 } else if (OneCount == 1) {
2208 PredPredBB = OnePred;
2209 } else {
2210 return false;
2211 }
2212
2213 BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
2214
2215 // If threading to the same block as we come from, we would infinite loop.
2216 if (SuccBB == BB) {
2217 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2218 << "' - would thread to self!\n");
2219 return false;
2220 }
2221
2222 // If threading this would thread across a loop header, don't thread the edge.
2223 // See the comments above findLoopHeaders for justifications and caveats.
2224 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2225 LLVM_DEBUG({
2226 bool BBIsHeader = LoopHeaders.count(BB);
2227 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2228 dbgs() << " Not threading across "
2229 << (BBIsHeader ? "loop header BB '" : "block BB '")
2230 << BB->getName() << "' to dest "
2231 << (SuccIsHeader ? "loop header BB '" : "block BB '")
2232 << SuccBB->getName()
2233 << "' - it might create an irreducible loop!\n";
2234 });
2235 return false;
2236 }
2237
2238 // Compute the cost of duplicating BB and PredBB.
2239 unsigned BBCost = getJumpThreadDuplicationCost(
2240 TTI, BB, BB->getTerminator(), BBDupThreshold);
2241 unsigned PredBBCost = getJumpThreadDuplicationCost(
2242 TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
2243
2244 // Give up if costs are too high. We need to check BBCost and PredBBCost
2245 // individually before checking their sum because getJumpThreadDuplicationCost
2246 // returns (unsigned)~0 for those basic blocks that cannot be duplicated.
2247 if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
2248 BBCost + PredBBCost > BBDupThreshold) {
2249 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2250 << "' - Cost is too high: " << PredBBCost
2251 << " for PredBB, " << BBCost << "for BB\n");
2252 return false;
2253 }
2254
2255 // Now we are ready to duplicate PredBB.
2256 threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
2257 return true;
2258}
2259
2261 BasicBlock *PredBB,
2262 BasicBlock *BB,
2263 BasicBlock *SuccBB) {
2264 LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
2265 << BB->getName() << "'\n");
2266
2267 // Build BPI/BFI before any changes are made to IR.
2268 bool HasProfile = doesBlockHaveProfileData(BB);
2269 auto *BFI = getOrCreateBFI(HasProfile);
2270 auto *BPI = getOrCreateBPI(BFI != nullptr);
2271
2272 BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
2273 BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
2274
2275 BasicBlock *NewBB =
2276 BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
2277 PredBB->getParent(), PredBB);
2278 NewBB->moveAfter(PredBB);
2279
2280 // Set the block frequency of NewBB.
2281 if (BFI) {
2282 assert(BPI && "It's expected BPI to exist along with BFI");
2283 auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
2284 BPI->getEdgeProbability(PredPredBB, PredBB);
2285 BFI->setBlockFreq(NewBB, NewBBFreq);
2286 }
2287
2288 // We are going to have to map operands from the original BB block to the new
2289 // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
2290 // to account for entry from PredPredBB.
2291 ValueToValueMapTy ValueMapping;
2292 cloneInstructions(ValueMapping, PredBB->begin(), PredBB->end(), NewBB,
2293 PredPredBB);
2294
2295 // Copy the edge probabilities from PredBB to NewBB.
2296 if (BPI)
2297 BPI->copyEdgeProbabilities(PredBB, NewBB);
2298
2299 // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
2300 // This eliminates predecessors from PredPredBB, which requires us to simplify
2301 // any PHI nodes in PredBB.
2302 Instruction *PredPredTerm = PredPredBB->getTerminator();
2303 for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
2304 if (PredPredTerm->getSuccessor(i) == PredBB) {
2305 PredBB->removePredecessor(PredPredBB, true);
2306 PredPredTerm->setSuccessor(i, NewBB);
2307 }
2308
2309 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
2310 ValueMapping);
2311 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
2312 ValueMapping);
2313
2314 DTU->applyUpdatesPermissive(
2315 {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
2316 {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
2317 {DominatorTree::Insert, PredPredBB, NewBB},
2318 {DominatorTree::Delete, PredPredBB, PredBB}});
2319
2320 updateSSA(PredBB, NewBB, ValueMapping);
2321
2322 // Clean up things like PHI nodes with single operands, dead instructions,
2323 // etc.
2324 SimplifyInstructionsInBlock(NewBB, TLI);
2325 SimplifyInstructionsInBlock(PredBB, TLI);
2326
2327 SmallVector<BasicBlock *, 1> PredsToFactor;
2328 PredsToFactor.push_back(NewBB);
2329 threadEdge(BB, PredsToFactor, SuccBB);
2330}
2331
2332/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
2334 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
2335 BasicBlock *SuccBB) {
2336 // If threading to the same block as we come from, we would infinite loop.
2337 if (SuccBB == BB) {
2338 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2339 << "' - would thread to self!\n");
2340 return false;
2341 }
2342
2343 // If threading this would thread across a loop header, don't thread the edge.
2344 // See the comments above findLoopHeaders for justifications and caveats.
2345 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2346 LLVM_DEBUG({
2347 bool BBIsHeader = LoopHeaders.count(BB);
2348 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2349 dbgs() << " Not threading across "
2350 << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
2351 << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
2352 << SuccBB->getName() << "' - it might create an irreducible loop!\n";
2353 });
2354 return false;
2355 }
2356
2357 unsigned JumpThreadCost = getJumpThreadDuplicationCost(
2358 TTI, BB, BB->getTerminator(), BBDupThreshold);
2359 if (JumpThreadCost > BBDupThreshold) {
2360 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2361 << "' - Cost is too high: " << JumpThreadCost << "\n");
2362 return false;
2363 }
2364
2365 threadEdge(BB, PredBBs, SuccBB);
2366 return true;
2367}
2368
2369/// threadEdge - We have decided that it is safe and profitable to factor the
2370/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
2371/// across BB. Transform the IR to reflect this change.
2373 const SmallVectorImpl<BasicBlock *> &PredBBs,
2374 BasicBlock *SuccBB) {
2375 assert(SuccBB != BB && "Don't create an infinite loop");
2376
2377 assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
2378 "Don't thread across loop headers");
2379
2380 // Build BPI/BFI before any changes are made to IR.
2381 bool HasProfile = doesBlockHaveProfileData(BB);
2382 auto *BFI = getOrCreateBFI(HasProfile);
2383 auto *BPI = getOrCreateBPI(BFI != nullptr);
2384
2385 // And finally, do it! Start by factoring the predecessors if needed.
2386 BasicBlock *PredBB;
2387 if (PredBBs.size() == 1)
2388 PredBB = PredBBs[0];
2389 else {
2390 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2391 << " common predecessors.\n");
2392 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2393 }
2394
2395 // And finally, do it!
2396 LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
2397 << "' to '" << SuccBB->getName()
2398 << ", across block:\n " << *BB << "\n");
2399
2400 LVI->threadEdge(PredBB, BB, SuccBB);
2401
2403 BB->getName()+".thread",
2404 BB->getParent(), BB);
2405 NewBB->moveAfter(PredBB);
2406
2407 // Set the block frequency of NewBB.
2408 if (BFI) {
2409 assert(BPI && "It's expected BPI to exist along with BFI");
2410 auto NewBBFreq =
2411 BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
2412 BFI->setBlockFreq(NewBB, NewBBFreq);
2413 }
2414
2415 // Copy all the instructions from BB to NewBB except the terminator.
2416 ValueToValueMapTy ValueMapping;
2417 cloneInstructions(ValueMapping, BB->begin(), std::prev(BB->end()), NewBB,
2418 PredBB);
2419
2420 // We didn't copy the terminator from BB over to NewBB, because there is now
2421 // an unconditional jump to SuccBB. Insert the unconditional jump.
2422 BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
2423 NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
2424
2425 // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
2426 // PHI nodes for NewBB now.
2427 addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
2428
2429 // Update the terminator of PredBB to jump to NewBB instead of BB. This
2430 // eliminates predecessors from BB, which requires us to simplify any PHI
2431 // nodes in BB.
2432 Instruction *PredTerm = PredBB->getTerminator();
2433 for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
2434 if (PredTerm->getSuccessor(i) == BB) {
2435 BB->removePredecessor(PredBB, true);
2436 PredTerm->setSuccessor(i, NewBB);
2437 }
2438
2439 // Enqueue required DT updates.
2440 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
2441 {DominatorTree::Insert, PredBB, NewBB},
2442 {DominatorTree::Delete, PredBB, BB}});
2443
2444 updateSSA(BB, NewBB, ValueMapping);
2445
2446 // At this point, the IR is fully up to date and consistent. Do a quick scan
2447 // over the new instructions and zap any that are constants or dead. This
2448 // frequently happens because of phi translation.
2449 SimplifyInstructionsInBlock(NewBB, TLI);
2450
2451 // Update the edge weight from BB to SuccBB, which should be less than before.
2452 updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
2453
2454 // Threaded an edge!
2455 ++NumThreads;
2456}
2457
2458/// Create a new basic block that will be the predecessor of BB and successor of
2459/// all blocks in Preds. When profile data is available, update the frequency of
2460/// this new block.
2461BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
2463 const char *Suffix) {
2465
2466 // Collect the frequencies of all predecessors of BB, which will be used to
2467 // update the edge weight of the result of splitting predecessors.
2469 auto *BFI = getBFI();
2470 if (BFI) {
2471 auto *BPI = getOrCreateBPI(true);
2472 for (auto *Pred : Preds)
2473 FreqMap.insert(std::make_pair(
2474 Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
2475 }
2476
2477 // In the case when BB is a LandingPad block we create 2 new predecessors
2478 // instead of just one.
2479 if (BB->isLandingPad()) {
2480 std::string NewName = std::string(Suffix) + ".split-lp";
2481 SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
2482 } else {
2483 NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
2484 }
2485
2486 std::vector<DominatorTree::UpdateType> Updates;
2487 Updates.reserve((2 * Preds.size()) + NewBBs.size());
2488 for (auto *NewBB : NewBBs) {
2489 BlockFrequency NewBBFreq(0);
2490 Updates.push_back({DominatorTree::Insert, NewBB, BB});
2491 for (auto *Pred : predecessors(NewBB)) {
2492 Updates.push_back({DominatorTree::Delete, Pred, BB});
2493 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
2494 if (BFI) // Update frequencies between Pred -> NewBB.
2495 NewBBFreq += FreqMap.lookup(Pred);
2496 }
2497 if (BFI) // Apply the summed frequency to NewBB.
2498 BFI->setBlockFreq(NewBB, NewBBFreq);
2499 }
2500
2501 DTU->applyUpdatesPermissive(Updates);
2502 return NewBBs[0];
2503}
2504
2505bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2506 const Instruction *TI = BB->getTerminator();
2507 if (!TI || TI->getNumSuccessors() < 2)
2508 return false;
2509
2510 return hasValidBranchWeightMD(*TI);
2511}
2512
2513/// Update the block frequency of BB and branch weight and the metadata on the
2514/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2515/// Freq(PredBB->BB) / Freq(BB->SuccBB).
2516void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2517 BasicBlock *BB,
2518 BasicBlock *NewBB,
2519 BasicBlock *SuccBB,
2520 BlockFrequencyInfo *BFI,
2522 bool HasProfile) {
2523 assert(((BFI && BPI) || (!BFI && !BFI)) &&
2524 "Both BFI & BPI should either be set or unset");
2525
2526 if (!BFI) {
2527 assert(!HasProfile &&
2528 "It's expected to have BFI/BPI when profile info exists");
2529 return;
2530 }
2531
2532 // As the edge from PredBB to BB is deleted, we have to update the block
2533 // frequency of BB.
2534 auto BBOrigFreq = BFI->getBlockFreq(BB);
2535 auto NewBBFreq = BFI->getBlockFreq(NewBB);
2536 auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
2537 auto BBNewFreq = BBOrigFreq - NewBBFreq;
2538 BFI->setBlockFreq(BB, BBNewFreq);
2539
2540 // Collect updated outgoing edges' frequencies from BB and use them to update
2541 // edge probabilities.
2542 SmallVector<uint64_t, 4> BBSuccFreq;
2543 for (BasicBlock *Succ : successors(BB)) {
2544 auto SuccFreq = (Succ == SuccBB)
2545 ? BB2SuccBBFreq - NewBBFreq
2546 : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
2547 BBSuccFreq.push_back(SuccFreq.getFrequency());
2548 }
2549
2550 uint64_t MaxBBSuccFreq = *llvm::max_element(BBSuccFreq);
2551
2553 if (MaxBBSuccFreq == 0)
2554 BBSuccProbs.assign(BBSuccFreq.size(),
2555 {1, static_cast<unsigned>(BBSuccFreq.size())});
2556 else {
2557 for (uint64_t Freq : BBSuccFreq)
2558 BBSuccProbs.push_back(
2559 BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2560 // Normalize edge probabilities so that they sum up to one.
2562 BBSuccProbs.end());
2563 }
2564
2565 // Update edge probabilities in BPI.
2566 BPI->setEdgeProbability(BB, BBSuccProbs);
2567
2568 // Update the profile metadata as well.
2569 //
2570 // Don't do this if the profile of the transformed blocks was statically
2571 // estimated. (This could occur despite the function having an entry
2572 // frequency in completely cold parts of the CFG.)
2573 //
2574 // In this case we don't want to suggest to subsequent passes that the
2575 // calculated weights are fully consistent. Consider this graph:
2576 //
2577 // check_1
2578 // 50% / |
2579 // eq_1 | 50%
2580 // \ |
2581 // check_2
2582 // 50% / |
2583 // eq_2 | 50%
2584 // \ |
2585 // check_3
2586 // 50% / |
2587 // eq_3 | 50%
2588 // \ |
2589 //
2590 // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2591 // the overall probabilities are inconsistent; the total probability that the
2592 // value is either 1, 2 or 3 is 150%.
2593 //
2594 // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2595 // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2596 // the loop exit edge. Then based solely on static estimation we would assume
2597 // the loop was extremely hot.
2598 //
2599 // FIXME this locally as well so that BPI and BFI are consistent as well. We
2600 // shouldn't make edges extremely likely or unlikely based solely on static
2601 // estimation.
2602 if (BBSuccProbs.size() >= 2 && HasProfile) {
2604 for (auto Prob : BBSuccProbs)
2605 Weights.push_back(Prob.getNumerator());
2606
2607 auto TI = BB->getTerminator();
2608 setBranchWeights(*TI, Weights, hasBranchWeightOrigin(*TI));
2609 }
2610}
2611
2612/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2613/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2614/// If we can duplicate the contents of BB up into PredBB do so now, this
2615/// improves the odds that the branch will be on an analyzable instruction like
2616/// a compare.
///
/// BB is the block whose contents are cloned. PredBBs is the non-empty set of
/// predecessors to duplicate into; when it holds more than one block they are
/// first factored into a single predecessor via splitBlockPreds.
///
/// Returns false, before modifying anything, when BB is in LoopHeaders or the
/// duplication cost exceeds BBDupThreshold. Returns true once the duplication
/// has been performed.
// NOTE(review): listing line 2617 (the start of this signature: return type and
// qualified function name) is missing from this extract -- restore it from
// upstream before compiling.
2618 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2619 assert(!PredBBs.empty() && "Can't handle an empty set");
2620
2621 // If BB is a loop header, then duplicating this block outside the loop would
2622 // cause us to transform this into an irreducible loop, don't do this.
2623 // See the comments above findLoopHeaders for justifications and caveats.
2624 if (LoopHeaders.count(BB)) {
2625 LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2626 << "' into predecessor block '" << PredBBs[0]->getName()
2627 << "' - it might create an irreducible loop!\n");
2628 return false;
2629 }
2630
2631 unsigned DuplicationCost = getJumpThreadDuplicationCost(
2632 TTI, BB, BB->getTerminator(), BBDupThreshold);
2633 if (DuplicationCost > BBDupThreshold) {
2634 LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2635 << "' - Cost is too high: " << DuplicationCost << "\n");
2636 return false;
2637 }
2638
2639 // And finally, do it! Start by factoring the predecessors if needed.
     // Dominator-tree edge changes are only collected in Updates here; they are
     // handed to DTU in one batch at the end of the function.
2640 std::vector<DominatorTree::UpdateType> Updates;
2641 BasicBlock *PredBB;
2642 if (PredBBs.size() == 1)
2643 PredBB = PredBBs[0];
2644 else {
2645 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2646 << " common predecessors.\n");
2647 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2648 }
2649 Updates.push_back({DominatorTree::Delete, PredBB, BB});
2650
2651 // Okay, we decided to do this! Clone all the instructions in BB onto the end
2652 // of PredBB.
2653 LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2654 << "' into end of '" << PredBB->getName()
2655 << "' to eliminate branch on phi. Cost: "
2656 << DuplicationCost << " block is:" << *BB << "\n");
2657
2658 // Unless PredBB ends with an unconditional branch, split the edge so that we
2659 // can just clone the bits from BB into the end of the new PredBB.
2660 BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2661
2662 if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2663 BasicBlock *OldPredBB = PredBB;
2664 PredBB = SplitEdge(OldPredBB, BB);
2665 Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
2666 Updates.push_back({DominatorTree::Insert, PredBB, BB});
2667 Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
     // From here on PredBB names the block created by SplitEdge; the cast<>
     // relies on that block ending in a BranchInst.
2668 OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2669 }
2670
2671 // We are going to have to map operands from the original BB block into the
2672 // PredBB block. Evaluate PHI nodes in BB.
2673 ValueToValueMapTy ValueMapping;
2674
2675 BasicBlock::iterator BI = BB->begin();
     // This loop stops at the first non-PHI instruction and leaves BI pointing
     // at it, so the cloning loop below picks up exactly where this one ended.
2676 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2677 ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2678 // Clone the non-phi instructions of BB into PredBB, keeping track of the
2679 // mapping and using it to remap operands in the cloned instructions.
2680 for (; BI != BB->end(); ++BI) {
2681 Instruction *New = BI->clone();
     // Clones are placed immediately before PredBB's old terminator.
2682 New->insertInto(PredBB, OldPredBranch->getIterator());
2683
2684 // Remap operands to patch up intra-block references.
2685 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2686 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2687 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
2688 if (I != ValueMapping.end())
2689 New->setOperand(i, I->second);
2690 }
2691
2692 // Remap debug variable operands.
2693 remapDebugVariable(ValueMapping, New);
2694
2695 // If this instruction can be simplified after the operands are updated,
2696 // just use the simplified value instead. This frequently happens due to
2697 // phi translation.
     // NOTE(review): listing line 2698 is missing from this extract; judging by
     // the use of IV below, it presumably opens the `if` that binds IV to the
     // result of a simplification call on New -- confirm against upstream.
2699 New,
2700 {BB->getDataLayout(), TLI, nullptr, nullptr, New})) {
2701 ValueMapping[&*BI] = IV;
     // The clone is only dropped when it has no side effects. Otherwise it
     // stays in PredBB (New remains non-null) while later uses are still
     // mapped to the simplified value IV.
2702 if (!New->mayHaveSideEffects()) {
2703 New->eraseFromParent();
2704 New = nullptr;
2705 // Clone debug-info on the elided instruction to the destination
2706 // position.
2707 OldPredBranch->cloneDebugInfoFrom(&*BI, std::nullopt, true);
2708 }
2709 } else {
2710 ValueMapping[&*BI] = New;
2711 }
2712 if (New) {
2713 // Otherwise, insert the new instruction into the block.
2714 New->setName(BI->getName());
2715 // Clone across any debug-info attached to the old instruction.
2716 New->cloneDebugInfoFrom(&*BI);
2717 // Update Dominance from simplified New instruction operands.
     // Any BasicBlock operand of a kept clone (e.g. of BB's terminator) is a
     // new CFG successor of PredBB and is recorded as an inserted edge.
2718 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2719 if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2720 Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
2721 }
2722 }
2723
2724 // Check to see if the targets of the branch had PHI nodes. If so, we need to
2725 // add entries to the PHI nodes for branch from PredBB now.
     // The cast<> and the two getSuccessor calls rely on BB ending in a
     // two-successor (conditional) branch, as the function comment describes.
2726 BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2727 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2728 ValueMapping);
2729 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2730 ValueMapping);
2731
2732 updateSSA(BB, PredBB, ValueMapping);
2733
2734 // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2735 // that we nuked.
     // The second argument is removePredecessor's KeepOneInputPHIs flag.
2736 BB->removePredecessor(PredBB, true);
2737
2738 // Remove the unconditional branch at the end of the PredBB block.
2739 OldPredBranch->eraseFromParent();
     // PredBB now ends in a copy of BB's terminator, so it takes over BB's
     // outgoing edge probabilities when BPI is available.
2740 if (auto *BPI = getBPI())
2741 BPI->copyEdgeProbabilities(BB, PredBB);
2742 DTU->applyUpdatesPermissive(Updates);
2743
2744 ++NumDupes;
2745 return true;
2746}
2747
2748// Pred is a predecessor of BB with an unconditional branch to BB. SI is
2749// a Select instruction in Pred. BB has other predecessors and SI is used in
2750// a PHI node in BB. SI has no other use.
2751// A new basic block, NewBB, is created and SI is converted to compare and
2752// conditional branch. SI is erased from parent.
//
// SIUse is the PHI node in BB that uses SI and Idx is the index of the incoming
// entry of SIUse that carries SI. That entry is rewritten to SI's false value
// and a new entry for NewBB supplies SI's true value.
// NOTE(review): listing line 2753 (the start of this signature; presumably it
// declares the Pred and BB parameters used below) is missing from this extract
// -- restore it from upstream before compiling.
2754 SelectInst *SI, PHINode *SIUse,
2755 unsigned Idx) {
2756 // Expand the select.
2757 //
2758 // Pred --
2759 // | v
2760 // | NewBB
2761 // | |
2762 // |-----
2763 // v
2764 // BB
     // cast<> relies on the documented precondition that Pred ends in a branch.
2765 BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
     // The final argument places NewBB immediately before BB in the function.
2766 BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2767 BB->getParent(), BB);
2768 // Move the unconditional branch to NewBB.
2769 PredTerm->removeFromParent();
2770 PredTerm->insertInto(NewBB, NewBB->end());
2771 // Create a conditional branch and update PHI nodes.
     // True edge goes to NewBB, false edge goes straight to BB.
2772 auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
2773 BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2774 BI->copyMetadata(*SI, {LLVMContext::MD_prof});
2775 SIUse->setIncomingValue(Idx, SI->getFalseValue());
2776 SIUse->addIncoming(SI->getTrueValue(), NewBB);
2777
     // Defaults of 1:1 are used below when SI carries no branch weights.
2778 uint64_t TrueWeight = 1;
2779 uint64_t FalseWeight = 1;
2780 // Copy probabilities from 'SI' to created conditional branch in 'Pred'.
2781 if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
2782 (TrueWeight + FalseWeight) != 0) {
     // NOTE(review): listing lines 2783-2784 and 2786 are missing from this
     // extract; they presumably declare BP and open the two expressions whose
     // argument lists remain below -- confirm against upstream.
2785 TrueWeight, TrueWeight + FalseWeight));
2787 FalseWeight, TrueWeight + FalseWeight));
2788 // Update BPI if exists.
2789 if (auto *BPI = getBPI())
2790 BPI->setEdgeProbability(Pred, BP);
2791 }
2792 // Set the block frequency of NewBB.
2793 if (auto *BFI = getBFI()) {
     // extractBranchWeights may have produced weights summing to zero; fall
     // back to 1:1, presumably to avoid a zero denominator just below.
2794 if ((TrueWeight + FalseWeight) == 0) {
2795 TrueWeight = 1;
2796 FalseWeight = 1;
2797 }
     // NOTE(review): listing line 2798 is missing from this extract; it
     // presumably declares PredToNewBBProb, which the next statement multiplies
     // by Pred's block frequency -- confirm against upstream.
2799 TrueWeight, TrueWeight + FalseWeight);
2800 auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
2801 BFI->setBlockFreq(NewBB, NewBBFreq);
2802 }
2803
2804 // The select is now dead.
2805 SI->eraseFromParent();
2806 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
2807 {DominatorTree::Insert, Pred, NewBB}});
2808
2809 // Update any other PHI nodes in BB.
     // NewBB is a new predecessor of BB; every other PHI receives, for NewBB,
     // the same value it already has for Pred.
2810 for (BasicBlock::iterator BI = BB->begin();
2811 PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2812 if (Phi != SIUse)
2813 Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2814}
2815
// NOTE(review): listing line 2816, which holds this function's signature, is
// missing from this extract. The body uses SI and BB, and its own comment names
// tryToUnfoldSelect(CmpInst *, BasicBlock *) as the code it reuses, so this is
// presumably another tryToUnfoldSelect overload -- confirm against upstream.
//
// Looks at the condition of SI: if it is a PHI node in BB, scans the PHI's
// incoming values for a select that lives in the corresponding predecessor,
// has a single use, and whose predecessor ends in an unconditional branch. The
// first such select is expanded with unfoldSelectInstr and true is returned.
// Returns false if the condition is not a PHI in BB or no incoming value
// qualifies.
2817 PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2818
2819 if (!CondPHI || CondPHI->getParent() != BB)
2820 return false;
2821
2822 for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2823 BasicBlock *Pred = CondPHI->getIncomingBlock(I);
2824 SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
2825
2826 // The second and third condition can be potentially relaxed. Currently
2827 // the conditions help to simplify the code and allow us to reuse existing
2828 // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
2829 if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
2830 continue;
2831
     // unfoldSelectInstr casts Pred's terminator to BranchInst and moves it as
     // an unconditional branch, so anything else is skipped here.
2832 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2833 if (!PredTerm || !PredTerm->isUnconditional())
2834 continue;
2835
     // Only one select is unfolded per call.
2836 unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
2837 return true;
2838 }
2839 return false;
2840}
2841
2842/// tryToUnfoldSelect - Look for blocks of the form
2843/// bb1:
2844/// %a = select
2845/// br bb2
2846///
2847/// bb2:
2848/// %p = phi [%a, %bb1] ...
2849/// %c = icmp %p
2850/// br i1 %c
2851///
2852/// And expand the select into a branch structure if one of its arms allows %c
2853/// to be folded. This later enables threading from bb1 over bb2.
///
/// Returns true if a select was unfolded (at most one per call), false
/// otherwise.
// NOTE(review): listing line 2854 (the signature; presumably it declares the
// CondCmp and BB parameters used below) is missing from this extract -- restore
// it from upstream before compiling.
2855 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2856 PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
     // cast<> rather than dyn_cast<>: callers are presumably required to pass a
     // compare whose second operand is a Constant -- verify at the call sites.
2857 Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2858
2859 if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2860 CondLHS->getParent() != BB)
2861 return false;
2862
2863 for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2864 BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2865 SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2866
2867 // Look if one of the incoming values is a select in the corresponding
2868 // predecessor.
2869 if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2870 continue;
2871
     // unfoldSelectInstr needs Pred to end in an unconditional branch.
2872 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2873 if (!PredTerm || !PredTerm->isUnconditional())
2874 continue;
2875
2876 // Now check if one of the select values would allow us to constant fold the
2877 // terminator in BB. We don't do the transform if both sides fold, those
2878 // cases will be threaded in any case.
     // Each query evaluates the compare's predicate for one select operand
     // against CondRHS on the Pred -> BB edge; a null result means LVI could
     // not determine a constant.
2879 Constant *LHSRes =
2880 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2881 CondRHS, Pred, BB, CondCmp);
2882 Constant *RHSRes =
2883 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2884 CondRHS, Pred, BB, CondCmp);
     // Proceed when at least one operand yields a known result and the two
     // results are not the same pointer (one unknown, or two different
     // constants).
2885 if ((LHSRes || RHSRes) && LHSRes != RHSRes) {
2886 unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
2887 return true;
2888 }
2889 }
2890 return false;
2891}
2892
2893/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2894/// same BB in the form
2895/// bb:
2896/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2897/// %s = select %p, trueval, falseval
2898///
2899/// or
2900///
2901/// bb:
2902/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2903/// %c = cmp %p, 0
2904/// %s = select %c, trueval, falseval
2905///
2906/// And expand the select into a branch structure. This later enables
2907/// jump-threading over bb in this pass.
2908///
2909/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2910/// select if the associated PHI has at least one constant. If the unfolded
2911/// select is not jump-threaded, it will be folded again in the later
2912/// optimizations.
///
/// Returns true after unfolding the first candidate select found (one per
/// call); returns false when the function is built with SanitizeMemory, BB is
/// in LoopHeaders, or no candidate exists.
// NOTE(review): listing line 2913 (the signature; presumably it declares the BB
// parameter used below) is missing from this extract -- restore it from
// upstream before compiling.
2914 // This transform would reduce the quality of msan diagnostics.
2915 // Disable this transform under MemorySanitizer.
2916 if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
2917 return false;
2918
2919 // If threading this would thread across a loop header, don't thread the edge.
2920 // See the comments above findLoopHeaders for justifications and caveats.
2921 if (LoopHeaders.count(BB))
2922 return false;
2923
2924 for (BasicBlock::iterator BI = BB->begin();
2925 PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2926 // Look for a Phi having at least one constant incoming value.
2927 if (llvm::all_of(PN->incoming_values(),
2928 [](Value *V) { return !isa<ConstantInt>(V); }))
2929 continue;
2930
     // A select qualifies when it lives in BB, its condition is exactly V, the
     // condition is an i1, and the select is not a logical and/or pattern.
2931 auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2932 using namespace PatternMatch;
2933
2934 // Check if SI is in BB and use V as condition.
2935 if (SI->getParent() != BB)
2936 return false;
2937 Value *Cond = SI->getCondition();
2938 bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
2939 return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
2940 };
2941
2942 SelectInst *SI = nullptr;
2943 for (Use &U : PN->uses()) {
2944 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2945 // Look for a ICmp in BB that compares PN with a constant and is the
2946 // condition of a Select.
     // `1 - U.getOperandNo()` selects the compare operand that is not this
     // use of PN.
2947 if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2948 isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2949 if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2950 if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2951 SI = SelectI;
2952 break;
2953 }
2954 } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2955 // Look for a Select in BB that uses PN as condition.
2956 if (isUnfoldCandidate(SelectI, U.get())) {
2957 SI = SelectI;
2958 break;
2959 }
2960 }
2961 }
2962
2963 if (!SI)
2964 continue;
2965 // Expand the select.
2966 Value *Cond = SI->getCondition();
     // The condition is about to feed a branch; freeze it unless it is known
     // not to be undef or poison at SI.
2967 if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))
2968 Cond = new FreezeInst(Cond, "cond.fr", SI->getIterator());
2969 MDNode *BranchWeights = getBranchWeightMDNode(*SI);
2970 Instruction *Term =
2971 SplitBlockAndInsertIfThen(Cond, SI, false, BranchWeights);
     // After the split SI lives in the tail block (SplitBB) and Term is the
     // terminator of the newly created "then" block (NewBB).
2972 BasicBlock *SplitBB = SI->getParent();
2973 BasicBlock *NewBB = Term->getParent();
     // Replace the select with a two-input PHI placed at SI's position: the
     // true value arrives via NewBB, the false value directly from BB.
2974 PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI->getIterator());
2975 NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2976 NewPN->addIncoming(SI->getFalseValue(), BB);
2977 NewPN->setDebugLoc(SI->getDebugLoc());
2978 SI->replaceAllUsesWith(NewPN);
2979 SI->eraseFromParent();
2980 // NewBB and SplitBB are newly created blocks which require insertion.
2981 std::vector<DominatorTree::UpdateType> Updates;
     // Three fixed edge insertions below plus one Delete and one Insert per
     // successor of SplitBB.
2982 Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
2983 Updates.push_back({DominatorTree::Insert, BB, SplitBB});
2984 Updates.push_back({DominatorTree::Insert, BB, NewBB});
2985 Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
2986 // BB's successors were moved to SplitBB, update DTU accordingly.
2987 for (auto *Succ : successors(SplitBB)) {
2988 Updates.push_back({DominatorTree::Delete, BB, Succ});
2989 Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
2990 }
2991 DTU->applyUpdatesPermissive(Updates);
2992 return true;
2993 }
2994 return false;
2995}
2996
2997/// Try to propagate a guard from the current BB into one of its predecessors
2998/// in case another branch of execution implies that the condition of this
2999/// guard is always true. Currently we only process the simplest case that
3000/// looks like:
3001///
3002/// Start:
3003/// %cond = ...
3004/// br i1 %cond, label %T1, label %F1
3005/// T1:
3006/// br label %Merge
3007/// F1:
3008/// br label %Merge
3009/// Merge:
3010/// %condGuard = ...
3011/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
3012///
3013/// And cond either implies condGuard or !condGuard. In this case all the
3014/// instructions before the guard can be duplicated in both branches, and the
3015/// guard is then threaded to one of them.
///
/// Returns true as soon as threadGuard succeeds for one guard in BB; returns
/// false if BB does not have the required shape or no guard could be threaded.
// NOTE(review): listing line 3016 (the signature; presumably it declares the BB
// parameter used below) is missing from this extract, so the function's name is
// not visible here -- restore it from upstream before compiling.
3017 using namespace PatternMatch;
3018
3019 // We only want to deal with two predecessors.
     // Walk the predecessor list by hand: bail out on zero, one, or more than
     // two entries, and on the same block appearing twice.
3020 BasicBlock *Pred1, *Pred2;
3021 auto PI = pred_begin(BB), PE = pred_end(BB);
3022 if (PI == PE)
3023 return false;
3024 Pred1 = *PI++;
3025 if (PI == PE)
3026 return false;
3027 Pred2 = *PI++;
3028 if (PI != PE)
3029 return false;
3030 if (Pred1 == Pred2)
3031 return false;
3032
3033 // Try to thread one of the guards of the block.
3034 // TODO: Look up deeper than to immediate predecessor?
     // Both predecessors must have the same single predecessor (Parent), which
     // is the top of the diamond.
3035 auto *Parent = Pred1->getSinglePredecessor();
3036 if (!Parent || Parent != Pred2->getSinglePredecessor())
3037 return false;
3038
3039 if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
3040 for (auto &I : *BB)
3041 if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
3042 return true;
3043
3044 return false;
3045}
3046
3047/// Try to propagate the guard from BB which is the lower block of a diamond
3048/// to one of its branches, in case the diamond's condition implies the guard's
3049/// condition.
///
/// Guard is the guard call inside BB and BI is the conditional branch at the
/// top of the diamond (asserted to have two successors). Returns false, before
/// modifying anything, when neither branch direction implies the guard
/// condition or when the duplication cost exceeds BBDupThreshold; returns true
/// after the instructions up to and including the guard have been moved out of
/// BB into the two branches.
// NOTE(review): listing line 3050 (the start of this signature; presumably it
// declares the BB and Guard parameters used below) is missing from this extract
// -- restore it from upstream before compiling.
3051 BranchInst *BI) {
3052 assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
3053 assert(BI->isConditional() && "Unconditional branch has 2 successors?");
3054 Value *GuardCond = Guard->getArgOperand(0);
3055 Value *BranchCond = BI->getCondition();
3056 BasicBlock *TrueDest = BI->getSuccessor(0);
3057 BasicBlock *FalseDest = BI->getSuccessor(1);
3058
3059 auto &DL = BB->getDataLayout();
3060 bool TrueDestIsSafe = false;
3061 bool FalseDestIsSafe = false;
3062
3063 // True dest is safe if BranchCond => GuardCond.
3064 auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
3065 if (Impl && *Impl)
3066 TrueDestIsSafe = true;
3067 else {
3068 // False dest is safe if !BranchCond => GuardCond.
3069 Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
3070 if (Impl && *Impl)
3071 FalseDestIsSafe = true;
3072 }
3073
3074 if (!TrueDestIsSafe && !FalseDestIsSafe)
3075 return false;
3076
     // Exactly one of the two flags is set at this point, so these two
     // expressions always pick opposite successors of BI.
3077 BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3078 BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3079
3080 ValueToValueMapTy UnguardedMapping, GuardedMapping;
     // The cost is measured up to the instruction after the guard, i.e. it
     // includes the guard itself (the larger of the two copies made below).
3081 Instruction *AfterGuard = Guard->getNextNode();
3082 unsigned Cost =
3083 getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
3084 if (Cost > BBDupThreshold)
3085 return false;
3086 // Duplicate all instructions before the guard and the guard itself to the
3087 // branch where implication is not proved.
     // NOTE(review): listing line 3088 is missing from this extract; it
     // presumably declares GuardedBlock and opens the call whose arguments
     // remain below -- confirm against upstream.
3089 BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3090 assert(GuardedBlock && "Could not create the guarded block?");
3091 // Duplicate all instructions before the guard in the unguarded branch.
3092 // Since we have successfully duplicated the guarded block and this block
3093 // has fewer instructions, we expect it to succeed.
     // NOTE(review): listing line 3094 is missing from this extract; it
     // presumably declares UnguardedBlock and opens the call whose arguments
     // remain below -- confirm against upstream.
3095 BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3096 assert(UnguardedBlock && "Could not create the unguarded block?");
3097 LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3098 << GuardedBlock->getName() << "\n");
3099 // Some instructions before the guard may still have uses. For them, we need
3100 // to create Phi nodes merging their copies in both guarded and unguarded
3101 // branches. Those instructions that have no uses can be just removed.
     // NOTE(review): listing line 3102 is missing from this extract; it
     // presumably declares the ToRemove container filled below -- confirm
     // against upstream.
     // Collect every non-PHI instruction from the start of BB up to, but not
     // including, AfterGuard (so the guard itself is collected too).
3103 for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3104 if (!isa<PHINode>(&*BI))
3105 ToRemove.push_back(&*BI);
3106
3107 BasicBlock::iterator InsertionPoint = BB->getFirstInsertionPt();
3108 assert(InsertionPoint != BB->end() && "Empty block?");
3109 // Substitute with Phis & remove.
     // Processed last-to-first, presumably so that users inside the removed
     // range are erased before the instructions they use.
3110 for (auto *Inst : reverse(ToRemove)) {
3111 if (!Inst->use_empty()) {
3112 PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
3113 NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3114 NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
3115 NewPN->setDebugLoc(Inst->getDebugLoc());
3116 NewPN->insertBefore(InsertionPoint);
3117 Inst->replaceAllUsesWith(NewPN);
3118 }
3119 Inst->dropDbgRecords();
3120 Inst->eraseFromParent();
3121 }
3122 return true;
3123}
3124
/// Returns the set of analyses this pass reports as preserved. Per the TODOs
/// below, BPI/BFI are not part of that set yet.
3125PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {
     // NOTE(review): listing lines 3126-3128 are missing from this extract;
     // they presumably declare PA and mark the preserved analyses on it --
     // restore them from upstream before compiling.
3129
3130 // TODO: We would like to preserve BPI/BFI. Enable once all paths update them.
3131 // TODO: Would be nice to verify BPI/BFI consistency as well.
3132 return PA;
3133}
3134
/// Runs analysis AnalysisT on the current function through FAM and returns a
/// pointer to its result.
///
/// If nothing changed since the previous call, the analysis is requested
/// directly. Otherwise the change flag is cleared, every analysis that is not
/// reported as preserved (BPI and BFI are additionally kept) is invalidated,
/// pending dominator-tree updates are flushed, the analysis is run, and the
/// cached TTI, TLI and AA pointers are re-fetched because they may have been
/// invalidated.
3135template <typename AnalysisT>
3136typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
3137 assert(FAM && "Can't run external analysis without FunctionAnalysisManager");
3138
3139 // If there were no changes since last call to 'runExternalAnalysis' then all
3140 // analysis is either up to date or explicitly invalidated. Just go ahead and
3141 // run the "external" analysis.
3142 if (!ChangedSinceLastAnalysisUpdate) {
3143 assert(!DTU->hasPendingUpdates() &&
3144 "Lost update of 'ChangedSinceLastAnalysisUpdate'?");
3145 // Run the "external" analysis.
3146 return &FAM->getResult<AnalysisT>(*F);
3147 }
3148 ChangedSinceLastAnalysisUpdate = false;
3149
3150 auto PA = getPreservedAnalysis();
3151 // TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI
3152 // as preserved.
3153 PA.preserve<BranchProbabilityAnalysis>();
3154 PA.preserve<BlockFrequencyAnalysis>();
3155 // Report everything except explicitly preserved as invalid.
3156 FAM->invalidate(*F, PA);
3157 // Update DT/PDT.
3158 DTU->flush();
3159 // Make sure DT/PDT are valid before running "external" analysis.
3160 assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
3161 assert((!DTU->hasPostDomTree() ||
3162 DTU->getPostDomTree().verify(
     // NOTE(review): listing line 3163 is missing from this extract; it
     // presumably supplies the verification-level argument and closes this
     // assert -- restore it from upstream before compiling.
3164 // Run the "external" analysis.
3165 auto *Result = &FAM->getResult<AnalysisT>(*F);
3166 // Update analysis JumpThreading depends on and not explicitly preserved.
3167 TTI = &FAM->getResult<TargetIRAnalysis>(*F);
3168 TLI = &FAM->getResult<TargetLibraryAnalysis>(*F);
3169 AA = &FAM->getResult<AAManager>(*F);
3170
3171 return Result;
3172}
3173
/// Returns the BranchProbabilityInfo pointer held in BPI, filling BPI first if
/// it has not been set yet (which requires FAM).
3174BranchProbabilityInfo *JumpThreadingPass::getBPI() {
3175 if (!BPI) {
3176 assert(FAM && "Can't create BPI without FunctionAnalysisManager");
     // NOTE(review): listing line 3177 is missing from this extract; it
     // presumably assigns BPI from FAM. As shown, `*BPI` below would read a
     // value that was never set -- restore the line from upstream.
3178 }
3179 return *BPI;
3180}
3181
/// Returns the BlockFrequencyInfo pointer held in BFI, filling BFI first if it
/// has not been set yet (which requires FAM).
3182BlockFrequencyInfo *JumpThreadingPass::getBFI() {
3183 if (!BFI) {
3184 assert(FAM && "Can't create BFI without FunctionAnalysisManager");
     // NOTE(review): listing line 3185 is missing from this extract; it
     // presumably assigns BFI from FAM. As shown, `*BFI` below would read a
     // value that was never set -- restore the line from upstream.
3186 }
3187 return *BFI;
3188}
3189
3190// Important note on validity of BPI/BFI. JumpThreading tries to preserve
3191// BPI/BFI as it goes. Thus if cached instance exists it will be updated.
3192// Otherwise, new instance of BPI/BFI is created (up to date by definition).
//
// Returns the pointer from getBPI() when it is non-null. Otherwise, if Force is
// true, BPI is computed through runExternalAnalysis and stored in the BPI
// member before being returned. If Force is false the current content of BPI is
// returned unchanged (presumably null at that point -- verify against getBPI).
3193BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) {
3194 auto *Res = getBPI();
3195 if (Res)
3196 return Res;
3197
3198 if (Force)
3199 BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
3200
3201 return *BPI;
3202}
3203
// Returns the pointer from getBFI() when it is non-null. Otherwise, if Force is
// true, BFI is computed through runExternalAnalysis and stored in the BFI
// member before being returned. If Force is false the current content of BFI is
// returned unchanged (presumably null at that point -- verify against getBFI).
3204BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) {
3205 auto *Res = getBFI();
3206 if (Res)
3207 return Res;
3208
3209 if (Force)
3210 BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
3211
3212 return *BFI;
3213}
Rewrite undef for PHI
ReachingDefAnalysis InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
BlockVerifier::State From
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Size
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This defines the Use class.
static unsigned getBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForBranchOnUndef - If we determine that the specified block ends in an undefined jump,...
static cl::opt< unsigned > PhiDuplicateThreshold("jump-threading-phi-threshold", cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), cl::Hidden)
static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, BasicBlock *KnownAtEndOfBB)
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
static cl::opt< bool > ThreadAcrossLoopHeaders("jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), cl::init(false), cl::Hidden)
static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, ValueToValueMapTy &ValueMap)
addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new predecessor to the PHIBB block.
static BasicBlock * findMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &PredToDestList)
findMostPopularDest - The specified list contains multiple possible threadable destinations.
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
static bool hasAddressTakenAndUsed(BasicBlock *BB)
See the comments on JumpThreadingPass.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:512
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
#define P(N)
ppc ctr loops verify
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:424
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
DbgMarker * createMarker(Instruction *I)
Attach a DbgMarker to the given instruction.
Definition: BasicBlock.cpp:52
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:658
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:178
const Instruction & front() const
Definition: BasicBlock.h:471
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
Definition: BasicBlock.cpp:287
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:481
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
DbgMarker * getMarker(InstListType::iterator It)
Return the DbgMarker for the position given by It, so that DbgRecords can be inserted there.
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:677
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:675
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:516
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
void disableDominatorTree()
Disable the use of the dominator tree during alias analysis queries.
The address of a basic block.
Definition: Constants.h:890
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1871
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
uint32_t getNumerator() const
BranchProbability getCompl() const
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1410
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:747
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2605
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:206
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
This is an important base class in LLVM.
Definition: Constant.h:42
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:723
This class represents an Operation in the Expression.
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:63
Per-instruction record of debug-info.
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(DbgMarker *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere, bool InsertAtHead=false)
Clone all DbgMarkers from From into this marker.
const BasicBlock * getParent() const
This represents the llvm.dbg.value instruction.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
This class represents a freeze function that returns a random concrete value if an operand is either a ...
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
Indirect Branch Instruction.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:78
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1727
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1713
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:463
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
bool isSpecialTerminator() const
Definition: Instruction.h:284
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
bool simplifyPartiallyRedundantLoad(LoadInst *LI)
simplifyPartiallyRedundantLoad - If LoadI is an obviously partially redundant load instruction,...
bool processBranchOnXOR(BinaryOperator *BO)
processBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
bool processGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case if another branch o...
void updateSSA(BasicBlock *BB, BasicBlock *NewBB, ValueToValueMapTy &ValueMapping)
Update the SSA form.
bool computeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
void findLoopHeaders(Function &F)
findLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB)
Merge basic block BB into its sole predecessor if possible.
void cloneInstructions(ValueToValueMapTy &ValueMapping, BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB)
Clone instructions in range [BI, BE) to NewBB.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, FunctionAnalysisManager *FAM, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, LazyValueInfo *LVI, AAResults *AA, std::unique_ptr< DomTreeUpdater > DTU, std::optional< BlockFrequencyInfo * > BFI, std::optional< BranchProbabilityInfo * > BPI)
Constant * evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *cond, const DataLayout &DL)
bool processBranchOnPHI(PHINode *PN)
processBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node (or freeze PH...
bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond)
Attempt to thread through two successive basic blocks.
bool computeValueKnownInPredecessorsImpl(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, SmallPtrSet< Value *, 4 > &RecursionSet, Instruction *CxtI=nullptr)
computeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx)
DomTreeUpdater * getDomTreeUpdater() const
bool processThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
bool processBlock(BasicBlock *BB)
processBlock - If there are any predecessors whose control can be threaded through to a successor,...
bool processImpliedCondition(BasicBlock *BB)
bool duplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs)
duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB)
bool tryThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
tryThreadEdge - Thread an edge if it's safe and profitable to do so.
bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
tryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2
bool tryToUnfoldSelectInCurrBB(BasicBlock *BB)
tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
void threadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
threadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches,...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Analysis to compute lazy value information.
This pass computes, caches, and vends lazy value constraint information.
Definition: LazyValueInfo.h:32
void eraseBlock(BasicBlock *BB)
Inform the analysis cache that we have erased a block.
void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc)
Inform the analysis cache that we have threaded an edge from PredBB to OldSucc to be from PredBB to N...
Constant * getPredicateOnEdge(CmpInst::Predicate Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value comparison with a constant is known to be true or false on the ...
Constant * getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value is known to be a constant on the specified edge.
ConstantRange getConstantRangeOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Return the ConstantRange constraint that is known to hold for the specified value on the specified edg...
Constant * getConstant(Value *V, Instruction *CxtI)
Determine whether the specified value is known to be a constant at the specified instruction.
void forgetValue(Value *V)
Remove information related to this value from the cache.
Constant * getPredicateAt(CmpInst::Predicate Pred, Value *V, Constant *C, Instruction *CxtI, bool UseBlockValue)
Determine whether the specified value comparison with a constant is known to be true or false at the ...
An instruction for reading from memory.
Definition: Instructions.h:174
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:218
bool isUnordered() const
Definition: Instructions.h:247
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:228
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
static LocationSize precise(uint64_t Value)
Metadata node.
Definition: Metadata.h:1069
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
Representation for a specific memory location.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:193
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:40
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
Definition: SSAUpdater.cpp:188
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Definition: SSAUpdater.cpp:53
void UpdateDebugValues(Instruction *I)
Rewrite debug value intrinsics to conform to a new SSA form.
Definition: SSAUpdater.cpp:200
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Definition: SSAUpdater.cpp:70
This class represents the LLVM 'select' instruction.
size_type size() const
Definition: SmallPtrSet.h:95
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:435
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:502
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
'undef' values are things that do not have specified contents.
Definition: Constants.h:1398
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1833
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
iterator find(const KeyT &Val)
Definition: ValueMap.h:155
iterator end()
Definition: ValueMap.h:135
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:1067
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1096
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:105
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:130
unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
Definition: Local.cpp:3470
auto successors(const MachineBasicBlock *BB)
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
Value * findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
Definition: Loads.cpp:608
void remapDebugVariable(ValueToValueMapTy &Mapping, Instruction *Inst)
Remap the operands of the debug records attached to Inst, and the operands of Inst itself if it's a d...
Definition: Local.cpp:3692
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:731
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:479
bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: DebugInfo.cpp:138
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1118
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
bool hasValidBranchWeightMD(const Instruction &I)
Checks if an instruction has valid Branch Weight Metadata.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void cloneNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, DenseMap< MDNode *, MDNode * > &ClonedScopes, StringRef Ext, LLVMContext &Context)
Duplicate the specified list of noalias decl scopes.
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3345
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!...
Definition: Local.cpp:771
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1961
Value * simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
void adaptNoAliasScopes(llvm::Instruction *I, const DenseMap< MDNode *, MDNode * > &ClonedScopes, LLVMContext &Context)
Adapt the metadata for the specified instruction according to the provided mapping.
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1997
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2082
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1607
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
unsigned pred_size(const MachineBasicBlock *BB)
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock * > > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them.
Definition: CFG.cpp:34
std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
Function object to check whether the second component of a container supported by std::get (like std:...
Definition: STLExtras.h:1459