LLVM 23.0.0git
JumpThreading.cpp
Go to the documentation of this file.
1//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Jump Threading pass.
10//
11//===----------------------------------------------------------------------===//
12
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <utility>
77
78using namespace llvm;
79using namespace jumpthreading;
80
81#define DEBUG_TYPE "jump-threading"
82
83STATISTIC(NumThreads, "Number of jumps threaded");
84STATISTIC(NumFolds, "Number of terminators folded");
85STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
86
88BBDuplicateThreshold("jump-threading-threshold",
89 cl::desc("Max block size to duplicate for jump threading"),
91
94 "jump-threading-implication-search-threshold",
95 cl::desc("The number of predecessors to search for a stronger "
96 "condition to use to thread over a weaker condition"),
98
100 "jump-threading-phi-threshold",
101 cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
102 cl::Hidden);
103
105 "jump-threading-across-loop-headers",
106 cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
107 cl::init(false), cl::Hidden);
108
110 DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
111}
112
113// Update branch probability information according to conditional
114// branch probability. This is usually made possible for cloned branches
115// in inline instances by the context specific profile in the caller.
116// For instance,
117//
118// [Block PredBB]
119// [Branch PredBr]
120// if (t) {
121// Block A;
122// } else {
123// Block B;
124// }
125//
126// [Block BB]
127// cond = PN([true, %A], [..., %B]); // PHI node
128// [Branch CondBr]
129// if (cond) {
130// ... // P(cond == true) = 1%
131// }
132//
133// Here we know that when block A is taken, cond must be true, which means
134// P(cond == true | A) = 1
135//
136// Given that P(cond == true) = P(cond == true | A) * P(A) +
137// P(cond == true | B) * P(B)
138// we get:
139// P(cond == true ) = P(A) + P(cond == true | B) * P(B)
140//
141// which gives us:
142// P(A) is less than P(cond == true), i.e.
143// P(t == true) <= P(cond == true)
144//
145// In other words, if we know P(cond == true) is unlikely, we know
146// that P(t == true) is also unlikely.
147//
150 if (!CondBr)
151 return;
152
153 uint64_t TrueWeight, FalseWeight;
154 if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))
155 return;
156
157 if (TrueWeight + FalseWeight == 0)
158 // Zero branch_weights do not give a hint for getting branch probabilities.
159 // Technically it would result in division by zero denominator, which is
160 // TrueWeight + FalseWeight.
161 return;
162
163 // Returns the outgoing edge of the dominating predecessor block
164 // that leads to the PhiNode's incoming block:
165 auto GetPredOutEdge =
166 [](BasicBlock *IncomingBB,
167 BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
168 auto *PredBB = IncomingBB;
169 auto *SuccBB = PhiBB;
171 while (true) {
172 if (isa<CondBrInst>(PredBB->getTerminator()))
173 return {PredBB, SuccBB};
174 Visited.insert(PredBB);
175 auto *SinglePredBB = PredBB->getSinglePredecessor();
176 if (!SinglePredBB)
177 return {nullptr, nullptr};
178
179 // Stop searching when SinglePredBB has been visited. It means we see
180 // an unreachable loop.
181 if (Visited.count(SinglePredBB))
182 return {nullptr, nullptr};
183
184 SuccBB = PredBB;
185 PredBB = SinglePredBB;
186 }
187 };
188
189 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
190 Value *PhiOpnd = PN->getIncomingValue(i);
191 ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
192
193 if (!CI || !CI->getType()->isIntegerTy(1))
194 continue;
195
198 TrueWeight, TrueWeight + FalseWeight)
200 FalseWeight, TrueWeight + FalseWeight));
201
202 auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
203 if (!PredOutEdge.first)
204 return;
205
206 BasicBlock *PredBB = PredOutEdge.first;
207 CondBrInst *PredBr = dyn_cast<CondBrInst>(PredBB->getTerminator());
208 if (!PredBr)
209 return;
210
211 uint64_t PredTrueWeight, PredFalseWeight;
212 // FIXME: We currently only set the profile data when it is missing.
213 // With PGO, this can be used to refine even existing profile data with
214 // context information. This needs to be done after more performance
215 // testing.
216 if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))
217 continue;
218
219 // We can not infer anything useful when BP >= 50%, because BP is the
220 // upper bound probability value.
221 if (BP >= BranchProbability(50, 100))
222 continue;
223
224 uint32_t Weights[2];
225 if (PredBr->getSuccessor(0) == PredOutEdge.second) {
226 Weights[0] = BP.getNumerator();
227 Weights[1] = BP.getCompl().getNumerator();
228 } else {
229 Weights[0] = BP.getCompl().getNumerator();
230 Weights[1] = BP.getNumerator();
231 }
232 setBranchWeights(*PredBr, Weights, hasBranchWeightOrigin(*PredBr));
233 }
234}
235
238 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
239 // Jump Threading has no sense for the targets with divergent CF
240 if (TTI.hasBranchDivergence(&F))
241 return PreservedAnalyses::all();
242 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
243 auto &LVI = AM.getResult<LazyValueAnalysis>(F);
244 auto &AA = AM.getResult<AAManager>(F);
245 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
246
247 bool Changed =
248 runImpl(F, &AM, &TLI, &TTI, &LVI, &AA,
249 std::make_unique<DomTreeUpdater>(
250 &DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy),
251 nullptr, nullptr);
252
253 if (!Changed)
254 return PreservedAnalyses::all();
255
256
258
259#if defined(EXPENSIVE_CHECKS)
261 DominatorTree::VerificationLevel::Full) &&
262 "DT broken after JumpThreading");
263 assert((!getDomTreeUpdater()->hasPostDomTree() ||
264 getDomTreeUpdater()->getPostDomTree().verify(
265 PostDominatorTree::VerificationLevel::Full)) &&
266 "PDT broken after JumpThreading");
267#else
269 DominatorTree::VerificationLevel::Fast) &&
270 "DT broken after JumpThreading");
271 assert((!getDomTreeUpdater()->hasPostDomTree() ||
272 getDomTreeUpdater()->getPostDomTree().verify(
273 PostDominatorTree::VerificationLevel::Fast)) &&
274 "PDT broken after JumpThreading");
275#endif
276
277 return getPreservedAnalysis();
278}
279
281 TargetLibraryInfo *TLI_,
283 AliasAnalysis *AA_,
284 std::unique_ptr<DomTreeUpdater> DTU_,
285 BlockFrequencyInfo *BFI_,
286 BranchProbabilityInfo *BPI_) {
287 LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");
288 F = &F_;
289 FAM = FAM_;
290 TLI = TLI_;
291 TTI = TTI_;
292 LVI = LVI_;
293 AA = AA_;
294 DTU = std::move(DTU_);
295 BFI = BFI_;
296 BPI = BPI_;
297 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
298 F->getParent(), Intrinsic::experimental_guard);
299 HasGuards = GuardDecl && !GuardDecl->use_empty();
300
301 // Reduce the number of instructions duplicated when optimizing strictly for
302 // size.
303 if (BBDuplicateThreshold.getNumOccurrences())
304 BBDupThreshold = BBDuplicateThreshold;
305 else if (F->hasMinSize())
306 BBDupThreshold = 3;
307 else
308 BBDupThreshold = DefaultBBDupThreshold;
309
310 assert(DTU && "DTU isn't passed into JumpThreading before using it.");
311 assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
312 DominatorTree &DT = DTU->getDomTree();
313
314 Unreachable.clear();
315 for (auto &BB : *F)
316 if (!DT.isReachableFromEntry(&BB))
317 Unreachable.insert(&BB);
318
320 findLoopHeaders(*F);
321
322 bool EverChanged = false;
323 bool Changed;
324 do {
325 Changed = false;
326 for (auto &BB : *F) {
327 if (Unreachable.count(&BB))
328 continue;
329 while (processBlock(&BB)) // Thread all of the branches we can over BB.
330 Changed = ChangedSinceLastAnalysisUpdate = true;
331
332 // Stop processing BB if it's the entry or is now deleted. The following
333 // routines attempt to eliminate BB and locating a suitable replacement
334 // for the entry is non-trivial.
335 if (&BB == &F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))
336 continue;
337
338 if (pred_empty(&BB)) {
339 // When processBlock makes BB unreachable it doesn't bother to fix up
340 // the instructions in it. We must remove BB to prevent invalid IR.
341 LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
342 << "' with terminator: " << *BB.getTerminator()
343 << '\n');
344 LoopHeaders.erase(&BB);
345 LVI->eraseBlock(&BB);
346 DeleteDeadBlock(&BB, DTU.get());
347 Changed = ChangedSinceLastAnalysisUpdate = true;
348 continue;
349 }
350
351 // processBlock doesn't thread BBs with unconditional TIs. However, if BB
352 // is "almost empty", we attempt to merge BB with its sole successor.
353 if (auto *BI = dyn_cast<UncondBrInst>(BB.getTerminator())) {
354 BasicBlock *Succ = BI->getSuccessor();
355 if (
356 // The terminator must be the only non-phi instruction in BB.
357 BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
358 // Don't alter Loop headers and latches to ensure another pass can
359 // detect and transform nested loops later.
360 !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
362 // BB is valid for cleanup here because we passed in DTU. F remains
363 // BB's parent until a DTU->getDomTree() event.
364 LVI->eraseBlock(&BB);
365 Changed = ChangedSinceLastAnalysisUpdate = true;
366 }
367 }
368 }
369 EverChanged |= Changed;
370 } while (Changed);
371
372 // Jump threading may have introduced redundant debug values into F which
373 // should be removed.
374 if (EverChanged)
375 for (auto &BB : *F) {
377 }
378
379 LoopHeaders.clear();
380 return EverChanged;
381}
382
383// Replace uses of Cond with ToVal when safe to do so. If all uses are
384// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
385// because we may incorrectly replace uses when guards/assumes are uses of
386// of `Cond` and we used the guards/assume to reason about the `Cond` value
387// at the end of block. RAUW unconditionally replaces all uses
388// including the guards/assumes themselves and the uses before the
389// guard/assume.
391 BasicBlock *KnownAtEndOfBB) {
392 bool Changed = false;
393 assert(Cond->getType() == ToVal->getType());
394 // We can unconditionally replace all uses in non-local blocks (i.e. uses
395 // strictly dominated by BB), since LVI information is true from the
396 // terminator of BB.
397 if (Cond->getParent() == KnownAtEndOfBB)
399 for (Instruction &I : reverse(*KnownAtEndOfBB)) {
400 // Replace any debug-info record users of Cond with ToVal.
401 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
402 DVR.replaceVariableLocationOp(Cond, ToVal, true);
403
404 // Reached the Cond whose uses we are trying to replace, so there are no
405 // more uses.
406 if (&I == Cond)
407 break;
408 // We only replace uses in instructions that are guaranteed to reach the end
409 // of BB, where we know Cond is ToVal.
411 break;
412 Changed |= I.replaceUsesOfWith(Cond, ToVal);
413 }
414 if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
415 Cond->eraseFromParent();
416 Changed = true;
417 }
418 return Changed;
419}
420
421/// Return the cost of duplicating a piece of this block from first non-phi
422/// and before StopAt instruction to thread across it. Stop scanning the block
423/// when exceeding the threshold. If duplication is impossible, returns ~0U.
425 BasicBlock *BB,
427 unsigned Threshold) {
428 assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
429
430 // Do not duplicate the BB if it has a lot of PHI nodes.
431 // If a threadable chain is too long then the number of PHI nodes can add up,
432 // leading to a substantial increase in compile time when rewriting the SSA.
433 unsigned PhiCount = 0;
434 Instruction *FirstNonPHI = nullptr;
435 for (Instruction &I : *BB) {
436 if (!isa<PHINode>(&I)) {
437 FirstNonPHI = &I;
438 break;
439 }
440 if (++PhiCount > PhiDuplicateThreshold)
441 return ~0U;
442 }
443
444 /// Ignore PHI nodes, these will be flattened when duplication happens.
445 BasicBlock::const_iterator I(FirstNonPHI);
446
447 // FIXME: THREADING will delete values that are just used to compute the
448 // branch, so they shouldn't count against the duplication cost.
449
450 unsigned Bonus = 0;
451 if (BB->getTerminator() == StopAt) {
452 // Threading through a switch statement is particularly profitable. If this
453 // block ends in a switch, decrease its cost to make it more likely to
454 // happen.
456 Bonus = 6;
457
458 // The same holds for indirect branches, but slightly more so.
460 Bonus = 8;
461 }
462
463 // Bump the threshold up so the early exit from the loop doesn't skip the
464 // terminator-based Size adjustment at the end.
465 Threshold += Bonus;
466
467 // Sum up the cost of each instruction until we get to the terminator. Don't
468 // include the terminator because the copy won't include it.
469 unsigned Size = 0;
470 for (; &*I != StopAt; ++I) {
471
472 // Stop scanning the block if we've reached the threshold.
473 if (Size > Threshold)
474 return Size;
475
476 // Bail out if this instruction gives back a token type, it is not possible
477 // to duplicate it if it is used outside this BB.
478 if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
479 return ~0U;
480
481 // Blocks with NoDuplicate are modelled as having infinite cost, so they
482 // are never duplicated.
483 if (const CallInst *CI = dyn_cast<CallInst>(I))
484 if (CI->cannotDuplicate() || CI->isConvergent())
485 return ~0U;
486
487 if (TTI->getInstructionCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) ==
489 continue;
490
491 // All other instructions count for at least one unit.
492 ++Size;
493
494 // Calls are more expensive. If they are non-intrinsic calls, we model them
495 // as having cost of 4. If they are a non-vector intrinsic, we model them
496 // as having cost of 2 total, and if they are a vector intrinsic, we model
497 // them as having cost 1.
498 if (const CallInst *CI = dyn_cast<CallInst>(I)) {
499 if (!isa<IntrinsicInst>(CI))
500 Size += 3;
501 else if (!CI->getType()->isVectorTy())
502 Size += 1;
503 }
504 }
505
506 return Size > Bonus ? Size - Bonus : 0;
507}
508
509/// findLoopHeaders - We do not want jump threading to turn proper loop
510/// structures into irreducible loops. Doing this breaks up the loop nesting
511/// hierarchy and pessimizes later transformations. To prevent this from
512/// happening, we first have to find the loop headers. Here we approximate this
513/// by finding targets of backedges in the CFG.
514///
515/// Note that there definitely are cases when we want to allow threading of
516/// edges across a loop header. For example, threading a jump from outside the
517/// loop (the preheader) to an exit block of the loop is definitely profitable.
518/// It is also almost always profitable to thread backedges from within the loop
519/// to exit blocks, and is often profitable to thread backedges to other blocks
520/// within the loop (forming a nested loop). This simple analysis is not rich
521/// enough to track all of these properties and keep it up-to-date as the CFG
522/// mutates, so we don't allow any of these transformations.
528
529/// getKnownConstant - Helper method to determine if we can thread over a
530/// terminator with the given value as its condition, and if so what value to
531/// use for that. What kind of value this is depends on whether we want an
532/// integer or a block address, but an undef is always accepted.
533/// Returns null if Val is null or not an appropriate constant.
535 if (!Val)
536 return nullptr;
537
538 // Undef is "known" enough.
539 if (UndefValue *U = dyn_cast<UndefValue>(Val))
540 return U;
541
542 if (Preference == WantBlockAddress)
544
545 return dyn_cast<ConstantInt>(Val);
546}
547
548/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
549/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
550/// in any of our predecessors. If so, return the known list of value and pred
551/// BB in the result vector.
552///
553/// This returns true if there were any known values.
555 Value *V, BasicBlock *BB, PredValueInfo &Result,
556 ConstantPreference Preference, SmallPtrSet<Value *, 4> &RecursionSet,
557 Instruction *CxtI) {
558 const DataLayout &DL = BB->getDataLayout();
559
560 // This method walks up use-def chains recursively. Because of this, we could
561 // get into an infinite loop going around loops in the use-def chain. To
562 // prevent this, keep track of what (value, block) pairs we've already visited
563 // and terminate the search if we loop back to them
564 if (!RecursionSet.insert(V).second)
565 return false;
566
567 // If V is a constant, then it is known in all predecessors.
568 if (Constant *KC = getKnownConstant(V, Preference)) {
569 for (BasicBlock *Pred : predecessors(BB))
570 Result.emplace_back(KC, Pred);
571
572 return !Result.empty();
573 }
574
575 // If V is a non-instruction value, or an instruction in a different block,
576 // then it can't be derived from a PHI.
578 if (!I || I->getParent() != BB) {
579
580 // Okay, if this is a live-in value, see if it has a known value at the any
581 // edge from our predecessors.
582 for (BasicBlock *P : predecessors(BB)) {
583 using namespace PatternMatch;
584 // If the value is known by LazyValueInfo to be a constant in a
585 // predecessor, use that information to try to thread this block.
586 Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
587 // If I is a non-local compare-with-constant instruction, use more-rich
588 // 'getPredicateOnEdge' method. This would be able to handle value
589 // inequalities better, for example if the compare is "X < 4" and "X < 3"
590 // is known true but "X < 4" itself is not available.
591 CmpPredicate Pred;
592 Value *Val;
593 Constant *Cst;
594 if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst))))
595 PredCst = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
596 if (Constant *KC = getKnownConstant(PredCst, Preference))
597 Result.emplace_back(KC, P);
598 }
599
600 return !Result.empty();
601 }
602
603 /// If I is a PHI node, then we know the incoming values for any constants.
604 if (PHINode *PN = dyn_cast<PHINode>(I)) {
605 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
606 Value *InVal = PN->getIncomingValue(i);
607 if (Constant *KC = getKnownConstant(InVal, Preference)) {
608 Result.emplace_back(KC, PN->getIncomingBlock(i));
609 } else {
610 Constant *CI = LVI->getConstantOnEdge(InVal,
611 PN->getIncomingBlock(i),
612 BB, CxtI);
613 if (Constant *KC = getKnownConstant(CI, Preference))
614 Result.emplace_back(KC, PN->getIncomingBlock(i));
615 }
616 }
617
618 return !Result.empty();
619 }
620
621 // Handle Cast instructions.
622 if (CastInst *CI = dyn_cast<CastInst>(I)) {
623 Value *Source = CI->getOperand(0);
624 PredValueInfoTy Vals;
625 computeValueKnownInPredecessorsImpl(Source, BB, Vals, Preference,
626 RecursionSet, CxtI);
627 if (Vals.empty())
628 return false;
629
630 // Convert the known values.
631 for (auto &Val : Vals)
632 if (Constant *Folded = ConstantFoldCastOperand(CI->getOpcode(), Val.first,
633 CI->getType(), DL))
634 Result.emplace_back(Folded, Val.second);
635
636 return !Result.empty();
637 }
638
639 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
640 Value *Source = FI->getOperand(0);
641 computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
642 RecursionSet, CxtI);
643
644 erase_if(Result, [](auto &Pair) {
645 return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
646 });
647
648 return !Result.empty();
649 }
650
651 // Handle some boolean conditions.
652 if (I->getType()->getPrimitiveSizeInBits() == 1) {
653 using namespace PatternMatch;
654 if (Preference != WantInteger)
655 return false;
656 // X | true -> true
657 // X & false -> false
658 Value *Op0, *Op1;
659 if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
660 match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
661 PredValueInfoTy LHSVals, RHSVals;
662
664 RecursionSet, CxtI);
666 RecursionSet, CxtI);
667
668 if (LHSVals.empty() && RHSVals.empty())
669 return false;
670
671 ConstantInt *InterestingVal;
672 if (match(I, m_LogicalOr()))
673 InterestingVal = ConstantInt::getTrue(I->getContext());
674 else
675 InterestingVal = ConstantInt::getFalse(I->getContext());
676
677 SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
678
679 // Scan for the sentinel. If we find an undef, force it to the
680 // interesting value: x|undef -> true and x&undef -> false.
681 for (const auto &LHSVal : LHSVals)
682 if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
683 Result.emplace_back(InterestingVal, LHSVal.second);
684 LHSKnownBBs.insert(LHSVal.second);
685 }
686 for (const auto &RHSVal : RHSVals)
687 if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
688 // If we already inferred a value for this block on the LHS, don't
689 // re-add it.
690 if (!LHSKnownBBs.count(RHSVal.second))
691 Result.emplace_back(InterestingVal, RHSVal.second);
692 }
693
694 return !Result.empty();
695 }
696
697 // Handle the NOT form of XOR.
698 if (I->getOpcode() == Instruction::Xor &&
699 isa<ConstantInt>(I->getOperand(1)) &&
700 cast<ConstantInt>(I->getOperand(1))->isOne()) {
701 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
702 WantInteger, RecursionSet, CxtI);
703 if (Result.empty())
704 return false;
705
706 // Invert the known values.
707 for (auto &R : Result)
708 R.first = ConstantExpr::getNot(R.first);
709
710 return true;
711 }
712
713 // Try to simplify some other binary operator values.
714 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
715 if (Preference != WantInteger)
716 return false;
717 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
718 PredValueInfoTy LHSVals;
719 computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
720 WantInteger, RecursionSet, CxtI);
721
722 // Try to use constant folding to simplify the binary operator.
723 for (const auto &LHSVal : LHSVals) {
724 Constant *V = LHSVal.first;
725 Constant *Folded =
726 ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
727
728 if (Constant *KC = getKnownConstant(Folded, WantInteger))
729 Result.emplace_back(KC, LHSVal.second);
730 }
731 }
732
733 return !Result.empty();
734 }
735
736 // Handle compare with phi operand, where the PHI is defined in this block.
737 if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
738 if (Preference != WantInteger)
739 return false;
740 Type *CmpType = Cmp->getType();
741 Value *CmpLHS = Cmp->getOperand(0);
742 Value *CmpRHS = Cmp->getOperand(1);
743 CmpInst::Predicate Pred = Cmp->getPredicate();
744
745 PHINode *PN = dyn_cast<PHINode>(CmpLHS);
746 if (!PN)
747 PN = dyn_cast<PHINode>(CmpRHS);
748 // Do not perform phi translation across a loop header phi, because this
749 // may result in comparison of values from two different loop iterations.
750 // FIXME: This check is broken if LoopHeaders is not populated.
751 if (PN && PN->getParent() == BB && !LoopHeaders.contains(BB)) {
752 const DataLayout &DL = PN->getDataLayout();
753 // We can do this simplification if any comparisons fold to true or false.
754 // See if any do.
755 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
756 BasicBlock *PredBB = PN->getIncomingBlock(i);
757 Value *LHS, *RHS;
758 if (PN == CmpLHS) {
759 LHS = PN->getIncomingValue(i);
760 RHS = CmpRHS->DoPHITranslation(BB, PredBB);
761 } else {
762 LHS = CmpLHS->DoPHITranslation(BB, PredBB);
763 RHS = PN->getIncomingValue(i);
764 }
765 Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
766 if (!Res) {
767 if (!isa<Constant>(RHS))
768 continue;
769
770 // getPredicateOnEdge call will make no sense if LHS is defined in BB.
771 auto LHSInst = dyn_cast<Instruction>(LHS);
772 if (LHSInst && LHSInst->getParent() == BB)
773 continue;
774
775 Res = LVI->getPredicateOnEdge(Pred, LHS, cast<Constant>(RHS), PredBB,
776 BB, CxtI ? CxtI : Cmp);
777 }
778
779 if (Constant *KC = getKnownConstant(Res, WantInteger))
780 Result.emplace_back(KC, PredBB);
781 }
782
783 return !Result.empty();
784 }
785
786 // If comparing a live-in value against a constant, see if we know the
787 // live-in value on any predecessors.
788 if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
789 Constant *CmpConst = cast<Constant>(CmpRHS);
790
791 if (!isa<Instruction>(CmpLHS) ||
792 cast<Instruction>(CmpLHS)->getParent() != BB) {
793 for (BasicBlock *P : predecessors(BB)) {
794 // If the value is known by LazyValueInfo to be a constant in a
795 // predecessor, use that information to try to thread this block.
796 Constant *Res = LVI->getPredicateOnEdge(Pred, CmpLHS, CmpConst, P, BB,
797 CxtI ? CxtI : Cmp);
798 if (Constant *KC = getKnownConstant(Res, WantInteger))
799 Result.emplace_back(KC, P);
800 }
801
802 return !Result.empty();
803 }
804
805 // InstCombine can fold some forms of constant range checks into
806 // (icmp (add (x, C1)), C2). See if we have we have such a thing with
807 // x as a live-in.
808 {
809 using namespace PatternMatch;
810
811 Value *AddLHS;
812 ConstantInt *AddConst;
813 if (isa<ConstantInt>(CmpConst) &&
814 match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
815 if (!isa<Instruction>(AddLHS) ||
816 cast<Instruction>(AddLHS)->getParent() != BB) {
817 for (BasicBlock *P : predecessors(BB)) {
818 // If the value is known by LazyValueInfo to be a ConstantRange in
819 // a predecessor, use that information to try to thread this
820 // block.
821 ConstantRange CR = LVI->getConstantRangeOnEdge(
822 AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
823 // Propagate the range through the addition.
824 CR = CR.add(AddConst->getValue());
825
826 // Get the range where the compare returns true.
828 Pred, cast<ConstantInt>(CmpConst)->getValue());
829
830 Constant *ResC;
831 if (CmpRange.contains(CR))
832 ResC = ConstantInt::getTrue(CmpType);
833 else if (CmpRange.inverse().contains(CR))
834 ResC = ConstantInt::getFalse(CmpType);
835 else
836 continue;
837
838 Result.emplace_back(ResC, P);
839 }
840
841 return !Result.empty();
842 }
843 }
844 }
845
846 // Try to find a constant value for the LHS of a comparison,
847 // and evaluate it statically if we can.
848 PredValueInfoTy LHSVals;
849 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
850 WantInteger, RecursionSet, CxtI);
851
852 for (const auto &LHSVal : LHSVals) {
853 Constant *V = LHSVal.first;
854 Constant *Folded =
855 ConstantFoldCompareInstOperands(Pred, V, CmpConst, DL);
856 if (Constant *KC = getKnownConstant(Folded, WantInteger))
857 Result.emplace_back(KC, LHSVal.second);
858 }
859
860 return !Result.empty();
861 }
862 }
863
865 // Handle select instructions where at least one operand is a known constant
866 // and we can figure out the condition value for any predecessor block.
867 Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
868 Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
869 PredValueInfoTy Conds;
870 if ((TrueVal || FalseVal) &&
871 computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
872 WantInteger, RecursionSet, CxtI)) {
873 for (auto &C : Conds) {
874 Constant *Cond = C.first;
875
876 // Figure out what value to use for the condition.
877 bool KnownCond;
879 // A known boolean.
880 KnownCond = CI->isOne();
881 } else {
882 assert(isa<UndefValue>(Cond) && "Unexpected condition value");
883 // Either operand will do, so be sure to pick the one that's a known
884 // constant.
885 // FIXME: Do this more cleverly if both values are known constants?
886 KnownCond = (TrueVal != nullptr);
887 }
888
889 // See if the select has a known constant value for this predecessor.
890 if (Constant *Val = KnownCond ? TrueVal : FalseVal)
891 Result.emplace_back(Val, C.second);
892 }
893
894 return !Result.empty();
895 }
896 }
897
898 // If all else fails, see if LVI can figure out a constant value for us.
899 assert(CxtI->getParent() == BB && "CxtI should be in BB");
900 Constant *CI = LVI->getConstant(V, CxtI);
901 if (Constant *KC = getKnownConstant(CI, Preference)) {
902 for (BasicBlock *Pred : predecessors(BB))
903 Result.emplace_back(KC, Pred);
904 }
905
906 return !Result.empty();
907}
908
909/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
910/// in an undefined jump, decide which block is best to revector to.
911///
912/// Since we can pick an arbitrary destination, we pick the successor with the
913/// fewest predecessors. This should reduce the in-degree of the others.
915 Instruction *BBTerm = BB->getTerminator();
916 unsigned MinSucc = 0;
917 BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
918 // Compute the successor with the minimum number of predecessors.
919 unsigned MinNumPreds = pred_size(TestBB);
920 for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
921 TestBB = BBTerm->getSuccessor(i);
922 unsigned NumPreds = pred_size(TestBB);
923 if (NumPreds < MinNumPreds) {
924 MinSucc = i;
925 MinNumPreds = NumPreds;
926 }
927 }
928
929 return MinSucc;
930}
931
933 if (!BB->hasAddressTaken()) return false;
934
935 // If the block has its address taken, it may be a tree of dead constants
936 // hanging off of it. These shouldn't keep the block alive.
939 return !BA->use_empty();
940}
941
942/// processBlock - If there are any predecessors whose control can be threaded
943/// through to a successor, transform them now.
// NOTE(review): the definition line (orig 944, presumably
// `bool JumpThreadingPass::processBlock(BasicBlock *BB)`) is missing from
// this extract; other dropped lines are flagged inline below.
945 // If the block is trivially dead, just return and let the caller nuke it.
946 // This simplifies other transformations.
947 if (DTU->isBBPendingDeletion(BB) ||
948 (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
949 return false;
950
951 // If this block has a single predecessor, and if that pred has a single
952 // successor, merge the blocks. This encourages recursive jump threading
953 // because now the condition in this block can be threaded through
954 // predecessors of our predecessor block.
// NOTE(review): line 955 — the guarded call performing the merge described
// above — is missing from this extract.
956 return true;
957
// NOTE(review): line 958 — another guarded transform call — is missing from
// this extract.
959 return true;
960
961 // Look if we can propagate guards to predecessors.
962 if (HasGuards && processGuards(BB))
963 return true;
964
965 // What kind of constant we're looking for.
966 ConstantPreference Preference = WantInteger;
967
968 // Look to see if the terminator is a conditional branch, switch or indirect
969 // branch, if not we can't thread it.
970 Value *Condition;
971 Instruction *Terminator = BB->getTerminator();
972 if (CondBrInst *BI = dyn_cast<CondBrInst>(Terminator)) {
973 Condition = BI->getCondition();
974 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
975 Condition = SI->getCondition();
976 } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
977 // Can't thread indirect branch with no successors.
978 if (IB->getNumSuccessors() == 0) return false;
979 Condition = IB->getAddress()->stripPointerCasts();
980 Preference = WantBlockAddress;
981 } else {
982 return false; // Must be an invoke or callbr.
983 }
984
985 // Keep track if we constant folded the condition in this invocation.
986 bool ConstantFolded = false;
987
988 // Run constant folding to see if we can reduce the condition to a simple
989 // constant.
990 if (Instruction *I = dyn_cast<Instruction>(Condition)) {
991 Value *SimpleVal =
// NOTE(review): line 992 — the constant-folding call whose result
// initializes SimpleVal — is missing from this extract.
993 if (SimpleVal) {
994 I->replaceAllUsesWith(SimpleVal);
// NOTE(review): line 995 — presumably a triviality check guarding the
// erase below — is missing from this extract.
996 I->eraseFromParent();
997 Condition = SimpleVal;
998 ConstantFolded = true;
999 }
1000 }
1001
1002 // If the terminator is branching on an undef or freeze undef, we can pick any
1003 // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
1004 auto *FI = dyn_cast<FreezeInst>(Condition);
1005 if (isa<UndefValue>(Condition) ||
1006 (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1007 unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1008 std::vector<DominatorTree::UpdateType> Updates;
1009
1010 // Fold the branch/switch.
1011 Instruction *BBTerm = BB->getTerminator();
1012 Updates.reserve(BBTerm->getNumSuccessors());
1013 for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1014 if (i == BestSucc) continue;
1015 BasicBlock *Succ = BBTerm->getSuccessor(i);
1016 Succ->removePredecessor(BB, true);
1017 Updates.push_back({DominatorTree::Delete, BB, Succ});
1018 }
1019
1020 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1021 << "' folding undef terminator: " << *BBTerm << '\n');
1022 Instruction *NewBI = UncondBrInst::Create(BBTerm->getSuccessor(BestSucc),
1023 BBTerm->getIterator());
1024 NewBI->setDebugLoc(BBTerm->getDebugLoc());
1025 ++NumFolds;
1026 BBTerm->eraseFromParent();
1027 DTU->applyUpdatesPermissive(Updates);
1028 if (FI)
1029 FI->eraseFromParent();
1030 return true;
1031 }
1032
1033 // If the terminator of this block is branching on a constant, simplify the
1034 // terminator to an unconditional branch. This can occur due to threading in
1035 // other blocks.
1036 if (getKnownConstant(Condition, Preference)) {
1037 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1038 << "' folding terminator: " << *BB->getTerminator()
1039 << '\n');
1040 ++NumFolds;
1041 ConstantFoldTerminator(BB, true, nullptr, DTU.get());
1042 if (auto *BPI = getBPI())
1043 BPI->eraseBlock(BB);
1044 return true;
1045 }
1046
1047 Instruction *CondInst = dyn_cast<Instruction>(Condition);
1048
1049 // All the rest of our checks depend on the condition being an instruction.
1050 if (!CondInst) {
1051 // FIXME: Unify this with code below.
1052 if (processThreadableEdges(Condition, BB, Preference, Terminator))
1053 return true;
1054 return ConstantFolded;
1055 }
1056
1057 // Some of the following optimization can safely work on the unfrozen cond.
1058 Value *CondWithoutFreeze = CondInst;
1059 if (auto *FI = dyn_cast<FreezeInst>(CondInst))
1060 CondWithoutFreeze = FI->getOperand(0);
1061
1062 if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
1063 // If we're branching on a conditional, LVI might be able to determine
1064 // it's value at the branch instruction. We only handle comparisons
1065 // against a constant at this time.
1066 if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
1067 Constant *Res =
1068 LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1069 CondConst, BB->getTerminator(),
1070 /*UseBlockValue=*/false);
1071 if (Res) {
1072 // We can safely replace *some* uses of the CondInst if it has
1073 // exactly one value as returned by LVI. RAUW is incorrect in the
1074 // presence of guards and assumes, that have the `Cond` as the use. This
1075 // is because we use the guards/assume to reason about the `Cond` value
1076 // at the end of block, but RAUW unconditionally replaces all uses
1077 // including the guards/assumes themselves and the uses before the
1078 // guard/assume.
1079 if (replaceFoldableUses(CondCmp, Res, BB))
1080 return true;
1081 }
1082
1083 // We did not manage to simplify this branch, try to see whether
1084 // CondCmp depends on a known phi-select pattern.
1085 if (tryToUnfoldSelect(CondCmp, BB))
1086 return true;
1087 }
1088 }
1089
// NOTE(review): line 1090 — presumably the dyn_cast of the terminator that
// binds `SI` (a SwitchInst) for the guarded call below — is missing.
1091 if (tryToUnfoldSelect(SI, BB))
1092 return true;
1093
1094 // Check for some cases that are worth simplifying. Right now we want to look
1095 // for loads that are used by a switch or by the condition for the branch. If
1096 // we see one, check to see if it's partially redundant. If so, insert a PHI
1097 // which can then be used to thread the values.
1098 Value *SimplifyValue = CondWithoutFreeze;
1099
1100 if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1101 if (isa<Constant>(CondCmp->getOperand(1)))
1102 SimplifyValue = CondCmp->getOperand(0);
1103
1104 // TODO: There are other places where load PRE would be profitable, such as
1105 // more complex comparisons.
1106 if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
// NOTE(review): line 1107 — the guarded call, presumably
// simplifyPartiallyRedundantLoad(LoadI) — is missing from this extract.
1108 return true;
1109
1110 // Before threading, try to propagate profile data backwards:
1111 if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1112 if (PN->getParent() == BB && isa<CondBrInst>(BB->getTerminator()))
// NOTE(review): line 1113 — the profile-propagation call guarded here — is
// missing from this extract.
1114
1115 // Handle a variety of cases where we are branching on something derived from
1116 // a PHI node in the current block. If we can prove that any predecessors
1117 // compute a predictable value based on a PHI node, thread those predecessors.
1118 if (processThreadableEdges(CondInst, BB, Preference, Terminator))
1119 return true;
1120
1121 // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1122 // the current block, see if we can simplify.
1123 PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
1124 if (PN && PN->getParent() == BB && isa<CondBrInst>(BB->getTerminator()))
1125 return processBranchOnPHI(PN);
1126
1127 // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1128 if (CondInst->getOpcode() == Instruction::Xor &&
1129 CondInst->getParent() == BB && isa<CondBrInst>(BB->getTerminator()))
1130 return processBranchOnXOR(cast<BinaryOperator>(CondInst));
1131
1132 // Search for a stronger dominating condition that can be used to simplify a
1133 // conditional branch leaving BB.
// NOTE(review): line 1134 — the guarded condition, presumably a call to
// processImpliedCondition(BB) — is missing from this extract.
1135 return true;
1136
1137 return false;
1138}
1139
// NOTE(review): the definition line (orig 1140, presumably
// `bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB)`) is
// missing from this extract. The routine walks up the single-predecessor
// chain looking for a dominating conditional branch whose condition implies
// this block's branch condition; on success it folds this block's branch to
// an unconditional one.
1141 auto *BI = dyn_cast<CondBrInst>(BB->getTerminator());
1142 if (!BI)
1143 return false;
1144
1145 Value *Cond = BI->getCondition();
1146 // Assuming that predecessor's branch was taken, if pred's branch condition
1147 // (V) implies Cond, Cond can be either true, undef, or poison. In this case,
1148 // freeze(Cond) is either true or a nondeterministic value.
1149 // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
1150 // without affecting other instructions.
1151 auto *FICond = dyn_cast<FreezeInst>(Cond);
1152 if (FICond && FICond->hasOneUse())
1153 Cond = FICond->getOperand(0);
1154 else
1155 FICond = nullptr;
1156
1157 BasicBlock *CurrentBB = BB;
1158 BasicBlock *CurrentPred = BB->getSinglePredecessor();
1159 unsigned Iter = 0;
1160
1161 auto &DL = BB->getDataLayout();
1162
// Walk up through unique predecessors, bounded by the search threshold.
1163 while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1164 auto *PBI = dyn_cast<CondBrInst>(CurrentPred->getTerminator());
1165 if (!PBI)
1166 return false;
1167 if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1168 return false;
1169
1170 bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1171 std::optional<bool> Implication =
1172 isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1173
1174 // If the branch condition of BB (which is Cond) and CurrentPred are
1175 // exactly the same freeze instruction, Cond can be folded into CondIsTrue.
1176 if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
1177 if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
1178 FICond->getOperand(0))
1179 Implication = CondIsTrue;
1180 }
1181
1182 if (Implication) {
// Keep the implied successor, drop the other, and rewrite the terminator
// as an unconditional branch; then notify DTU/BPI of the removed edge.
1183 BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1184 BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1185 RemoveSucc->removePredecessor(BB);
1186 UncondBrInst *UncondBI =
1187 UncondBrInst::Create(KeepSucc, BI->getIterator());
1188 UncondBI->setDebugLoc(BI->getDebugLoc());
1189 ++NumFolds;
1190 BI->eraseFromParent();
1191 if (FICond)
1192 FICond->eraseFromParent();
1193
1194 DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
1195 if (auto *BPI = getBPI())
1196 BPI->eraseBlock(BB);
1197 return true;
1198 }
1199 CurrentBB = CurrentPred;
1200 CurrentPred = CurrentBB->getSinglePredecessor();
1201 }
1202
1203 return false;
1204}
1205
1206/// Return true if Op is an instruction defined in the given block.
1208 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1209 if (OpInst->getParent() == BB)
1210 return true;
1211 return false;
1212}
1213
1214/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1215/// redundant load instruction, eliminate it by replacing it with a PHI node.
1216/// This is an important optimization that encourages jump threading, and needs
1217/// to be run interlaced with other jump threading tasks.
// NOTE(review): the definition line (orig 1218, presumably
// `bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI)`)
// is missing from this extract; other dropped lines are flagged inline.
1219 // Don't hack volatile and ordered loads.
1220 if (!LoadI->isUnordered()) return false;
1221
1222 // If the load is defined in a block with exactly one predecessor, it can't be
1223 // partially redundant.
1224 BasicBlock *LoadBB = LoadI->getParent();
1225 if (LoadBB->getSinglePredecessor())
1226 return false;
1227
1228 // If the load is defined in an EH pad, it can't be partially redundant,
1229 // because the edges between the invoke and the EH pad cannot have other
1230 // instructions between them.
1231 if (LoadBB->isEHPad())
1232 return false;
1233
1234 Value *LoadedPtr = LoadI->getOperand(0);
1235
1236 // If the loaded operand is defined in the LoadBB and its not a phi,
1237 // it can't be available in predecessors.
1238 if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1239 return false;
1240
1241 // Scan a few instructions up from the load, to see if it is obviously live at
1242 // the entry to its block.
1243 BasicBlock::iterator BBIt(LoadI);
1244 bool IsLoadCSE;
1245 BatchAAResults BatchAA(*AA);
1246 // The dominator tree is updated lazily and may not be valid at this point.
1247 BatchAA.disableDominatorTree();
1248 if (Value *AvailableVal = FindAvailableLoadedValue(
1249 LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
1250 // If the value of the load is locally available within the block, just use
1251 // it. This frequently occurs for reg2mem'd allocas.
1252
1253 if (IsLoadCSE) {
1254 LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1255 combineMetadataForCSE(NLoadI, LoadI, false);
1256 LVI->forgetValue(NLoadI);
1257 };
1258
1259 // If the returned value is the load itself, replace with poison. This can
1260 // only happen in dead loops.
1261 if (AvailableVal == LoadI)
1262 AvailableVal = PoisonValue::get(LoadI->getType());
1263 if (AvailableVal->getType() != LoadI->getType()) {
1264 AvailableVal = CastInst::CreateBitOrPointerCast(
1265 AvailableVal, LoadI->getType(), "", LoadI->getIterator());
1266 cast<Instruction>(AvailableVal)->setDebugLoc(LoadI->getDebugLoc());
1267 }
1268 LoadI->replaceAllUsesWith(AvailableVal);
1269 LoadI->eraseFromParent();
1270 return true;
1271 }
1272
1273 // Otherwise, if we scanned the whole block and got to the top of the block,
1274 // we know the block is locally transparent to the load. If not, something
1275 // might clobber its value.
1276 if (BBIt != LoadBB->begin())
1277 return false;
1278
1279 // If all of the loads and stores that feed the value have the same AA tags,
1280 // then we can propagate them onto any newly inserted loads.
1281 AAMDNodes AATags = LoadI->getAAMetadata();
1282
1283 SmallPtrSet<BasicBlock*, 8> PredsScanned;
1284
1285 using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1286
1287 AvailablePredsTy AvailablePreds;
1288 BasicBlock *OneUnavailablePred = nullptr;
// NOTE(review): line 1289 — presumably the declaration of the `CSELoads`
// container used near the end of this function — is missing from this
// extract.
1290
1291 // If we got here, the loaded value is transparent through to the start of the
1292 // block. Check to see if it is available in any of the predecessor blocks.
1293 for (BasicBlock *PredBB : predecessors(LoadBB)) {
1294 // If we already scanned this predecessor, skip it.
1295 if (!PredsScanned.insert(PredBB).second)
1296 continue;
1297
1298 BBIt = PredBB->end();
1299 unsigned NumScanedInst = 0;
1300 Value *PredAvailable = nullptr;
1301 // NOTE: We don't CSE load that is volatile or anything stronger than
1302 // unordered, that should have been checked when we entered the function.
1303 assert(LoadI->isUnordered() &&
1304 "Attempting to CSE volatile or atomic loads");
1305 // If this is a load on a phi pointer, phi-translate it and search
1306 // for available load/store to the pointer in predecessors.
1307 Type *AccessTy = LoadI->getType();
1308 const auto &DL = LoadI->getDataLayout();
1309 MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
1310 LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
1311 AATags);
1312 PredAvailable = findAvailablePtrLoadStore(
1313 Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
1314 &BatchAA, &IsLoadCSE, &NumScanedInst);
1315
1316 // If PredBB has a single predecessor, continue scanning through the
1317 // single predecessor.
1318 BasicBlock *SinglePredBB = PredBB;
1319 while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1320 NumScanedInst < DefMaxInstsToScan) {
1321 SinglePredBB = SinglePredBB->getSinglePredecessor();
1322 if (SinglePredBB) {
1323 BBIt = SinglePredBB->end();
1324 PredAvailable = findAvailablePtrLoadStore(
1325 Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
1326 (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
1327 &NumScanedInst);
1328 }
1329 }
1330
1331 if (!PredAvailable) {
1332 OneUnavailablePred = PredBB;
1333 continue;
1334 }
1335
1336 if (IsLoadCSE)
1337 CSELoads.push_back(cast<LoadInst>(PredAvailable));
1338
1339 // If so, this load is partially redundant. Remember this info so that we
1340 // can create a PHI node.
1341 AvailablePreds.emplace_back(PredBB, PredAvailable);
1342 }
1343
1344 // If the loaded value isn't available in any predecessor, it isn't partially
1345 // redundant.
1346 if (AvailablePreds.empty()) return false;
1347
1348 // Okay, the loaded value is available in at least one (and maybe all!)
1349 // predecessors. If the value is unavailable in more than one unique
1350 // predecessor, we want to insert a merge block for those common predecessors.
1351 // This ensures that we only have to insert one reload, thus not increasing
1352 // code size.
1353 BasicBlock *UnavailablePred = nullptr;
1354
1355 // If the value is unavailable in one of predecessors, we will end up
1356 // inserting a new instruction into them. It is only valid if all the
1357 // instructions before LoadI are guaranteed to pass execution to its
1358 // successor, or if LoadI is safe to speculate.
1359 // TODO: If this logic becomes more complex, and we will perform PRE insertion
1360 // farther than to a predecessor, we need to reuse the code from GVN's PRE.
1361 // It requires domination tree analysis, so for this simple case it is an
1362 // overkill.
1363 if (PredsScanned.size() != AvailablePreds.size() &&
// NOTE(review): line 1364 — the second conjunct of this condition
// (presumably a speculation-safety check on LoadI) — is missing.
1365 for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
// NOTE(review): line 1366 — the inner guard (presumably a
// guaranteed-to-transfer-execution check on &*I) — is missing.
1367 return false;
1368
1369 // If there is exactly one predecessor where the value is unavailable, the
1370 // already computed 'OneUnavailablePred' block is it. If it ends in an
1371 // unconditional branch, we know that it isn't a critical edge.
1372 if (PredsScanned.size() == AvailablePreds.size()+1 &&
1373 OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1374 UnavailablePred = OneUnavailablePred;
1375 } else if (PredsScanned.size() != AvailablePreds.size()) {
1376 // Otherwise, we had multiple unavailable predecessors or we had a critical
1377 // edge from the one.
1378 SmallVector<BasicBlock*, 8> PredsToSplit;
1379 SmallPtrSet<BasicBlock *, 8> AvailablePredSet(
1380 llvm::from_range, llvm::make_first_range(AvailablePreds));
1381
1382 // Add all the unavailable predecessors to the PredsToSplit list.
1383 for (BasicBlock *P : predecessors(LoadBB)) {
1384 // If the predecessor is an indirect goto, we can't split the edge.
1385 if (isa<IndirectBrInst>(P->getTerminator()))
1386 return false;
1387
1388 if (!AvailablePredSet.count(P))
1389 PredsToSplit.push_back(P);
1390 }
1391
1392 // Split them out to their own block.
1393 UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1394 }
1395
1396 // If the value isn't available in all predecessors, then there will be
1397 // exactly one where it isn't available. Insert a load on that edge and add
1398 // it to the AvailablePreds list.
1399 if (UnavailablePred) {
1400 assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1401 "Can't handle critical edge here!");
1402 LoadInst *NewVal = new LoadInst(
1403 LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1404 LoadI->getName() + ".pr", false, LoadI->getAlign(),
1405 LoadI->getOrdering(), LoadI->getSyncScopeID(),
1406 UnavailablePred->getTerminator()->getIterator());
1407 NewVal->setDebugLoc(LoadI->getDebugLoc());
1408 if (AATags)
1409 NewVal->setAAMetadata(AATags);
1410
1411 AvailablePreds.emplace_back(UnavailablePred, NewVal);
1412 }
1413
1414 // Now we know that each predecessor of this block has a value in
1415 // AvailablePreds, sort them for efficient access as we're walking the preds.
1416 array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1417
1418 // Create a PHI node at the start of the block for the PRE'd load value.
1419 PHINode *PN = PHINode::Create(LoadI->getType(), pred_size(LoadBB), "");
1420 PN->insertBefore(LoadBB->begin());
1421 PN->takeName(LoadI);
1422 PN->setDebugLoc(LoadI->getDebugLoc());
1423
1424 // Insert new entries into the PHI for each predecessor. A single block may
1425 // have multiple entries here.
1426 for (BasicBlock *P : predecessors(LoadBB)) {
1427 AvailablePredsTy::iterator I =
1428 llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
1429
1430 assert(I != AvailablePreds.end() && I->first == P &&
1431 "Didn't find entry for predecessor!");
1432
1433 // If we have an available predecessor but it requires casting, insert the
1434 // cast in the predecessor and use the cast. Note that we have to update the
1435 // AvailablePreds vector as we go so that all of the PHI entries for this
1436 // predecessor use the same bitcast.
1437 Value *&PredV = I->second;
1438 if (PredV->getType() != LoadI->getType()) {
// NOTE(review): line 1439 — the assignment creating the cast (presumably
// `PredV = CastInst::CreateBitOrPointerCast(`) — is missing from this
// extract.
1440 PredV, LoadI->getType(), "", P->getTerminator()->getIterator());
1441 // The new cast is producing the value used to replace the load
1442 // instruction, so uses the load's debug location. If P does not always
1443 // branch to the load BB however then the debug location must be dropped,
1444 // as it is hoisted past a conditional branch.
1445 DebugLoc DL = P->getTerminator()->getNumSuccessors() == 1
1446 ? LoadI->getDebugLoc()
// NOTE(review): line 1447 — the else-arm of this ternary (presumably an
// empty DebugLoc) — is missing from this extract.
1448 cast<CastInst>(PredV)->setDebugLoc(DL);
1449 }
1450
1451 PN->addIncoming(PredV, I->first);
1452 }
1453
1454 for (LoadInst *PredLoadI : CSELoads) {
1455 combineMetadataForCSE(PredLoadI, LoadI, true);
1456 LVI->forgetValue(PredLoadI);
1457 }
1458
1459 LoadI->replaceAllUsesWith(PN);
1460 LoadI->eraseFromParent();
1461
1462 return true;
1463}
1464
1465/// findMostPopularDest - The specified list contains multiple possible
1466/// threadable destinations. Pick the one that occurs the most frequently in
1467/// the list.
1468static BasicBlock *
1470 const SmallVectorImpl<std::pair<BasicBlock *,
1471 BasicBlock *>> &PredToDestList) {
1472 assert(!PredToDestList.empty());
1473
1474 // Determine popularity. If there are multiple possible destinations, we
1475 // explicitly choose to ignore 'undef' destinations. We prefer to thread
1476 // blocks with known and real destinations to threading undef. We'll handle
1477 // them later if interesting.
1478 MapVector<BasicBlock *, unsigned> DestPopularity;
1479
1480 // Populate DestPopularity with the successors in the order they appear in the
1481 // successor list. This way, we ensure determinism by iterating it in the
1482 // same order in llvm::max_element below. We map nullptr to 0 so that we can
1483 // return nullptr when PredToDestList contains nullptr only.
1484 DestPopularity[nullptr] = 0;
1485 for (auto *SuccBB : successors(BB))
1486 DestPopularity[SuccBB] = 0;
1487
1488 for (const auto &PredToDest : PredToDestList)
1489 if (PredToDest.second)
1490 DestPopularity[PredToDest.second]++;
1491
1492 // Find the most popular dest.
1493 auto MostPopular = llvm::max_element(DestPopularity, llvm::less_second());
1494
1495 // Okay, we have finally picked the most popular destination.
1496 return MostPopular->first;
1497}
1498
1499// Try to evaluate the value of V when the control flows from PredPredBB to
1500// BB->getSinglePredecessor() and then on to BB.
// NOTE(review): the definition line (orig 1501 — return type, qualified name
// and first parameter of this public wrapper) is missing from this extract,
// as is line 1505 (presumably the local `SmallPtrSet<Value *, 8> Visited;`
// that seeds the recursion-guard set passed to the recursive overload).
1502 BasicBlock *PredPredBB,
1503 Value *V,
1504 const DataLayout &DL) {
1506 return evaluateOnPredecessorEdge(BB, PredPredBB, V, DL, Visited);
1507}
1508
1510 BasicBlock *BB, BasicBlock *PredPredBB, Value *V, const DataLayout &DL,
1511 SmallPtrSet<Value *, 8> &Visited) {
1512 if (!Visited.insert(V).second)
1513 return nullptr;
1514 llvm::scope_exit _([&Visited, V]() { Visited.erase(V); });
1515
1516 BasicBlock *PredBB = BB->getSinglePredecessor();
1517 assert(PredBB && "Expected a single predecessor");
1518
1519 if (Constant *Cst = dyn_cast<Constant>(V)) {
1520 return Cst;
1521 }
1522
1523 // Consult LVI if V is not an instruction in BB or PredBB.
1525 if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1526 return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1527 }
1528
1529 // Look into a PHI argument.
1530 if (PHINode *PHI = dyn_cast<PHINode>(V)) {
1531 if (PHI->getParent() == PredBB)
1532 return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1533 return nullptr;
1534 }
1535
1536 // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
1537 // Note that during the execution of the pass, phi nodes may become constant
1538 // and may be removed, which can lead to self-referencing instructions in
1539 // code that becomes unreachable. Consequently, we need to handle those
1540 // instructions in unreachable code and check before going into recursion.
1541 if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1542 if (CondCmp->getParent() == BB) {
1544 BB, PredPredBB, CondCmp->getOperand(0), DL, Visited);
1546 BB, PredPredBB, CondCmp->getOperand(1), DL, Visited);
1547 if (Op0 && Op1) {
1548 return ConstantFoldCompareInstOperands(CondCmp->getPredicate(), Op0,
1549 Op1, DL);
1550 }
1551 }
1552 return nullptr;
1553 }
1554
1555 return nullptr;
1556}
1557
// NOTE(review): the definition line (orig 1558, presumably
// `bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock
// *BB,`) is missing from this extract; other dropped lines are flagged
// inline below. The routine maps each predecessor's known condition value to
// a concrete destination, then either folds the terminator (single known
// destination) or threads the most popular destination.
1559 ConstantPreference Preference,
1560 Instruction *CxtI) {
1561 // If threading this would thread across a loop header, don't even try to
1562 // thread the edge.
1563 if (LoopHeaders.count(BB))
1564 return false;
1565
1566 PredValueInfoTy PredValues;
1567 if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1568 CxtI)) {
1569 // We don't have known values in predecessors. See if we can thread through
1570 // BB and its sole predecessor.
// NOTE(review): line 1571 — the fallback return (presumably a call to a
// thread-through-two-basic-blocks helper) — is missing from this extract.
1572 }
1573
1574 assert(!PredValues.empty() &&
1575 "computeValueKnownInPredecessors returned true with no values");
1576
1577 LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1578 for (const auto &PredValue : PredValues) {
1579 dbgs() << " BB '" << BB->getName()
1580 << "': FOUND condition = " << *PredValue.first
1581 << " for pred '" << PredValue.second->getName() << "'.\n";
1582 });
1583
1584 // Decide what we want to thread through. Convert our list of known values to
1585 // a list of known destinations for each pred. This also discards duplicate
1586 // predecessors and keeps track of the undefined inputs (which are represented
1587 // as a null dest in the PredToDestList).
// NOTE(review): lines 1588-1589 — presumably the declarations of
// `PredToDestList` and the `SeenPreds` set used below — are missing from
// this extract.
1590
1591 BasicBlock *OnlyDest = nullptr;
1592 BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1593 Constant *OnlyVal = nullptr;
1594 Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1595
1596 for (const auto &PredValue : PredValues) {
1597 BasicBlock *Pred = PredValue.second;
1598 if (!SeenPreds.insert(Pred).second)
1599 continue; // Duplicate predecessor entry.
1600
1601 Constant *Val = PredValue.first;
1602
1603 BasicBlock *DestBB;
1604 if (isa<UndefValue>(Val))
1605 DestBB = nullptr;
1606 else if (CondBrInst *BI = dyn_cast<CondBrInst>(BB->getTerminator())) {
1607 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1608 DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1609 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1610 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1611 DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1612 } else {
// NOTE(review): line 1613 — the first half of this assert (presumably
// checking the terminator is an IndirectBrInst) — is missing.
1614 && "Unexpected terminator");
1615 assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1616 DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1617 }
1618
1619 // If we have exactly one destination, remember it for efficiency below.
1620 if (PredToDestList.empty()) {
1621 OnlyDest = DestBB;
1622 OnlyVal = Val;
1623 } else {
1624 if (OnlyDest != DestBB)
1625 OnlyDest = MultipleDestSentinel;
1626 // It possible we have same destination, but different value, e.g. default
1627 // case in switchinst.
1628 if (Val != OnlyVal)
1629 OnlyVal = MultipleVal;
1630 }
1631
1632 // If the predecessor ends with an indirect goto, we can't change its
1633 // destination.
1634 if (isa<IndirectBrInst>(Pred->getTerminator()))
1635 continue;
1636
1637 PredToDestList.emplace_back(Pred, DestBB);
1638 }
1639
1640 // If all edges were unthreadable, we fail.
1641 if (PredToDestList.empty())
1642 return false;
1643
1644 // If all the predecessors go to a single known successor, we want to fold,
1645 // not thread. By doing so, we do not need to duplicate the current block and
1646 // also miss potential opportunities in case we dont/cant duplicate.
1647 if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1648 if (BB->hasNPredecessors(PredToDestList.size())) {
1649 bool SeenFirstBranchToOnlyDest = false;
1650 std::vector <DominatorTree::UpdateType> Updates;
1651 Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
1652 for (BasicBlock *SuccBB : successors(BB)) {
1653 if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1654 SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1655 } else {
1656 SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1657 Updates.push_back({DominatorTree::Delete, BB, SuccBB});
1658 }
1659 }
1660
1661 // Finally update the terminator.
1662 Instruction *Term = BB->getTerminator();
1663 Instruction *NewBI = UncondBrInst::Create(OnlyDest, Term->getIterator());
1664 NewBI->setDebugLoc(Term->getDebugLoc());
1665 ++NumFolds;
1666 Term->eraseFromParent();
1667 DTU->applyUpdatesPermissive(Updates);
1668 if (auto *BPI = getBPI())
1669 BPI->eraseBlock(BB);
1670
1671 // If the condition is now dead due to the removal of the old terminator,
1672 // erase it.
1673 if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1674 if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1675 CondInst->eraseFromParent();
1676 // We can safely replace *some* uses of the CondInst if it has
1677 // exactly one value as returned by LVI. RAUW is incorrect in the
1678 // presence of guards and assumes, that have the `Cond` as the use. This
1679 // is because we use the guards/assume to reason about the `Cond` value
1680 // at the end of block, but RAUW unconditionally replaces all uses
1681 // including the guards/assumes themselves and the uses before the
1682 // guard/assume.
1683 else if (OnlyVal && OnlyVal != MultipleVal)
1684 replaceFoldableUses(CondInst, OnlyVal, BB);
1685 }
1686 return true;
1687 }
1688 }
1689
1690 // Determine which is the most common successor. If we have many inputs and
1691 // this block is a switch, we want to start by threading the batch that goes
1692 // to the most popular destination first. If we only know about one
1693 // threadable destination (the common case) we can avoid this.
1694 BasicBlock *MostPopularDest = OnlyDest;
1695
1696 if (MostPopularDest == MultipleDestSentinel) {
1697 // Remove any loop headers from the Dest list, threadEdge conservatively
1698 // won't process them, but we might have other destination that are eligible
1699 // and we still want to process.
1700 erase_if(PredToDestList,
1701 [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1702 return LoopHeaders.contains(PredToDest.second);
1703 });
1704
1705 if (PredToDestList.empty())
1706 return false;
1707
1708 MostPopularDest = findMostPopularDest(BB, PredToDestList);
1709 }
1710
1711 // Now that we know what the most popular destination is, factor all
1712 // predecessors that will jump to it into a single predecessor.
1713 SmallVector<BasicBlock*, 16> PredsToFactor;
1714 for (const auto &PredToDest : PredToDestList)
1715 if (PredToDest.second == MostPopularDest) {
1716 BasicBlock *Pred = PredToDest.first;
1717
1718 // This predecessor may be a switch or something else that has multiple
1719 // edges to the block. Factor each of these edges by listing them
1720 // according to # occurrences in PredsToFactor.
1721 for (BasicBlock *Succ : successors(Pred))
1722 if (Succ == BB)
1723 PredsToFactor.push_back(Pred);
1724 }
1725
1726 // If the threadable edges are branching on an undefined value, we get to pick
1727 // the destination that these predecessors should get to.
1728 if (!MostPopularDest)
1729 MostPopularDest = BB->getTerminator()->
1730 getSuccessor(getBestDestForJumpOnUndef(BB));
1731
1732 // Ok, try to thread it!
1733 return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
1734}
1735
1736/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
1737/// a PHI node (or freeze PHI) in the current block. See if there are any
1738/// simplifications we can do based on inputs to the phi node.
// NOTE(review): the definition line (orig 1739, presumably
// `bool JumpThreadingPass::processBranchOnPHI(PHINode *PN)`) is missing from
// this extract.
1740 BasicBlock *BB = PN->getParent();
1741
1742 // TODO: We could make use of this to do it once for blocks with common PHI
1743 // values.
// NOTE(review): line 1744 — presumably the declaration of the one-element
// `PredBBs` vector resized below — is missing from this extract.
1745 PredBBs.resize(1);
1746
1747 // If any of the predecessor blocks end in an unconditional branch, we can
1748 // *duplicate* the conditional branch into that block in order to further
1749 // encourage jump threading and to eliminate cases where we have branch on a
1750 // phi of an icmp (branch on icmp is much better).
1751 // This is still beneficial when a frozen phi is used as the branch condition
1752 // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
1753 // to br(icmp(freeze ...)).
1754 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1755 BasicBlock *PredBB = PN->getIncomingBlock(i);
1756 if (isa<UncondBrInst>(PredBB->getTerminator())) {
1757 PredBBs[0] = PredBB;
1758 // Try to duplicate BB into PredBB.
1759 if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1760 return true;
1761 }
1762 }
1763
1764 return false;
1765}
1766
1767/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
1768/// a xor instruction in the current block. See if there are any
1769/// simplifications we can do based on inputs to the xor.
/// \returns true if the xor was simplified away or BB was duplicated into its
/// predecessors; false if no change was made.
 1771 BasicBlock *BB = BO->getParent();
 1772
 1773 // If either the LHS or RHS of the xor is a constant, don't do this
 1774 // optimization.
 1775 if (isa<ConstantInt>(BO->getOperand(0)) ||
 1777 return false;
 1778
 1779 // If the first instruction in BB isn't a phi, we won't be able to infer
 1780 // anything special about any particular predecessor.
 1781 if (!isa<PHINode>(BB->front()))
 1782 return false;
 1783
 1784 // If this BB is a landing pad, we won't be able to split the edge into it.
 1785 if (BB->isEHPad())
 1786 return false;
 1787
 1788 // If we have a xor as the branch input to this block, and we know that the
 1789 // LHS or RHS of the xor in any predecessor is true/false, then we can clone
 1790 // the condition into the predecessor and fix that value to true, saving some
 1791 // logical ops on that path and encouraging other paths to simplify.
 1792 //
 1793 // This copies something like this:
 1794 //
 1795 // BB:
 1796 // %X = phi i1 [1], [%X']
 1797 // %Y = icmp eq i32 %A, %B
 1798 // %Z = xor i1 %X, %Y
 1799 // br i1 %Z, ...
 1800 //
 1801 // Into:
 1802 // BB':
 1803 // %Y = icmp ne i32 %A, %B
 1804 // br i1 %Y, ...
 1805
 1806 PredValueInfoTy XorOpValues;
// isLHS is true while the predecessor-known constants belong to operand 0;
// it flips to false if only operand 1 has known values in predecessors.
 1807 bool isLHS = true;
 1808 if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
 1809 WantInteger, BO)) {
 1810 assert(XorOpValues.empty());
 1811 if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
 1812 WantInteger, BO))
 1813 return false;
 1814 isLHS = false;
 1815 }
 1816
 1817 assert(!XorOpValues.empty() &&
 1818 "computeValueKnownInPredecessors returned true with no values");
 1819
 1820 // Scan the information to see which is most popular: true or false. The
 1821 // predecessors can be of the set true, false, or undef.
 1822 unsigned NumTrue = 0, NumFalse = 0;
 1823 for (const auto &XorOpValue : XorOpValues) {
 1824 if (isa<UndefValue>(XorOpValue.first))
 1825 // Ignore undefs for the count.
 1826 continue;
 1827 if (cast<ConstantInt>(XorOpValue.first)->isZero())
 1828 ++NumFalse;
 1829 else
 1830 ++NumTrue;
 1831 }
 1832
 1833 // Determine which value to split on, true, false, or undef if neither.
 1834 ConstantInt *SplitVal = nullptr;
 1835 if (NumTrue > NumFalse)
 1836 SplitVal = ConstantInt::getTrue(BB->getContext());
 1837 else if (NumTrue != 0 || NumFalse != 0)
 1838 SplitVal = ConstantInt::getFalse(BB->getContext());
 1839
 1840 // Collect all of the blocks that this can be folded into so that we can
 1841 // factor this once and clone it once.
 1842 SmallVector<BasicBlock*, 8> BlocksToFoldInto;
 1843 for (const auto &XorOpValue : XorOpValues) {
 1844 if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
 1845 continue;
 1846
 1847 BlocksToFoldInto.push_back(XorOpValue.second);
 1848 }
 1849
 1850 // If we inferred a value for all of the predecessors, then duplication won't
 1851 // help us. However, we can just replace the LHS or RHS with the constant.
 1852 if (BlocksToFoldInto.size() ==
 1853 cast<PHINode>(BB->front()).getNumIncomingValues()) {
 1854 if (!SplitVal) {
 1855 // If all preds provide undef, just nuke the xor, because it is undef too.
 1857 BO->eraseFromParent();
 1858 } else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) {
 1859 // If all preds provide 0, replace the xor with the other input.
 1860 BO->replaceAllUsesWith(BO->getOperand(isLHS));
 1861 BO->eraseFromParent();
 1862 } else {
 1863 // If all preds provide 1, set the computed value to 1.
 1864 BO->setOperand(!isLHS, SplitVal);
 1865 }
 1866
 1867 return true;
 1868 }
 1869
 1870 // If any of predecessors end with an indirect goto, we can't change its
 1871 // destination.
 1872 if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
 1873 return isa<IndirectBrInst>(Pred->getTerminator());
 1874 }))
 1875 return false;
 1876
 1877 // Try to duplicate BB into PredBB.
 1878 return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
 1879}
1880
1881/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1882/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1883/// NewPred using the entries from OldPred (suitably mapped).
/// Instruction values that were cloned for NewPred appear in ValueMap; the
/// PHI entry then refers to the clone rather than the original instruction.
 1885 BasicBlock *OldPred,
 1886 BasicBlock *NewPred,
 1888 for (PHINode &PN : PHIBB->phis()) {
 1889 // Ok, we have a PHI node. Figure out what the incoming value was for the
 1890 // DestBlock.
 1891 Value *IV = PN.getIncomingValueForBlock(OldPred);
 1892
 1893 // Remap the value if necessary.
 1894 if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
 1896 if (I != ValueMap.end())
 1897 IV = I->second;
 1898 }
 1899
// Non-instruction values (constants, arguments) are reused unchanged.
 1900 PN.addIncoming(IV, NewPred);
 1901 }
 1902}
1903
1904/// Merge basic block BB into its sole predecessor if possible.
/// \returns true if BB was merged into SinglePred (IR changed).
 1906 BasicBlock *SinglePred = BB->getSinglePredecessor();
 1907 if (!SinglePred)
 1908 return false;
 1909
// The predecessor must end in a plain single-successor terminator, must not
// be BB itself, and BB's address must not be taken and used.
 1910 const Instruction *TI = SinglePred->getTerminator();
 1911 if (TI->isSpecialTerminator() || TI->getNumSuccessors() != 1 ||
 1912 SinglePred == BB || hasAddressTakenAndUsed(BB))
 1913 return false;
 1914
 1915 // MergeBasicBlockIntoOnlyPred may delete SinglePred, we need to avoid
 1916 // deleting a BB pointer from Unreachable.
 1917 if (Unreachable.count(SinglePred))
 1918 return false;
 1919
 1920 // Don't merge if both the basic block and the predecessor contain loop or
 1921 // entry convergent intrinsics, since there may only be one convergence token
 1922 // per block.
 1925 return false;
 1926
 1927 // If SinglePred was a loop header, BB becomes one.
 1928 if (LoopHeaders.erase(SinglePred))
 1929 LoopHeaders.insert(BB);
 1930
 1931 LVI->eraseBlock(SinglePred);
 1932 MergeBasicBlockIntoOnlyPred(BB, DTU.get());
 1933
 1934 // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
 1935 // BB code within one basic block `BB`), we need to invalidate the LVI
 1936 // information associated with BB, because the LVI information need not be
 1937 // true for all of BB after the merge. For example,
 1938 // Before the merge, LVI info and code is as follows:
 1939 // SinglePred: <LVI info1 for %p val>
 1940 // %y = use of %p
 1941 // call @exit() // need not transfer execution to successor.
 1942 // assume(%p) // from this point on %p is true
 1943 // br label %BB
 1944 // BB: <LVI info2 for %p val, i.e. %p is true>
 1945 // %x = use of %p
 1946 // br label exit
 1947 //
 1948 // Note that this LVI info for blocks BB and SinglPred is correct for %p
 1949 // (info2 and info1 respectively). After the merge and the deletion of the
 1950 // LVI info1 for SinglePred. We have the following code:
 1951 // BB: <LVI info2 for %p val>
 1952 // %y = use of %p
 1953 // call @exit()
 1954 // assume(%p)
 1955 // %x = use of %p <-- LVI info2 is correct from here onwards.
 1956 // br label exit
 1957 // LVI info2 for BB is incorrect at the beginning of BB.
 1958
 1959 // Invalidate LVI information for BB if the LVI is not provably true for
 1960 // all of BB.
 1962 LVI->eraseBlock(BB);
 1963 return true;
 1964}
1965
1966/// Update the SSA form. NewBB contains instructions that are copied from BB.
1967/// ValueMapping maps old values in BB to new ones in NewBB.
/// Debug variable records referring to BB-defined values from outside BB are
/// updated as well (via SSAUpdater::UpdateDebugValues).
 1969 ValueToValueMapTy &ValueMapping) {
 1970 // If there were values defined in BB that are used outside the block, then we
 1971 // now have to update all uses of the value to use either the original value,
 1972 // the cloned value, or some PHI derived value. This can require arbitrary
 1973 // PHI insertion, of which we are prepared to do, clean these up now.
 1974 SSAUpdater SSAUpdate;
 1975 SmallVector<Use *, 16> UsesToRename;
 1976 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
 1977
 1978 for (Instruction &I : *BB) {
 1979 // Scan all uses of this instruction to see if it is used outside of its
 1980 // block, and if so, record them in UsesToRename.
 1981 for (Use &U : I.uses()) {
 1982 Instruction *User = cast<Instruction>(U.getUser());
// A PHI use counts as "inside BB" only when the incoming edge is from BB.
 1983 if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
 1984 if (UserPN->getIncomingBlock(U) == BB)
 1985 continue;
 1986 } else if (User->getParent() == BB)
 1987 continue;
 1988
 1989 UsesToRename.push_back(&U);
 1990 }
 1991
 1992 // Find debug values outside of the block
 1993 findDbgValues(&I, DbgVariableRecords);
 1994 llvm::erase_if(DbgVariableRecords, [&](const DbgVariableRecord *DbgVarRec) {
 1995 return DbgVarRec->getParent() == BB;
 1996 });
 1997
 1998 // If there are no uses outside the block, we're done with this instruction.
 1999 if (UsesToRename.empty() && DbgVariableRecords.empty())
 2000 continue;
 2001 LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
 2002
 2003 // We found a use of I outside of BB. Rename all uses of I that are outside
 2004 // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
 2005 // with the two values we know.
 2006 SSAUpdate.Initialize(I.getType(), I.getName());
 2007 SSAUpdate.AddAvailableValue(BB, &I);
 2008 SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
 2009
 2010 while (!UsesToRename.empty())
 2011 SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
 2012 if (!DbgVariableRecords.empty()) {
 2013 SSAUpdate.UpdateDebugValues(&I, DbgVariableRecords);
 2014 DbgVariableRecords.clear();
 2015 }
 2016
 2017 LLVM_DEBUG(dbgs() << "\n");
 2018 }
 2019}
2020
// Remap source-location atoms for every instruction in the range [Begin, End)
// using the atom mapping carried by VM. No-op when VM carries no atom entries.
 2023 if (VM.AtomMap.empty())
 2024 return;
 2025 for (auto It = Begin; It != End; ++It)
 2026 RemapSourceAtom(&*It, VM);
 2027}
2028
2029/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
2030/// arguments that come from PredBB. Return the map from the variables in the
2031/// source basic block to the variables in the newly created basic block.
/// Debug records attached to cloned instructions (and to the terminator slot
/// at BE, if any) are cloned and retargeted to the mapped values.
2032
 2036 BasicBlock *NewBB,
 2037 BasicBlock *PredBB) {
 2038 // We are going to have to map operands from the source basic block to the new
 2039 // copy of the block 'NewBB'. If there are PHI nodes in the source basic
 2040 // block, evaluate them to account for entry from PredBB.
 2041
 2042 // Retargets dbg.value to any renamed variables.
 2043 auto RetargetDbgVariableRecordIfPossible = [&](DbgVariableRecord *DVR) {
 2044 SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
 2045 for (auto *Op : DVR->location_ops()) {
 2047 if (!OpInst)
 2048 continue;
 2049
 2050 auto I = ValueMapping.find(OpInst);
 2051 if (I != ValueMapping.end())
 2052 OperandsToRemap.insert({OpInst, I->second});
 2053 }
 2054
 2055 for (auto &[OldOp, MappedOp] : OperandsToRemap)
 2056 DVR->replaceVariableLocationOp(OldOp, MappedOp);
 2057 };
 2058
 2059 BasicBlock *RangeBB = BI->getParent();
 2060
 2061 // Clone the phi nodes of the source basic block into NewBB. The resulting
 2062 // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
 2063 // might need to rewrite the operand of the cloned phi.
 2064 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
 2065 PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
 2066 NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
 2067 ValueMapping[PN] = NewPN;
 2068 if (const DebugLoc &DL = PN->getDebugLoc())
 2069 mapAtomInstance(DL, ValueMapping);
 2070 }
 2071
 2072 // Clone noalias scope declarations in the threaded block. When threading a
 2073 // loop exit, we would otherwise end up with two identical scope declarations
 2074 // visible at the same time.
 2075 SmallVector<MDNode *> NoAliasScopes;
 2076 DenseMap<MDNode *, MDNode *> ClonedScopes;
 2077 LLVMContext &Context = PredBB->getContext();
 2078 identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
 2079 cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
 2080
 2081 auto CloneAndRemapDbgInfo = [&](Instruction *NewInst, Instruction *From) {
 2082 auto DVRRange = NewInst->cloneDebugInfoFrom(From);
 2083 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
 2084 RetargetDbgVariableRecordIfPossible(&DVR);
 2085 };
 2086
 2087 // Clone the non-phi instructions of the source basic block into NewBB,
 2088 // keeping track of the mapping and using it to remap operands in the cloned
 2089 // instructions.
 2090 for (; BI != BE; ++BI) {
 2091 Instruction *New = BI->clone();
 2092 New->setName(BI->getName());
 2093 New->insertInto(NewBB, NewBB->end());
 2094 ValueMapping[&*BI] = New;
 2095 adaptNoAliasScopes(New, ClonedScopes, Context);
 2096
 2097 CloneAndRemapDbgInfo(New, &*BI);
 2098 if (const DebugLoc &DL = New->getDebugLoc())
 2099 mapAtomInstance(DL, ValueMapping);
 2100
 2101 // Remap operands to patch up intra-block references.
 2102 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
 2103 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
 2104 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
 2105 if (I != ValueMapping.end())
 2106 New->setOperand(i, I->second);
 2107 }
 2108 }
 2109
 2110 // There may be DbgVariableRecords on the terminator, clone directly from
 2111 // marker to marker as there isn't an instruction there.
 2112 if (BE != RangeBB->end() && BE->hasDbgRecords()) {
 2113 // Dump them at the end.
 2114 DbgMarker *Marker = RangeBB->getMarker(BE);
 2115 DbgMarker *EndMarker = NewBB->createMarker(NewBB->end());
 2116 auto DVRRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);
 2117 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
 2118 RetargetDbgVariableRecordIfPossible(&DVR);
 2119 }
 2120}
2121
2122/// Attempt to thread through two successive basic blocks.
/// \returns true if threadThroughTwoBasicBlocks() was invoked and the
/// PredPredBB->PredBB->BB->SuccBB thread was performed.
 2124 Value *Cond) {
 2125 // Consider:
 2126 //
 2127 // PredBB:
 2128 // %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
 2129 // %tobool = icmp eq i32 %cond, 0
 2130 // br i1 %tobool, label %BB, label ...
 2131 //
 2132 // BB:
 2133 // %cmp = icmp eq i32* %var, null
 2134 // br i1 %cmp, label ..., label ...
 2135 //
 2136 // We don't know the value of %var at BB even if we know which incoming edge
 2137 // we take to BB. However, once we duplicate PredBB for each of its incoming
 2138 // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
 2139 // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
 2140
 2141 // Require that BB end with a Branch for simplicity.
 2143 if (!CondBr)
 2144 return false;
 2145
 2146 // BB must have exactly one predecessor.
 2147 BasicBlock *PredBB = BB->getSinglePredecessor();
 2148 if (!PredBB)
 2149 return false;
 2150
 2151 // Require that PredBB end with a conditional Branch. If PredBB ends with an
 2152 // unconditional branch, we should be merging PredBB and BB instead. For
 2153 // simplicity, we don't deal with a switch.
 2154 CondBrInst *PredBBBranch = dyn_cast<CondBrInst>(PredBB->getTerminator());
 2155 if (!PredBBBranch)
 2156 return false;
 2157
 2158 // If PredBB has exactly one incoming edge, we don't gain anything by copying
 2159 // PredBB.
 2160 if (PredBB->getSinglePredecessor())
 2161 return false;
 2162
 2163 // Don't thread through PredBB if it contains a successor edge to itself, in
 2164 // which case we would infinite loop. Suppose we are threading an edge from
 2165 // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
 2166 // successor edge to itself. If we allowed jump threading in this case, we
 2167 // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
 2168 // PredBB.thread has a successor edge to PredBB, we would immediately come up
 2169 // with another jump threading opportunity from PredBB.thread through PredBB
 2170 // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
 2171 // would keep peeling one iteration from PredBB.
 2172 if (llvm::is_contained(successors(PredBB), PredBB))
 2173 return false;
 2174
 2175 // Don't thread across a loop header.
 2176 if (LoopHeaders.count(PredBB))
 2177 return false;
 2178
 2179 // Avoid complication with duplicating EH pads.
 2180 if (PredBB->isEHPad())
 2181 return false;
 2182
 2183 // Find a predecessor that we can thread. For simplicity, we only consider a
 2184 // successor edge out of BB to which we thread exactly one incoming edge into
 2185 // PredBB.
 2186 unsigned ZeroCount = 0;
 2187 unsigned OneCount = 0;
 2188 BasicBlock *ZeroPred = nullptr;
 2189 BasicBlock *OnePred = nullptr;
 2190 const DataLayout &DL = BB->getDataLayout();
 2191 for (BasicBlock *P : predecessors(PredBB)) {
 2192 // If PredPred ends with IndirectBrInst, we can't handle it.
 2193 if (isa<IndirectBrInst>(P->getTerminator()))
 2194 continue;
// CI is the constant value of Cond when entered from predecessor P
// (established on the preceding, extraction-dropped, lines).
 2197 if (CI->isZero()) {
 2198 ZeroCount++;
 2199 ZeroPred = P;
 2200 } else if (CI->isOne()) {
 2201 OneCount++;
 2202 OnePred = P;
 2203 }
 2204 }
 2205 }
 2206
 2207 // Disregard complicated cases where we have to thread multiple edges.
 2208 BasicBlock *PredPredBB;
 2209 if (ZeroCount == 1) {
 2210 PredPredBB = ZeroPred;
 2211 } else if (OneCount == 1) {
 2212 PredPredBB = OnePred;
 2213 } else {
 2214 return false;
 2215 }
 2216
 2217 BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
 2218
 2219 // If threading to the same block as we come from, we would infinite loop.
 2220 if (SuccBB == BB) {
 2221 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
 2222 << "' - would thread to self!\n");
 2223 return false;
 2224 }
 2225
 2226 // If threading this would thread across a loop header, don't thread the edge.
 2227 // See the comments above findLoopHeaders for justifications and caveats.
 2228 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
 2229 LLVM_DEBUG({
 2230 bool BBIsHeader = LoopHeaders.count(BB);
 2231 bool SuccIsHeader = LoopHeaders.count(SuccBB);
 2232 dbgs() << " Not threading across "
 2233 << (BBIsHeader ? "loop header BB '" : "block BB '")
 2234 << BB->getName() << "' to dest "
 2235 << (SuccIsHeader ? "loop header BB '" : "block BB '")
 2236 << SuccBB->getName()
 2237 << "' - it might create an irreducible loop!\n";
 2238 });
 2239 return false;
 2240 }
 2241
 2242 // Compute the cost of duplicating BB and PredBB.
 2243 unsigned BBCost = getJumpThreadDuplicationCost(
 2244 TTI, BB, BB->getTerminator(), BBDupThreshold);
 2245 unsigned PredBBCost = getJumpThreadDuplicationCost(
 2246 TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
 2247
 2248 // Give up if costs are too high. We need to check BBCost and PredBBCost
 2249 // individually before checking their sum because getJumpThreadDuplicationCost
 2250 // return (unsigned)~0 for those basic blocks that cannot be duplicated.
 2251 if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
 2252 BBCost + PredBBCost > BBDupThreshold) {
 2253 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
 2254 << "' - Cost is too high: " << PredBBCost
 2255 << " for PredBB, " << BBCost << "for BB\n");
 2256 return false;
 2257 }
 2258
 2259 // Now we are ready to duplicate PredBB.
 2260 threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
 2261 return true;
 2262}
2263
/// Thread PredPredBB->PredBB->BB->SuccBB: clone PredBB into a fresh block
/// (NewBB) reached only from PredPredBB, then thread NewBB's edge across BB
/// to SuccBB via threadEdge(). Updates BFI/BPI, the DomTree (via DTU), PHI
/// nodes in PredBB's successors, and SSA form for values defined in PredBB.
 2265 BasicBlock *PredBB,
 2266 BasicBlock *BB,
 2267 BasicBlock *SuccBB) {
 2268 LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
 2269 << BB->getName() << "'\n");
 2270
 2271 // Build BPI/BFI before any changes are made to IR.
 2272 bool HasProfile = doesBlockHaveProfileData(BB);
 2273 auto *BFI = getOrCreateBFI(HasProfile);
 2274 auto *BPI = getOrCreateBPI(BFI != nullptr);
 2275
 2276 CondBrInst *CondBr = cast<CondBrInst>(BB->getTerminator());
 2277 CondBrInst *PredBBBranch = cast<CondBrInst>(PredBB->getTerminator());
 2278
 2279 BasicBlock *NewBB =
 2280 BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
 2281 PredBB->getParent(), PredBB);
 2282 NewBB->moveAfter(PredBB);
 2283
 2284 // Set the block frequency of NewBB.
 2285 if (BFI) {
 2286 assert(BPI && "It's expected BPI to exist along with BFI");
 2287 auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
 2288 BPI->getEdgeProbability(PredPredBB, PredBB);
 2289 BFI->setBlockFreq(NewBB, NewBBFreq);
 2290 }
 2291
 2292 // We are going to have to map operands from the original BB block to the new
 2293 // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
 2294 // to account for entry from PredPredBB.
 2295 ValueToValueMapTy ValueMapping;
 2296 cloneInstructions(ValueMapping, PredBB->begin(), PredBB->end(), NewBB,
 2297 PredPredBB);
 2298
 2299 // Copy the edge probabilities from PredBB to NewBB.
 2300 if (BPI)
 2301 BPI->copyEdgeProbabilities(PredBB, NewBB);
 2302
 2303 // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
 2304 // This eliminates predecessors from PredPredBB, which requires us to simplify
 2305 // any PHI nodes in PredBB.
 2306 Instruction *PredPredTerm = PredPredBB->getTerminator();
 2307 for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
 2308 if (PredPredTerm->getSuccessor(i) == PredBB) {
 2309 PredBB->removePredecessor(PredPredBB, true);
 2310 PredPredTerm->setSuccessor(i, NewBB);
 2311 }
 2312
// NewBB ends in a clone of PredBB's conditional branch, so both of that
// branch's successors gain NewBB as a predecessor; fix up their PHIs.
 2313 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
 2314 ValueMapping);
 2315 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
 2316 ValueMapping);
 2317
 2318 DTU->applyUpdatesPermissive(
 2319 {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
 2320 {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
 2321 {DominatorTree::Insert, PredPredBB, NewBB},
 2322 {DominatorTree::Delete, PredPredBB, PredBB}});
 2323
 2324 // Remap source location atoms because we're duplicating control flow.
 2325 remapSourceAtoms(ValueMapping, NewBB->begin(), NewBB->end());
 2326
 2327 updateSSA(PredBB, NewBB, ValueMapping);
 2328
 2329 // Clean up things like PHI nodes with single operands, dead instructions,
 2330 // etc.
 2331 SimplifyInstructionsInBlock(NewBB, TLI);
 2332 SimplifyInstructionsInBlock(PredBB, TLI);
 2333
 2334 SmallVector<BasicBlock *, 1> PredsToFactor;
 2335 PredsToFactor.push_back(NewBB);
 2336 threadEdge(BB, PredsToFactor, SuccBB);
 2337}
2338
2339/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
/// \returns true if the edge was threaded; false when the thread would go to
/// self, would cross a loop header, or the duplication cost is too high.
 2341 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
 2342 BasicBlock *SuccBB) {
 2343 // If threading to the same block as we come from, we would infinite loop.
 2344 if (SuccBB == BB) {
 2345 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
 2346 << "' - would thread to self!\n");
 2347 return false;
 2348 }
 2349
 2350 // If threading this would thread across a loop header, don't thread the edge.
 2351 // See the comments above findLoopHeaders for justifications and caveats.
 2352 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
 2353 LLVM_DEBUG({
 2354 bool BBIsHeader = LoopHeaders.count(BB);
 2355 bool SuccIsHeader = LoopHeaders.count(SuccBB);
 2356 dbgs() << " Not threading across "
 2357 << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
 2358 << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
 2359 << SuccBB->getName() << "' - it might create an irreducible loop!\n";
 2360 });
 2361 return false;
 2362 }
 2363
 2364 unsigned JumpThreadCost = getJumpThreadDuplicationCost(
 2365 TTI, BB, BB->getTerminator(), BBDupThreshold);
 2366 if (JumpThreadCost > BBDupThreshold) {
 2367 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
 2368 << "' - Cost is too high: " << JumpThreadCost << "\n");
 2369 return false;
 2370 }
 2371
 2372 threadEdge(BB, PredBBs, SuccBB);
 2373 return true;
 2374}
2375
2376/// threadEdge - We have decided that it is safe and profitable to factor the
2377/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
2378/// across BB. Transform the IR to reflect this change.
/// Requires SuccBB != BB and that neither BB nor SuccBB is a loop header
/// (both enforced by the asserts below). Updates LVI, the DomTree (via DTU),
/// BFI/BPI, branch-weight metadata, and SSA form.
 2380 const SmallVectorImpl<BasicBlock *> &PredBBs,
 2381 BasicBlock *SuccBB) {
 2382 assert(SuccBB != BB && "Don't create an infinite loop");
 2383
 2384 assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
 2385 "Don't thread across loop headers");
 2386
 2387 // Build BPI/BFI before any changes are made to IR.
 2388 bool HasProfile = doesBlockHaveProfileData(BB);
 2389 auto *BFI = getOrCreateBFI(HasProfile);
 2390 auto *BPI = getOrCreateBPI(BFI != nullptr);
 2391
 2392 // And finally, do it! Start by factoring the predecessors if needed.
 2393 BasicBlock *PredBB;
 2394 if (PredBBs.size() == 1)
 2395 PredBB = PredBBs[0];
 2396 else {
 2397 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
 2398 << " common predecessors.\n");
 2399 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
 2400 }
 2401
 2402 // And finally, do it!
 2403 LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
 2404 << "' to '" << SuccBB->getName()
 2405 << ", across block:\n " << *BB << "\n");
 2406
 2407 LVI->threadEdge(PredBB, BB, SuccBB);
 2408
 2410 BB->getName()+".thread",
 2411 BB->getParent(), BB);
 2412 NewBB->moveAfter(PredBB);
 2413
 2414 // Set the block frequency of NewBB.
 2415 if (BFI) {
 2416 assert(BPI && "It's expected BPI to exist along with BFI");
 2417 auto NewBBFreq =
 2418 BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
 2419 BFI->setBlockFreq(NewBB, NewBBFreq);
 2420 }
 2421
 2422 // Copy all the instructions from BB to NewBB except the terminator.
 2423 ValueToValueMapTy ValueMapping;
 2424 cloneInstructions(ValueMapping, BB->begin(), std::prev(BB->end()), NewBB,
 2425 PredBB);
 2426
 2427 // We didn't copy the terminator from BB over to NewBB, because there is now
 2428 // an unconditional jump to SuccBB. Insert the unconditional jump.
 2429 UncondBrInst *NewBI = UncondBrInst::Create(SuccBB, NewBB);
 2430 NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
 2431
 2432 // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
 2433 // PHI nodes for NewBB now.
 2434 addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
 2435
 2436 // Update the terminator of PredBB to jump to NewBB instead of BB. This
 2437 // eliminates predecessors from BB, which requires us to simplify any PHI
 2438 // nodes in BB.
 2439 Instruction *PredTerm = PredBB->getTerminator();
 2440 for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
 2441 if (PredTerm->getSuccessor(i) == BB) {
 2442 BB->removePredecessor(PredBB, true);
 2443 PredTerm->setSuccessor(i, NewBB);
 2444 }
 2445
 2446 // Enqueue required DT updates.
 2447 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
 2448 {DominatorTree::Insert, PredBB, NewBB},
 2449 {DominatorTree::Delete, PredBB, BB}});
 2450
 2451 remapSourceAtoms(ValueMapping, NewBB->begin(), NewBB->end());
 2452 updateSSA(BB, NewBB, ValueMapping);
 2453
 2454 // At this point, the IR is fully up to date and consistent. Do a quick scan
 2455 // over the new instructions and zap any that are constants or dead. This
 2456 // frequently happens because of phi translation.
 2457 SimplifyInstructionsInBlock(NewBB, TLI);
 2458
 2459 // Update the edge weight from BB to SuccBB, which should be less than before.
 2460 updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
 2461
 2462 // Threaded an edge!
 2463 ++NumThreads;
 2464}
2465
2466/// Create a new basic block that will be the predecessor of BB and successor of
2467/// all blocks in Preds. When profile data is available, update the frequency of
2468/// this new block.
2469BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
2471 const char *Suffix) {
2473
2474 // Collect the frequencies of all predecessors of BB, which will be used to
2475 // update the edge weight of the result of splitting predecessors.
2477 auto *BFI = getBFI();
2478 if (BFI) {
2479 auto *BPI = getOrCreateBPI(true);
2480 for (auto *Pred : Preds)
2481 FreqMap.insert(std::make_pair(
2482 Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
2483 }
2484
2485 // In the case when BB is a LandingPad block we create 2 new predecessors
2486 // instead of just one.
2487 if (BB->isLandingPad()) {
2488 std::string NewName = std::string(Suffix) + ".split-lp";
2489 SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
2490 } else {
2491 NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
2492 }
2493
2494 std::vector<DominatorTree::UpdateType> Updates;
2495 Updates.reserve((2 * Preds.size()) + NewBBs.size());
2496 for (auto *NewBB : NewBBs) {
2497 BlockFrequency NewBBFreq(0);
2498 Updates.push_back({DominatorTree::Insert, NewBB, BB});
2499 for (auto *Pred : predecessors(NewBB)) {
2500 Updates.push_back({DominatorTree::Delete, Pred, BB});
2501 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
2502 if (BFI) // Update frequencies between Pred -> NewBB.
2503 NewBBFreq += FreqMap.lookup(Pred);
2504 }
2505 if (BFI) // Apply the summed frequency to NewBB.
2506 BFI->setBlockFreq(NewBB, NewBBFreq);
2507 }
2508
2509 DTU->applyUpdatesPermissive(Updates);
2510 return NewBBs[0];
2511}
2512
2513bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2514 const Instruction *TI = BB->getTerminator();
2515 if (!TI || TI->getNumSuccessors() < 2)
2516 return false;
2517
2518 return hasValidBranchWeightMD(*TI);
2519}
2520
2521/// Update the block frequency of BB and branch weight and the metadata on the
2522/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2523/// Freq(PredBB->BB) / Freq(BB->SuccBB).
2524void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
 2525 BasicBlock *BB,
 2526 BasicBlock *NewBB,
 2527 BasicBlock *SuccBB,
 2528 BlockFrequencyInfo *BFI,
 2530 bool HasProfile) {
 // NOTE(review): the second disjunct "(!BFI && !BFI)" looks like a typo for
 // "(!BFI && !BPI)". As written, the assert does not fire when BFI is null
 // but BPI is non-null. Confirm against upstream before relying on it.
 2531 assert(((BFI && BPI) || (!BFI && !BFI)) &&
 2532 "Both BFI & BPI should either be set or unset");
 2533
 2534 if (!BFI) {
 2535 assert(!HasProfile &&
 2536 "It's expected to have BFI/BPI when profile info exists");
 2537 return;
 2538 }
 2539
 2540 // As the edge from PredBB to BB is deleted, we have to update the block
 2541 // frequency of BB.
 2542 auto BBOrigFreq = BFI->getBlockFreq(BB);
 2543 auto NewBBFreq = BFI->getBlockFreq(NewBB);
 2544 auto BBNewFreq = BBOrigFreq - NewBBFreq;
 2545 BFI->setBlockFreq(BB, BBNewFreq);
 2546
 2547 // Collect updated outgoing edges' frequencies from BB and use them to update
 2548 // edge probabilities.
 2549 SmallVector<uint64_t, 4> BBSuccFreq;
 2550 for (auto It : enumerate(successors(BB))) {
 2551 auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, It.index());
// Only the threaded successor loses the frequency that moved to NewBB.
 2552 auto SuccFreq =
 2553 (It.value() == SuccBB) ? BB2SuccBBFreq - NewBBFreq : BB2SuccBBFreq;
 2554 BBSuccFreq.push_back(SuccFreq.getFrequency());
 2555 }
 2556
 2557 uint64_t MaxBBSuccFreq = *llvm::max_element(BBSuccFreq);
 2558
// If every outgoing frequency is zero, fall back to a uniform distribution.
 2560 if (MaxBBSuccFreq == 0)
 2561 BBSuccProbs.assign(BBSuccFreq.size(),
 2562 {1, static_cast<unsigned>(BBSuccFreq.size())});
 2563 else {
 2564 for (uint64_t Freq : BBSuccFreq)
 2565 BBSuccProbs.push_back(
 2566 BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
 2567 // Normalize edge probabilities so that they sum up to one.
 2569 BBSuccProbs.end());
 2570 }
 2571
 2572 // Update edge probabilities in BPI.
 2573 BPI->setEdgeProbability(BB, BBSuccProbs);
 2574
 2575 // Update the profile metadata as well.
 2576 //
 2577 // Don't do this if the profile of the transformed blocks was statically
 2578 // estimated. (This could occur despite the function having an entry
 2579 // frequency in completely cold parts of the CFG.)
 2580 //
 2581 // In this case we don't want to suggest to subsequent passes that the
 2582 // calculated weights are fully consistent. Consider this graph:
 2583 //
 2584 // check_1
 2585 // 50% / |
 2586 // eq_1 | 50%
 2587 // \ |
 2588 // check_2
 2589 // 50% / |
 2590 // eq_2 | 50%
 2591 // \ |
 2592 // check_3
 2593 // 50% / |
 2594 // eq_3 | 50%
 2595 // \ |
 2596 //
 2597 // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
 2598 // the overall probabilities are inconsistent; the total probability that the
 2599 // value is either 1, 2 or 3 is 150%.
 2600 //
 2601 // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
 2602 // becomes 0%. This is even worse if the edge whose probability becomes 0% is
 2603 // the loop exit edge. Then based solely on static estimation we would assume
 2604 // the loop was extremely hot.
 2605 //
 2606 // FIXME this locally as well so that BPI and BFI are consistent as well. We
 2607 // shouldn't make edges extremely likely or unlikely based solely on static
 2608 // estimation.
 2609 if (BBSuccProbs.size() >= 2 && HasProfile) {
 2610 SmallVector<uint32_t, 4> Weights;
 2611 for (auto Prob : BBSuccProbs)
 2612 Weights.push_back(Prob.getNumerator());
 2613
 2614 auto TI = BB->getTerminator();
 2615 setBranchWeights(*TI, Weights, hasBranchWeightOrigin(*TI));
 2616 }
 2617}
2618
2619/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2620/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2621/// If we can duplicate the contents of BB up into PredBB do so now, this
2622/// improves the odds that the branch will be on an analyzable instruction like
2623/// a compare.
// NOTE(review): doxygen line 2624 — presumably the start of the signature,
// "bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(" — was dropped
// by the HTML-to-text extraction. TODO: confirm against upstream
// JumpThreading.cpp before treating this text as compilable source.
2625 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2626 assert(!PredBBs.empty() && "Can't handle an empty set");
2627
2628 // If BB is a loop header, then duplicating this block outside the loop would
2629 // cause us to transform this into an irreducible loop, don't do this.
2630 // See the comments above findLoopHeaders for justifications and caveats.
2631 if (LoopHeaders.count(BB)) {
2632 LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2633 << "' into predecessor block '" << PredBBs[0]->getName()
2634 << "' - it might create an irreducible loop!\n");
2635 return false;
2636 }
2637
 // Respect the duplication budget: BBDupThreshold both caps the cost scan
 // and serves as the reject threshold.
2638 unsigned DuplicationCost = getJumpThreadDuplicationCost(
2639 TTI, BB, BB->getTerminator(), BBDupThreshold);
2640 if (DuplicationCost > BBDupThreshold) {
2641 LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2642 << "' - Cost is too high: " << DuplicationCost << "\n");
2643 return false;
2644 }
2645
2646 // And finally, do it! Start by factoring the predecessors if needed.
2647 std::vector<DominatorTree::UpdateType> Updates;
2648 BasicBlock *PredBB;
2649 if (PredBBs.size() == 1)
2650 PredBB = PredBBs[0];
2651 else {
2652 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2653 << " common predecessors.\n");
2654 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2655 }
2656 Updates.push_back({DominatorTree::Delete, PredBB, BB});
2657
2658 // Okay, we decided to do this! Clone all the instructions in BB onto the end
2659 // of PredBB.
2660 LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2661 << "' into end of '" << PredBB->getName()
2662 << "' to eliminate branch on phi. Cost: "
2663 << DuplicationCost << " block is:" << *BB << "\n");
2664
2665 // Unless PredBB ends with an unconditional branch, split the edge so that we
2666 // can just clone the bits from BB into the end of the new PredBB.
2667 UncondBrInst *OldPredBranch = dyn_cast<UncondBrInst>(PredBB->getTerminator());
2668
2669 if (!OldPredBranch) {
2670 BasicBlock *OldPredBB = PredBB;
2671 PredBB = SplitEdge(OldPredBB, BB);
2672 Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
2673 Updates.push_back({DominatorTree::Insert, PredBB, BB});
2674 Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
2675 OldPredBranch = cast<UncondBrInst>(PredBB->getTerminator());
2676 }
2677
2678 // We are going to have to map operands from the original BB block into the
2679 // PredBB block. Evaluate PHI nodes in BB.
2680 ValueToValueMapTy ValueMapping;
2681
2682 // Remember the position before the inserted instructions.
2683 auto RItBeforeInsertPt = std::next(OldPredBranch->getReverseIterator());
2684
 // PHIs in BB are resolved directly to the value incoming from PredBB; no
 // instruction is cloned for them.
2685 BasicBlock::iterator BI = BB->begin();
2686 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2687 ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2688 // Clone the non-phi instructions of BB into PredBB, keeping track of the
2689 // mapping and using it to remap operands in the cloned instructions.
2690 for (; BI != BB->end(); ++BI) {
2691 Instruction *New = BI->clone();
2692 New->insertInto(PredBB, OldPredBranch->getIterator());
2693
2694 // Remap operands to patch up intra-block references.
2695 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2696 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2697 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
2698 if (I != ValueMapping.end())
2699 New->setOperand(i, I->second);
2700 }
2701
2702 // Remap debug variable operands.
2703 remapDebugVariable(ValueMapping, New);
2704 if (const DebugLoc &DL = New->getDebugLoc())
2705 mapAtomInstance(DL, ValueMapping);
2706
2707 // If this instruction can be simplified after the operands are updated,
2708 // just use the simplified value instead. This frequently happens due to
2709 // phi translation.
 // NOTE(review): doxygen line 2710 — presumably the condition opener
 // "if (Value *IV = simplifyInstruction(" — was dropped by extraction;
 // the two lines below are its argument list. TODO: confirm upstream.
2711 New,
2712 {BB->getDataLayout(), TLI, nullptr, nullptr, New})) {
2713 ValueMapping[&*BI] = IV;
2714 if (!New->mayHaveSideEffects()) {
2715 New->eraseFromParent();
2716 New = nullptr;
2717 // Clone debug-info on the elided instruction to the destination
2718 // position.
2719 OldPredBranch->cloneDebugInfoFrom(&*BI, std::nullopt, true);
2720 }
2721 } else {
2722 ValueMapping[&*BI] = New;
2723 }
2724 if (New) {
2725 // Otherwise, insert the new instruction into the block.
2726 New->setName(BI->getName());
2727 // Clone across any debug-info attached to the old instruction.
2728 New->cloneDebugInfoFrom(&*BI);
2729 // Update Dominance from simplified New instruction operands.
2730 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2731 if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2732 Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
2733 }
2734 }
2735
2736 // Check to see if the targets of the branch had PHI nodes. If so, we need to
2737 // add entries to the PHI nodes for branch from PredBB now.
2738 CondBrInst *BBBranch = cast<CondBrInst>(BB->getTerminator());
2739 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2740 ValueMapping);
2741 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2742 ValueMapping);
2743
2744 // KeyInstructions: Remap the cloned instructions' atoms only.
2745 remapSourceAtoms(ValueMapping, std::prev(RItBeforeInsertPt)->getIterator(),
2746 OldPredBranch->getIterator());
2747
2748 updateSSA(BB, PredBB, ValueMapping);
2749
2750 // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2751 // that we nuked.
2752 BB->removePredecessor(PredBB, true);
2753
2754 // Remove the unconditional branch at the end of the PredBB block.
2755 OldPredBranch->eraseFromParent();
 // Keep BPI consistent: PredBB now ends with a copy of BB's branch, so it
 // inherits BB's edge probabilities.
2756 if (auto *BPI = getBPI())
2757 BPI->copyEdgeProbabilities(BB, PredBB);
2758 DTU->applyUpdatesPermissive(Updates);
2759
2760 ++NumDupes;
2761 return true;
2762}
2763
2764// Pred is a predecessor of BB with an unconditional branch to BB. SI is
2765// a Select instruction in Pred. BB has other predecessors and SI is used in
2766// a PHI node in BB. SI has no other use.
2767// A new basic block, NewBB, is created and SI is converted to compare and
2768// conditional branch. SI is erased from parent.
// NOTE(review): doxygen line 2769 — presumably the signature
// "void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,"
// — was dropped by the HTML-to-text extraction. TODO: confirm upstream.
2770 SelectInst *SI, PHINode *SIUse,
2771 unsigned Idx) {
2772 // Expand the select.
2773 //
2774 // Pred --
2775 // | v
2776 // | NewBB
2777 // | |
2778 // |-----
2779 // v
2780 // BB
2781 UncondBrInst *PredTerm = cast<UncondBrInst>(Pred->getTerminator());
2782 BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2783 BB->getParent(), BB);
2784 // Move the unconditional branch to NewBB.
2785 PredTerm->removeFromParent();
2786 PredTerm->insertInto(NewBB, NewBB->end());
2787 // Create a conditional branch and update PHI nodes.
2788 auto *BI = CondBrInst::Create(SI->getCondition(), NewBB, BB, Pred);
2789 BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2790 BI->copyMetadata(*SI, {LLVMContext::MD_prof});
 // False arm stays on the direct Pred->BB edge; true arm reaches BB via NewBB.
2791 SIUse->setIncomingValue(Idx, SI->getFalseValue());
2792 SIUse->addIncoming(SI->getTrueValue(), NewBB);
2793
2794 uint64_t TrueWeight = 1;
2795 uint64_t FalseWeight = 1;
2796 // Copy probabilities from 'SI' to created conditional branch in 'Pred'.
2797 if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
2798 (TrueWeight + FalseWeight) != 0) {
 // NOTE(review): doxygen lines 2799-2800 and 2802 — presumably the
 // construction of the BranchProbability container 'BP' fed to
 // setEdgeProbability below — were dropped by extraction. TODO: confirm
 // upstream.
2801 TrueWeight, TrueWeight + FalseWeight));
2803 FalseWeight, TrueWeight + FalseWeight));
2804 // Update BPI if exists.
2805 if (auto *BPI = getBPI())
2806 BPI->setEdgeProbability(Pred, BP);
2807 }
2808 // Set the block frequency of NewBB.
2809 if (auto *BFI = getBFI()) {
2810 if ((TrueWeight + FalseWeight) == 0) {
2811 TrueWeight = 1;
2812 FalseWeight = 1;
2813 }
 // NOTE(review): doxygen line 2814 — presumably
 // "BranchProbability PredToNewBBProb = BranchProbability::getBranchProbability("
 // — was dropped by extraction. TODO: confirm upstream.
2815 TrueWeight, TrueWeight + FalseWeight);
2816 auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
2817 BFI->setBlockFreq(NewBB, NewBBFreq);
2818 }
2819
2820 // The select is now dead.
2821 SI->eraseFromParent();
2822 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
2823 {DominatorTree::Insert, Pred, NewBB}});
2824
2825 // Update any other PHI nodes in BB.
2826 for (BasicBlock::iterator BI = BB->begin();
2827 PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2828 if (Phi != SIUse)
2829 Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2830}
2831
// NOTE(review): doxygen line 2832 — the signature of this function, presumably
// "bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {"
// — was dropped by the HTML-to-text extraction. TODO: confirm upstream.
// The body below unfolds a select feeding the PHI that drives SI's condition.
2833 PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2834
2835 if (!CondPHI || CondPHI->getParent() != BB)
2836 return false;
2837
2838 for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2839 BasicBlock *Pred = CondPHI->getIncomingBlock(I);
 // NOTE(review): doxygen line 2840 — presumably the dyn_cast defining
 // 'PredSI' from CondPHI->getIncomingValue(I) — was dropped by extraction.
2841
2842 // The second and third condition can be potentially relaxed. Currently
2843 // the conditions help to simplify the code and allow us to reuse existing
2844 // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
2845 if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
2846 continue;
2847
 // Only handle predecessors that fall through into BB unconditionally.
2848 UncondBrInst *PredTerm = dyn_cast<UncondBrInst>(Pred->getTerminator());
2849 if (!PredTerm)
2850 continue;
2851
2852 unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
2853 return true;
2854 }
2855 return false;
2856}
2857
2858/// tryToUnfoldSelect - Look for blocks of the form
2859/// bb1:
2860/// %a = select
2861/// br bb2
2862///
2863/// bb2:
2864/// %p = phi [%a, %bb1] ...
2865/// %c = icmp %p
2866/// br i1 %c
2867///
2868/// And expand the select into a branch structure if one of its arms allows %c
2869/// to be folded. This later enables threading from bb1 over bb2.
// NOTE(review): doxygen lines 2870-2871 — presumably the signature
// "bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)"
// and the definition of 'CondBr' from BB's terminator — were dropped by the
// HTML-to-text extraction. TODO: confirm upstream.
2872 PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
2873 Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2874
2875 if (!CondBr || !CondLHS || CondLHS->getParent() != BB)
2876 return false;
2877
2878 for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2879 BasicBlock *Pred = CondLHS->getIncomingBlock(I);
 // NOTE(review): doxygen line 2880 — presumably the dyn_cast defining 'SI'
 // from CondLHS->getIncomingValue(I) — was dropped by extraction.
2881
2882 // Look if one of the incoming values is a select in the corresponding
2883 // predecessor.
2884 if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2885 continue;
2886
2887 UncondBrInst *PredTerm = dyn_cast<UncondBrInst>(Pred->getTerminator());
2888 if (!PredTerm)
2889 continue;
2890
2891 // Now check if one of the select values would allow us to constant fold the
2892 // terminator in BB. We don't do the transform if both sides fold, those
2893 // cases will be threaded in any case.
2894 Constant *LHSRes =
2895 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2896 CondRHS, Pred, BB, CondCmp);
2897 Constant *RHSRes =
2898 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2899 CondRHS, Pred, BB, CondCmp);
 // Unfold only when exactly one arm folds (LHSRes != RHSRes); if both fold,
 // normal threading already handles the block.
2900 if ((LHSRes || RHSRes) && LHSRes != RHSRes) {
2901 unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
2902 return true;
2903 }
2904 }
2905 return false;
2906}
2907
2908/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2909/// same BB in the form
2910/// bb:
2911/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2912/// %s = select %p, trueval, falseval
2913///
2914/// or
2915///
2916/// bb:
2917/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2918/// %c = cmp %p, 0
2919/// %s = select %c, trueval, falseval
2920///
2921/// And expand the select into a branch structure. This later enables
2922/// jump-threading over bb in this pass.
2923///
2924/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2925/// select if the associated PHI has at least one constant. If the unfolded
2926/// select is not jump-threaded, it will be folded again in the later
2927/// optimizations.
// NOTE(review): doxygen line 2928 — presumably the signature
// "bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {" — was
// dropped by the HTML-to-text extraction. TODO: confirm upstream.
2929 // This transform would reduce the quality of msan diagnostics.
2930 // Disable this transform under MemorySanitizer.
2931 if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
2932 return false;
2933
2934 // If threading this would thread across a loop header, don't thread the edge.
2935 // See the comments above findLoopHeaders for justifications and caveats.
2936 if (LoopHeaders.count(BB))
2937 return false;
2938
2939 for (BasicBlock::iterator BI = BB->begin();
2940 PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2941 // Look for a Phi having at least one constant incoming value.
2942 if (llvm::all_of(PN->incoming_values(),
2943 [](Value *V) { return !isa<ConstantInt>(V); }))
2944 continue;
2945
 // A select qualifies when it lives in BB, is directly conditioned on V
 // (an i1), and is not a logical and/or pattern (those carry poison
 // semantics that unfolding would change).
2946 auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2947 using namespace PatternMatch;
2948
2949 // Check if SI is in BB and use V as condition.
2950 if (SI->getParent() != BB)
2951 return false;
2952 Value *Cond = SI->getCondition();
2953 bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
2954 return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
2955 };
2956
2957 SelectInst *SI = nullptr;
2958 for (Use &U : PN->uses()) {
2959 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2960 // Look for a ICmp in BB that compares PN with a constant and is the
2961 // condition of a Select.
2962 if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2963 isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2964 if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2965 if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2966 SI = SelectI;
2967 break;
2968 }
2969 } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2970 // Look for a Select in BB that uses PN as condition.
2971 if (isUnfoldCandidate(SelectI, U.get())) {
2972 SI = SelectI;
2973 break;
2974 }
2975 }
2976 }
2977
2978 if (!SI)
2979 continue;
2980 // Expand the select.
2981 Value *Cond = SI->getCondition();
2982 if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI)) {
 // Branching on undef/poison would be UB where the select was not;
 // freeze the condition first.
2983 Cond = new FreezeInst(Cond, "cond.fr", SI->getIterator());
 // NOTE(review): doxygen line 2984 — one statement inside this if (likely
 // propagating SI's debug location to the freeze) — was dropped by
 // extraction. TODO: confirm upstream.
2985 }
2986 MDNode *BranchWeights = getBranchWeightMDNode(*SI);
2987 Instruction *Term =
2988 SplitBlockAndInsertIfThen(Cond, SI, false, BranchWeights);
2989 BasicBlock *SplitBB = SI->getParent();
2990 BasicBlock *NewBB = Term->getParent();
2991 PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI->getIterator());
2992 NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2993 NewPN->addIncoming(SI->getFalseValue(), BB);
2994 NewPN->setDebugLoc(SI->getDebugLoc());
2995 SI->replaceAllUsesWith(NewPN);
2996 SI->eraseFromParent();
2997 // NewBB and SplitBB are newly created blocks which require insertion.
2998 std::vector<DominatorTree::UpdateType> Updates;
2999 Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
3000 Updates.push_back({DominatorTree::Insert, BB, SplitBB});
3001 Updates.push_back({DominatorTree::Insert, BB, NewBB});
3002 Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
3003 // BB's successors were moved to SplitBB, update DTU accordingly.
3004 for (auto *Succ : successors(SplitBB)) {
3005 Updates.push_back({DominatorTree::Delete, BB, Succ});
3006 Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
3007 }
3008 DTU->applyUpdatesPermissive(Updates);
3009 return true;
3010 }
3011 return false;
3012}
3013
3014/// Try to propagate a guard from the current BB into one of its predecessors
3015/// in case if another branch of execution implies that the condition of this
3016/// guard is always true. Currently we only process the simplest case that
3017/// looks like:
3018///
3019/// Start:
3020/// %cond = ...
3021/// br i1 %cond, label %T1, label %F1
3022/// T1:
3023/// br label %Merge
3024/// F1:
3025/// br label %Merge
3026/// Merge:
3027/// %condGuard = ...
3028/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
3029///
3030/// And cond either implies condGuard or !condGuard. In this case all the
3031/// instructions before the guard can be duplicated in both branches, and the
3032/// guard is then threaded to one of them.
// NOTE(review): doxygen line 3033 — presumably the signature
// "bool JumpThreadingPass::processGuards(BasicBlock *BB) {" — was dropped by
// the HTML-to-text extraction. TODO: confirm upstream.
3034 using namespace PatternMatch;
3035
3036 // We only want to deal with two predecessors.
3037 BasicBlock *Pred1, *Pred2;
3038 auto PI = pred_begin(BB), PE = pred_end(BB);
3039 if (PI == PE)
3040 return false;
3041 Pred1 = *PI++;
3042 if (PI == PE)
3043 return false;
3044 Pred2 = *PI++;
3045 if (PI != PE)
3046 return false;
3047 if (Pred1 == Pred2)
3048 return false;
3049
3050 // Try to thread one of the guards of the block.
3051 // TODO: Look up deeper than to immediate predecessor?
 // Require the exact diamond shape: both predecessors must share one single
 // predecessor (the branch block).
3052 auto *Parent = Pred1->getSinglePredecessor();
3053 if (!Parent || Parent != Pred2->getSinglePredecessor())
3054 return false;
3055
3056 if (auto *BI = dyn_cast<CondBrInst>(Parent->getTerminator()))
3057 for (auto &I : *BB)
3058 if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
3059 return true;
3060
3061 return false;
3062}
3063
3064/// Try to propagate the guard from BB which is the lower block of a diamond
3065/// to one of its branches, in case if diamond's condition implies guard's
3066/// condition.
// NOTE(review): doxygen line 3067 — presumably the start of the signature,
// "bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,"
// — was dropped by the HTML-to-text extraction. TODO: confirm upstream.
3068 CondBrInst *BI) {
3069 Value *GuardCond = Guard->getArgOperand(0);
3070 Value *BranchCond = BI->getCondition();
3071 BasicBlock *TrueDest = BI->getSuccessor(0);
3072 BasicBlock *FalseDest = BI->getSuccessor(1);
3073
3074 auto &DL = BB->getDataLayout();
3075 bool TrueDestIsSafe = false;
3076 bool FalseDestIsSafe = false;
3077
3078 // True dest is safe if BranchCond => GuardCond.
3079 auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
3080 if (Impl && *Impl)
3081 TrueDestIsSafe = true;
3082 else {
3083 // False dest is safe if !BranchCond => GuardCond.
3084 Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
3085 if (Impl && *Impl)
3086 FalseDestIsSafe = true;
3087 }
3088
3089 if (!TrueDestIsSafe && !FalseDestIsSafe)
3090 return false;
3091
 // Exactly one of the two flags is set here, so these pick opposite arms.
3092 BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3093 BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3094
3095 ValueToValueMapTy UnguardedMapping, GuardedMapping;
3096 Instruction *AfterGuard = Guard->getNextNode();
3097 unsigned Cost =
3098 getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
3099 if (Cost > BBDupThreshold)
3100 return false;
3101 // Duplicate all instructions before the guard and the guard itself to the
3102 // branch where implication is not proved.
 // NOTE(review): doxygen line 3103 — presumably
 // "BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(" — was
 // dropped by extraction. TODO: confirm upstream.
3104 BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3105 assert(GuardedBlock && "Could not create the guarded block?");
3106 // Duplicate all instructions before the guard in the unguarded branch.
3107 // Since we have successfully duplicated the guarded block and this block
3108 // has fewer instructions, we expect it to succeed.
 // NOTE(review): doxygen line 3109 — presumably
 // "BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(" — was
 // dropped by extraction. TODO: confirm upstream.
3110 BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3111 assert(UnguardedBlock && "Could not create the unguarded block?");
3112 LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3113 << GuardedBlock->getName() << "\n");
3114 // Some instructions before the guard may still have uses. For them, we need
3115 // to create Phi nodes merging their copies in both guarded and unguarded
3116 // branches. Those instructions that have no uses can be just removed.
 // NOTE(review): doxygen line 3117 — presumably the declaration of the
 // 'ToRemove' SmallVector — was dropped by extraction. TODO: confirm upstream.
3118 for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3119 if (!isa<PHINode>(&*BI))
3120 ToRemove.push_back(&*BI);
3121
 // NOTE(review): doxygen line 3122 — presumably the definition of
 // 'InsertionPoint' (BB->getFirstInsertionPt()) — was dropped by extraction.
3123 assert(InsertionPoint != BB->end() && "Empty block?");
3124 // Substitute with Phis & remove.
3125 for (auto *Inst : reverse(ToRemove)) {
3126 if (!Inst->use_empty()) {
3127 PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
3128 NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3129 NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
3130 NewPN->setDebugLoc(Inst->getDebugLoc());
 // NOTE(review): doxygen line 3131 — presumably the insertion of NewPN at
 // InsertionPoint — was dropped by extraction. TODO: confirm upstream.
3132 Inst->replaceAllUsesWith(NewPN);
3133 }
3134 Inst->dropDbgRecords();
3135 Inst->eraseFromParent();
3136 }
3137 return true;
3138}
3139
3140PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {
 // NOTE(review): doxygen lines 3141-3143 — presumably the construction of
 // 'PA' and the explicit PA.preserve<>() calls for the analyses this pass
 // keeps up to date — were dropped by the HTML-to-text extraction. TODO:
 // confirm against upstream.
3144
3145 // TODO: We would like to preserve BPI/BFI. Enable once all paths update them.
3146 // TODO: Would be nice to verify BPI/BFI consistency as well.
3147 return PA;
3148}
3149
// Lazily (re)compute an analysis result through the FunctionAnalysisManager,
// first invalidating everything this pass has clobbered since the last call.
// The fast path (no changes recorded) just fetches the cached/fresh result.
3150template <typename AnalysisT>
3151typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
3152 assert(FAM && "Can't run external analysis without FunctionAnalysisManager");
3153
3154 // If there were no changes since last call to 'runExternalAnalysis' then all
3155 // analysis is either up to date or explicitly invalidated. Just go ahead and
3156 // run the "external" analysis.
3157 if (!ChangedSinceLastAnalysisUpdate) {
3158 assert(!DTU->hasPendingUpdates() &&
3159 "Lost update of 'ChangedSinceLastAnalysisUpdate'?");
3160 // Run the "external" analysis.
3161 return &FAM->getResult<AnalysisT>(*F);
3162 }
3163 ChangedSinceLastAnalysisUpdate = false;
3164
3165 auto PA = getPreservedAnalysis();
3166 // TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI
3167 // as preserved.
3168 PA.preserve<BranchProbabilityAnalysis>();
3169 PA.preserve<BlockFrequencyAnalysis>();
3170 // Report everything except explicitly preserved as invalid.
3171 FAM->invalidate(*F, PA);
3172 // Update DT/PDT.
3173 DTU->flush();
3174 // Make sure DT/PDT are valid before running "external" analysis.
3175 assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
3176 assert((!DTU->hasPostDomTree() ||
3177 DTU->getPostDomTree().verify(
3178 PostDominatorTree::VerificationLevel::Fast)));
3179 // Run the "external" analysis.
3180 auto *Result = &FAM->getResult<AnalysisT>(*F);
 // Re-fetch analyses this pass holds pointers to, since the invalidation
 // above may have freed the old instances.
3181 // Update analysis JumpThreading depends on and not explicitly preserved.
3182 TTI = &FAM->getResult<TargetIRAnalysis>(*F);
3183 TLI = &FAM->getResult<TargetLibraryAnalysis>(*F);
3184 AA = &FAM->getResult<AAManager>(*F);
3185
3186 return Result;
3187}
3188
// Fetch the cached BranchProbabilityInfo, if any. Uses getCachedResult, so
// this never computes a new BPI; it returns nullptr when none is cached.
3189BranchProbabilityInfo *JumpThreadingPass::getBPI() {
3190 if (!BPI) {
3191 assert(FAM && "Can't create BPI without FunctionAnalysisManager");
3192 BPI = FAM->getCachedResult<BranchProbabilityAnalysis>(*F);
3193 }
3194 return BPI;
3195}
3196
// Fetch the cached BlockFrequencyInfo, if any. Uses getCachedResult, so this
// never computes a new BFI; it returns nullptr when none is cached.
3197BlockFrequencyInfo *JumpThreadingPass::getBFI() {
3198 if (!BFI) {
3199 assert(FAM && "Can't create BFI without FunctionAnalysisManager");
3200 BFI = FAM->getCachedResult<BlockFrequencyAnalysis>(*F);
3201 }
3202 return BFI;
3203}
3204
3205// Important note on validity of BPI/BFI. JumpThreading tries to preserve
3206// BPI/BFI as it goes. Thus if cached instance exists it will be updated.
3207// Otherwise, new instance of BPI/BFI is created (up to date by definition).
// Returns the cached BPI if present; with Force, computes a fresh one via
// runExternalAnalysis. With Force=false and no cached instance this returns
// nullptr.
3208BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) {
3209 auto *Res = getBPI();
3210 if (Res)
3211 return Res;
3212
3213 if (Force)
3214 BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
3215
3216 return BPI;
3217}
3218
// Returns the cached BFI if present; with Force, computes a fresh one via
// runExternalAnalysis. With Force=false and no cached instance this returns
// nullptr. Mirrors getOrCreateBPI above.
3219BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) {
3220 auto *Res = getBFI();
3221 if (Res)
3222 return Res;
3223
3224 if (Force)
3225 BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
3226
3227 return BFI;
3228}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static unsigned getBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForBranchOnUndef - If we determine that the specified block ends in an undefined jump,...
static cl::opt< unsigned > PhiDuplicateThreshold("jump-threading-phi-threshold", cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), cl::Hidden)
static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, BasicBlock *KnownAtEndOfBB)
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
static cl::opt< bool > ThreadAcrossLoopHeaders("jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), cl::init(false), cl::Hidden)
static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
static void remapSourceAtoms(ValueToValueMapTy &VM, BasicBlock::iterator Begin, BasicBlock::iterator End)
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, ValueToValueMapTy &ValueMap)
addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new predecessor to the PHIBB block.
static BasicBlock * findMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &PredToDestList)
findMostPopularDest - The specified list contains multiple possible threadable destinations.
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
static bool hasAddressTakenAndUsed(BasicBlock *BB)
See the comments on JumpThreadingPass.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T
#define P(N)
ppc ctr loops verify
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static DominatorTree getDomTree(Function &F)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
static const uint32_t IV[8]
Definition blake3_impl.h:83
A manager for alias analyses.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:483
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:470
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:539
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI DbgMarker * createMarker(Instruction *I)
Attach a DbgMarker to the given instruction.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:696
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:493
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI DbgMarker * getMarker(InstListType::iterator It)
Return the DbgMarker for the position given by It, so that DbgRecords can be inserted there.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition BasicBlock.h:713
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
void disableDominatorTree()
Disable the use of the dominator tree during alias analysis queries.
The address of a basic block.
Definition Constants.h:1065
static LLVM_ABI BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
Analysis providing branch probability information.
LLVM_ABI BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
uint32_t getNumerator() const
BranchProbability getCompl() const
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * getNot(Constant *C)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Per-instruction record of debug-info.
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(DbgMarker *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere, bool InsertAtHead=false)
Clone all DbgMarkers from From into this marker.
LLVM_ABI const BasicBlock * getParent() const
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getTemporary()
Definition DebugLoc.h:160
static DebugLoc getDropped()
Definition DebugLoc.h:163
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This class represents a freeze function that returns a random concrete value if an operand is either a ...
const BasicBlock & getEntryBlock() const
Definition Function.h:809
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
This instruction compares its operands according to the predicate given to the constructor.
Indirect Branch Instruction.
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
bool isSpecialTerminator() const
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
LLVM_ABI bool simplifyPartiallyRedundantLoad(LoadInst *LI)
simplifyPartiallyRedundantLoad - If LoadI is an obviously partially redundant load instruction,...
LLVM_ABI bool processBranchOnXOR(BinaryOperator *BO)
processBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
LLVM_ABI bool processGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case if another branch o...
LLVM_ABI void updateSSA(BasicBlock *BB, BasicBlock *NewBB, ValueToValueMapTy &ValueMapping)
Update the SSA form.
bool computeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
LLVM_ABI void findLoopHeaders(Function &F)
findLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
LLVM_ABI bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB)
Merge basic block BB into its sole predecessor if possible.
LLVM_ABI JumpThreadingPass(int T=-1)
LLVM_ABI void cloneInstructions(ValueToValueMapTy &ValueMapping, BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB)
Clone instructions in range [BI, BE) to NewBB.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
LLVM_ABI Constant * evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *cond, const DataLayout &DL)
LLVM_ABI bool processBranchOnPHI(PHINode *PN)
processBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node (or freeze PH...
LLVM_ABI bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond)
Attempt to thread through two successive basic blocks.
LLVM_ABI bool computeValueKnownInPredecessorsImpl(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, SmallPtrSet< Value *, 4 > &RecursionSet, Instruction *CxtI=nullptr)
computeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
LLVM_ABI void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx)
DomTreeUpdater * getDomTreeUpdater() const
LLVM_ABI bool runImpl(Function &F, FunctionAnalysisManager *FAM, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, LazyValueInfo *LVI, AAResults *AA, std::unique_ptr< DomTreeUpdater > DTU, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI)
LLVM_ABI bool processThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
LLVM_ABI bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, CondBrInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches,...
LLVM_ABI bool processBlock(BasicBlock *BB)
processBlock - If there are any predecessors whose control can be threaded through to a successor,...
LLVM_ABI bool processImpliedCondition(BasicBlock *BB)
LLVM_ABI bool duplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs)
duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
LLVM_ABI void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB)
LLVM_ABI bool tryThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
tryThreadEdge - Thread an edge if it's safe and profitable to do so.
LLVM_ABI bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
tryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2
LLVM_ABI bool tryToUnfoldSelectInCurrBB(BasicBlock *BB)
tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
LLVM_ABI void threadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
threadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Analysis to compute lazy value information.
This pass computes, caches, and vends lazy value constraint information.
An instruction for reading from memory.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
bool isUnordered() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
Metadata node.
Definition Metadata.h:1080
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
Representation for a specific memory location.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition SSAUpdater.h:39
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
void UpdateDebugValues(Instruction *I)
Rewrite debug value intrinsics to conform to a new SSA form.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class represents the LLVM 'select' instruction.
size_type size() const
Definition SmallPtrSet.h:99
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
'undef' values are things that do not have specified contents.
Definition Constants.h:1606
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
See the file comment.
Definition ValueMap.h:84
iterator find(const KeyT &Val)
Definition ValueMap.h:160
iterator end()
Definition ValueMap.h:139
ValueMapIteratorImpl< MapT, const Value *, false > iterator
Definition ValueMap.h:135
DMAtomT AtomMap
Map {(InlinedAt, old atom number) -> new atom number}.
Definition ValueMap.h:123
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition Value.cpp:1102
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:347
iterator_range< use_iterator > uses()
Definition Value.h:381
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
const ParentTy * getParent() const
Definition ilist_node.h:34
reverse_self_iterator getReverseIterator()
Definition ilist_node.h:126
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
Changed
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
bool match(Val *V, const Pattern &P)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
A private "module" namespace for types and utilities used by JumpThreading.
SmallVector< std::pair< Constant *, BasicBlock * >, 8 > PredValueInfoTy
SmallVectorImpl< std::pair< Constant *, BasicBlock * > > PredValueInfo
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned long > StopAt("sbvec-stop-at", cl::init(StopAtDisabled), cl::Hidden, cl::desc("Vectorize if the invocation count is < than this. 0 " "disables vectorization."))
InstructionCost Cost
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
auto pred_end(const MachineBasicBlock *BB)
LLVM_ABI unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
Definition Local.cpp:3244
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
constexpr from_range_t from_range
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
LLVM_ABI Value * findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
Definition Loads.cpp:686
LLVM_ABI void remapDebugVariable(ValueToValueMapTy &Mapping, Instruction *Inst)
Remap the operands of the debug records attached to Inst, and the operands of Inst itself if it's a d...
Definition Local.cpp:3480
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
auto pred_size(const MachineBasicBlock *BB)
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition Local.cpp:723
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition Loads.cpp:553
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
LLVM_ABI BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1155
LLVM_ABI bool HasLoopOrEntryConvergenceToken(const BasicBlock *BB)
Check if the given basic block contains any loop or entry convergent intrinsic instructions.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI bool hasValidBranchWeightMD(const Instruction &I)
Checks if an instructions has valid Branch Weight Metadata.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI void cloneNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, DenseMap< MDNode *, MDNode * > &ClonedScopes, StringRef Ext, LLVMContext &Context)
Duplicate the specified list of noalias decl scopes.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3110
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
TargetTransformInfo TTI
LLVM_ABI void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!...
Definition Local.cpp:763
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2052
LLVM_ABI void adaptNoAliasScopes(llvm::Instruction *I, const DenseMap< MDNode *, MDNode * > &ClonedScopes, LLVMContext &Context)
Adapt the metadata for the specified instruction according to the provided mapping.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2088
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
Definition STLExtras.h:1409
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1596
LLVM_ABI void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock * > > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them.
Definition CFG.cpp:35
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
LLVM_ABI std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
Function object to check whether the second component of a container supported by std::get (like std:...
Definition STLExtras.h:1448