// LLVM 19.0.0git
// JumpThreading.cpp
// (Doxygen page header: "Go to the documentation of this file.")
1//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Jump Threading pass.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/CFG.h"
30#include "llvm/Analysis/Loads.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Dominators.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/InstrTypes.h"
47#include "llvm/IR/Instruction.h"
50#include "llvm/IR/Intrinsics.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/MDBuilder.h"
53#include "llvm/IR/Metadata.h"
54#include "llvm/IR/Module.h"
55#include "llvm/IR/PassManager.h"
58#include "llvm/IR/Type.h"
59#include "llvm/IR/Use.h"
60#include "llvm/IR/Value.h"
65#include "llvm/Support/Debug.h"
72#include <algorithm>
73#include <cassert>
74#include <cstdint>
75#include <iterator>
76#include <memory>
77#include <utility>
78
79using namespace llvm;
80using namespace jumpthreading;
81
// Debug-output category used by LLVM_DEBUG/dbgs() in this pass.
82#define DEBUG_TYPE "jump-threading"
83
// Pass-wide statistics, reported with -stats.
84STATISTIC(NumThreads, "Number of jumps threaded");
85STATISTIC(NumFolds, "Number of terminators folded");
86STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
87
// NOTE(review): the 'static cl::opt<...>' declaration lines for the four
// options below were dropped by the documentation extraction (the embedded
// numbering jumps 87->89, 90->92, 92->95, 99->101, 103->106). Only the
// constructor-argument bodies survive; verify types/inits against upstream.
// -jump-threading-threshold: cap on block size considered for duplication.
89BBDuplicateThreshold("jump-threading-threshold",
90 cl::desc("Max block size to duplicate for jump threading"),
92
// -jump-threading-implication-search-threshold: how many predecessors to
// scan looking for a dominating (stronger) condition.
95 "jump-threading-implication-search-threshold",
96 cl::desc("The number of predecessors to search for a stronger "
97 "condition to use to thread over a weaker condition"),
99
// -jump-threading-phi-threshold: PHI-count cap, defaulting to 76 per the
// visible cl::init below.
101 "jump-threading-phi-threshold",
102 cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
103 cl::Hidden);
104
// -jump-threading-across-loop-headers: testing-only escape hatch, default
// off per the visible cl::init(false).
106 "jump-threading-across-loop-headers",
107 cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
108 cl::init(false), cl::Hidden);
109
// NOTE(review): constructor signature line dropped by extraction (numbering
// jumps 108->111); presumably JumpThreadingPass::JumpThreadingPass(int T) —
// verify against the original file. A caller-supplied T of -1 means "use the
// command-line -jump-threading-threshold value"; otherwise T wins.
111 DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
112}
113
114// Update branch probability information according to conditional
115// branch probability. This is usually made possible for cloned branches
116// in inline instances by the context specific profile in the caller.
117// For instance,
118//
119// [Block PredBB]
120// [Branch PredBr]
121// if (t) {
122// Block A;
123// } else {
124// Block B;
125// }
126//
127// [Block BB]
128// cond = PN([true, %A], [..., %B]); // PHI node
129// [Branch CondBr]
130// if (cond) {
131// ... // P(cond == true) = 1%
132// }
133//
134// Here we know that when block A is taken, cond must be true, which means
135// P(cond == true | A) = 1
136//
137// Given that P(cond == true) = P(cond == true | A) * P(A) +
138// P(cond == true | B) * P(B)
139// we get:
140// P(cond == true ) = P(A) + P(cond == true | B) * P(B)
141//
142// which gives us:
143// P(A) is less than P(cond == true), i.e.
144// P(t == true) <= P(cond == true)
145//
146// In other words, if we know P(cond == true) is unlikely, we know
147// that P(t == true) is also unlikely.
148//
// NOTE(review): the function signature line was dropped by the extraction
// (numbering jumps 148->150). Per the comment block above, this helper
// propagates the profile of the conditional branch in BB back onto an
// unprofiled conditional branch in a dominating predecessor, using a PHI
// node PN whose i1-constant incoming values pin the condition on each edge.
// Only applies when BB ends in a conditional branch with branch_weights.
150 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
151 if (!CondBr)
152 return;
153
154 uint64_t TrueWeight, FalseWeight;
155 if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))
156 return;
157
158 if (TrueWeight + FalseWeight == 0)
159 // Zero branch_weights do not give a hint for getting branch probabilities.
160 // Technically it would result in division by zero denominator, which is
161 // TrueWeight + FalseWeight.
162 return;
163
164 // Returns the outgoing edge of the dominating predecessor block
165 // that leads to the PhiNode's incoming block:
166 auto GetPredOutEdge =
167 [](BasicBlock *IncomingBB,
168 BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
169 auto *PredBB = IncomingBB;
170 auto *SuccBB = PhiBB;
// NOTE(review): original line 171 (the declaration of the 'Visited' set used
// below) is missing from this extract — verify against upstream.
172 while (true) {
173 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
174 if (PredBr && PredBr->isConditional())
175 return {PredBB, SuccBB};
176 Visited.insert(PredBB);
177 auto *SinglePredBB = PredBB->getSinglePredecessor();
// Walk only through single-predecessor chains; give up at a merge point.
178 if (!SinglePredBB)
179 return {nullptr, nullptr};
180
181 // Stop searching when SinglePredBB has been visited. It means we see
182 // an unreachable loop.
183 if (Visited.count(SinglePredBB))
184 return {nullptr, nullptr};
185
186 SuccBB = PredBB;
187 PredBB = SinglePredBB;
188 }
189 };
190
// Examine each PHI edge whose incoming value is an i1 constant; such an
// edge fixes the branch condition, so CondBr's profile bounds the edge's
// probability.
191 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
192 Value *PhiOpnd = PN->getIncomingValue(i);
193 ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
194
195 if (!CI || !CI->getType()->isIntegerTy(1))
196 continue;
197
// NOTE(review): original lines 198-199 and 201 (the construction of the
// BranchProbability 'BP' from CI and the true/false weights) are missing
// from this extract — only the argument lines survive below.
200 TrueWeight, TrueWeight + FalseWeight)
202 FalseWeight, TrueWeight + FalseWeight));
203
204 auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
205 if (!PredOutEdge.first)
206 return;
207
208 BasicBlock *PredBB = PredOutEdge.first;
209 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
210 if (!PredBr)
211 return;
212
213 uint64_t PredTrueWeight, PredFalseWeight;
214 // FIXME: We currently only set the profile data when it is missing.
215 // With PGO, this can be used to refine even existing profile data with
216 // context information. This needs to be done after more performance
217 // testing.
218 if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))
219 continue;
220
221 // We can not infer anything useful when BP >= 50%, because BP is the
222 // upper bound probability value.
223 if (BP >= BranchProbability(50, 100))
224 continue;
225
// Orient the weight pair to match which successor of PredBr takes the
// edge found by GetPredOutEdge.
226 uint32_t Weights[2];
227 if (PredBr->getSuccessor(0) == PredOutEdge.second) {
228 Weights[0] = BP.getNumerator();
229 Weights[1] = BP.getCompl().getNumerator();
230 } else {
231 Weights[0] = BP.getCompl().getNumerator();
232 Weights[1] = BP.getNumerator();
233 }
234 setBranchWeights(*PredBr, Weights, hasBranchWeightOrigin(*PredBr));
235 }
236}
237
// NOTE(review): the signature lines were dropped by the extraction
// (numbering jumps 236->240); presumably
// PreservedAnalyses JumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM)
// — the new-pass-manager entry point. Verify against the original file.
240 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
241 // Jump Threading has no sense for the targets with divergent CF
// NOTE(review): original line 242 (the divergence check guarding this early
// return) is missing from this extract.
243 return PreservedAnalyses::all();
244 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
245 auto &LVI = AM.getResult<LazyValueAnalysis>(F);
246 auto &AA = AM.getResult<AAManager>(F);
247 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
248
// Delegate to runImpl with a lazily-updated DomTreeUpdater; BFI/BPI are not
// requested here (std::nullopt).
249 bool Changed =
250 runImpl(F, &AM, &TLI, &TTI, &LVI, &AA,
251 std::make_unique<DomTreeUpdater>(
252 &DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy),
253 std::nullopt, std::nullopt);
254
255 if (!Changed)
256 return PreservedAnalyses::all();
257
258
// NOTE(review): original line 259 is missing from this extract.
260
// Verify the (post)dominator tree after threading; Full verification under
// EXPENSIVE_CHECKS, Fast otherwise.
261#if defined(EXPENSIVE_CHECKS)
262 assert(getDomTreeUpdater()->getDomTree().verify(
263 DominatorTree::VerificationLevel::Full) &&
264 "DT broken after JumpThreading");
265 assert((!getDomTreeUpdater()->hasPostDomTree() ||
266 getDomTreeUpdater()->getPostDomTree().verify(
// NOTE(review): original line 267 (the PostDominatorTree verification-level
// argument) is missing from this extract.
268 "PDT broken after JumpThreading");
269#else
270 assert(getDomTreeUpdater()->getDomTree().verify(
271 DominatorTree::VerificationLevel::Fast) &&
272 "DT broken after JumpThreading");
273 assert((!getDomTreeUpdater()->hasPostDomTree() ||
274 getDomTreeUpdater()->getPostDomTree().verify(
// NOTE(review): original line 275 is likewise missing here.
276 "PDT broken after JumpThreading");
277#endif
278
279 return getPreservedAnalysis();
280}
281
// NOTE(review): the head of the signature was dropped by the extraction
// (numbering jumps 281->283, and line 284 is missing); presumably
// bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_, ...)
// — verify against the original file. Caches the analyses on members, seeds
// the unreachable-block set, then iterates processBlock to a fixed point.
// Returns true if anything changed.
283 TargetLibraryInfo *TLI_,
285 AliasAnalysis *AA_,
286 std::unique_ptr<DomTreeUpdater> DTU_,
287 std::optional<BlockFrequencyInfo *> BFI_,
288 std::optional<BranchProbabilityInfo *> BPI_) {
289 LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");
290 F = &F_;
291 FAM = FAM_;
292 TLI = TLI_;
293 TTI = TTI_;
294 LVI = LVI_;
295 AA = AA_;
296 DTU = std::move(DTU_);
297 BFI = BFI_;
298 BPI = BPI_;
// Guard-widening interaction: only bother processing guards if the
// experimental_guard intrinsic is declared and actually used.
299 auto *GuardDecl = F->getParent()->getFunction(
300 Intrinsic::getName(Intrinsic::experimental_guard));
301 HasGuards = GuardDecl && !GuardDecl->use_empty();
302
303 // Reduce the number of instructions duplicated when optimizing strictly for
304 // size.
305 if (BBDuplicateThreshold.getNumOccurrences())
306 BBDupThreshold = BBDuplicateThreshold;
307 else if (F->hasFnAttribute(Attribute::MinSize))
308 BBDupThreshold = 3;
309 else
310 BBDupThreshold = DefaultBBDupThreshold;
311
312 // JumpThreading must not processes blocks unreachable from entry. It's a
313 // waste of compute time and can potentially lead to hangs.
// NOTE(review): original line 314 (the declaration of the 'Unreachable' set
// filled below) is missing from this extract.
315 assert(DTU && "DTU isn't passed into JumpThreading before using it.");
316 assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
317 DominatorTree &DT = DTU->getDomTree();
318 for (auto &BB : *F)
319 if (!DT.isReachableFromEntry(&BB))
320 Unreachable.insert(&BB);
321
// NOTE(review): original lines 322-323 (presumably the conditional call to
// findLoopHeaders that populates LoopHeaders) are missing from this extract.
324
// Fixed-point driver: keep sweeping the function until a full pass makes no
// change.
325 bool EverChanged = false;
326 bool Changed;
327 do {
328 Changed = false;
329 for (auto &BB : *F) {
330 if (Unreachable.count(&BB))
331 continue;
332 while (processBlock(&BB)) // Thread all of the branches we can over BB.
333 Changed = ChangedSinceLastAnalysisUpdate = true;
334
335 // Jump threading may have introduced redundant debug values into BB
336 // which should be removed.
337 if (Changed)
// NOTE(review): original line 338 (the redundant-debug-value cleanup call)
// is missing from this extract.
339
340 // Stop processing BB if it's the entry or is now deleted. The following
341 // routines attempt to eliminate BB and locating a suitable replacement
342 // for the entry is non-trivial.
343 if (&BB == &F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))
344 continue;
345
346 if (pred_empty(&BB)) {
347 // When processBlock makes BB unreachable it doesn't bother to fix up
348 // the instructions in it. We must remove BB to prevent invalid IR.
349 LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
350 << "' with terminator: " << *BB.getTerminator()
351 << '\n');
352 LoopHeaders.erase(&BB);
353 LVI->eraseBlock(&BB);
354 DeleteDeadBlock(&BB, DTU.get());
355 Changed = ChangedSinceLastAnalysisUpdate = true;
356 continue;
357 }
358
359 // processBlock doesn't thread BBs with unconditional TIs. However, if BB
360 // is "almost empty", we attempt to merge BB with its sole successor.
361 auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
362 if (BI && BI->isUnconditional()) {
363 BasicBlock *Succ = BI->getSuccessor(0);
364 if (
365 // The terminator must be the only non-phi instruction in BB.
366 BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
367 // Don't alter Loop headers and latches to ensure another pass can
368 // detect and transform nested loops later.
369 !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
// NOTE(review): original lines 370-371 (the remaining condition and the
// block-merging call, presumably TryToSimplifyUncondBranchFromEmptyBlock)
// are missing from this extract.
372 // BB is valid for cleanup here because we passed in DTU. F remains
373 // BB's parent until a DTU->getDomTree() event.
374 LVI->eraseBlock(&BB);
375 Changed = ChangedSinceLastAnalysisUpdate = true;
376 }
377 }
378 }
379 EverChanged |= Changed;
380 } while (Changed);
381
382 LoopHeaders.clear();
383 return EverChanged;
384}
385
386// Replace uses of Cond with ToVal when safe to do so. If all uses are
387// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
388// because we may incorrectly replace uses when guards/assumes are uses of
389// of `Cond` and we used the guards/assume to reason about the `Cond` value
390// at the end of block. RAUW unconditionally replaces all uses
391// including the guards/assumes themselves and the uses before the
392// guard/assume.
// NOTE(review): the head of the signature was dropped by the extraction
// (numbering jumps 392->394); per the comment block above this is the
// helper replacing foldable uses of Cond with ToVal inside and below
// KnownAtEndOfBB — verify the exact signature against the original file.
394 BasicBlock *KnownAtEndOfBB) {
395 bool Changed = false;
396 assert(Cond->getType() == ToVal->getType());
397 // We can unconditionally replace all uses in non-local blocks (i.e. uses
398 // strictly dominated by BB), since LVI information is true from the
399 // terminator of BB.
400 if (Cond->getParent() == KnownAtEndOfBB)
401 Changed |= replaceNonLocalUsesWith(Cond, ToVal);
// Walk the block bottom-up so we only touch instructions after Cond.
402 for (Instruction &I : reverse(*KnownAtEndOfBB)) {
403 // Replace any debug-info record users of Cond with ToVal.
404 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
405 DVR.replaceVariableLocationOp(Cond, ToVal, true);
406
407 // Reached the Cond whose uses we are trying to replace, so there are no
408 // more uses.
409 if (&I == Cond)
410 break;
411 // We only replace uses in instructions that are guaranteed to reach the end
412 // of BB, where we know Cond is ToVal.
// NOTE(review): original line 413 (the guaranteed-to-transfer-execution
// check that this 'break' belongs to) is missing from this extract.
414 break;
415 Changed |= I.replaceUsesOfWith(Cond, ToVal);
416 }
// If every use was replaced and Cond is side-effect free, drop it.
417 if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
418 Cond->eraseFromParent();
419 Changed = true;
420 }
421 return Changed;
422}
423
424/// Return the cost of duplicating a piece of this block from first non-phi
425/// and before StopAt instruction to thread across it. Stop scanning the block
426/// when exceeding the threshold. If duplication is impossible, returns ~0U.
// NOTE(review): the first line(s) of the signature were dropped by the
// extraction (numbering jumps 426->428). Per the doc comment above, this
// returns the duplication cost of BB from the first non-PHI up to (not
// including) StopAt, or ~0U when duplication is impossible.
428 BasicBlock *BB,
429 Instruction *StopAt,
430 unsigned Threshold) {
431 assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
432
433 // Do not duplicate the BB if it has a lot of PHI nodes.
434 // If a threadable chain is too long then the number of PHI nodes can add up,
435 // leading to a substantial increase in compile time when rewriting the SSA.
436 unsigned PhiCount = 0;
437 Instruction *FirstNonPHI = nullptr;
438 for (Instruction &I : *BB) {
439 if (!isa<PHINode>(&I)) {
440 FirstNonPHI = &I;
441 break;
442 }
443 if (++PhiCount > PhiDuplicateThreshold)
444 return ~0U;
445 }
446
447 /// Ignore PHI nodes, these will be flattened when duplication happens.
448 BasicBlock::const_iterator I(FirstNonPHI);
449
450 // FIXME: THREADING will delete values that are just used to compute the
451 // branch, so they shouldn't count against the duplication cost.
452
453 unsigned Bonus = 0;
454 if (BB->getTerminator() == StopAt) {
455 // Threading through a switch statement is particularly profitable. If this
456 // block ends in a switch, decrease its cost to make it more likely to
457 // happen.
458 if (isa<SwitchInst>(StopAt))
459 Bonus = 6;
460
461 // The same holds for indirect branches, but slightly more so.
462 if (isa<IndirectBrInst>(StopAt))
463 Bonus = 8;
464 }
465
466 // Bump the threshold up so the early exit from the loop doesn't skip the
467 // terminator-based Size adjustment at the end.
468 Threshold += Bonus;
469
470 // Sum up the cost of each instruction until we get to the terminator. Don't
471 // include the terminator because the copy won't include it.
472 unsigned Size = 0;
473 for (; &*I != StopAt; ++I) {
474
475 // Stop scanning the block if we've reached the threshold.
476 if (Size > Threshold)
477 return Size;
478
479 // Bail out if this instruction gives back a token type, it is not possible
480 // to duplicate it if it is used outside this BB.
481 if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
482 return ~0U;
483
484 // Blocks with NoDuplicate are modelled as having infinite cost, so they
485 // are never duplicated.
486 if (const CallInst *CI = dyn_cast<CallInst>(I))
487 if (CI->cannotDuplicate() || CI->isConvergent())
488 return ~0U;
489
// NOTE(review): original lines 490-491 (the check for zero-cost
// instructions that this 'continue' belongs to) are missing from this
// extract.
492 continue;
493
494 // All other instructions count for at least one unit.
495 ++Size;
496
497 // Calls are more expensive. If they are non-intrinsic calls, we model them
498 // as having cost of 4. If they are a non-vector intrinsic, we model them
499 // as having cost of 2 total, and if they are a vector intrinsic, we model
500 // them as having cost 1.
501 if (const CallInst *CI = dyn_cast<CallInst>(I)) {
502 if (!isa<IntrinsicInst>(CI))
503 Size += 3;
504 else if (!CI->getType()->isVectorTy())
505 Size += 1;
506 }
507 }
508
// Give back the bonus (saturating at zero) so that threshold inflation
// above does not leak into the reported cost.
509 return Size > Bonus ? Size - Bonus : 0;
510}
511
512/// findLoopHeaders - We do not want jump threading to turn proper loop
513/// structures into irreducible loops. Doing this breaks up the loop nesting
514/// hierarchy and pessimizes later transformations. To prevent this from
515/// happening, we first have to find the loop headers. Here we approximate this
516/// by finding targets of backedges in the CFG.
517///
518/// Note that there definitely are cases when we want to allow threading of
519/// edges across a loop header. For example, threading a jump from outside the
520/// loop (the preheader) to an exit block of the loop is definitely profitable.
521/// It is also almost always profitable to thread backedges from within the loop
522/// to exit blocks, and is often profitable to thread backedges to other blocks
523/// within the loop (forming a nested loop). This simple analysis is not rich
524/// enough to track all of these properties and keep it up-to-date as the CFG
525/// mutates, so we don't allow any of these transformations.
// NOTE(review): the signature and the declaration of 'Edges' were dropped
// by the extraction (numbering jumps 525->528). Per the doc comment above,
// this approximates loop headers as the targets of CFG backedges.
528 FindFunctionBackedges(F, Edges);
529
// Each backedge is a (source, target) pair; the target is a loop header.
530 for (const auto &Edge : Edges)
531 LoopHeaders.insert(Edge.second);
532}
533
534/// getKnownConstant - Helper method to determine if we can thread over a
535/// terminator with the given value as its condition, and if so what value to
536/// use for that. What kind of value this is depends on whether we want an
537/// integer or a block address, but an undef is always accepted.
538/// Returns null if Val is null or not an appropriate constant.
// NOTE(review): the signature line was dropped by the extraction (numbering
// jumps 538->540). Per the doc comment above, this maps Val to the constant
// we can thread on (undef, BlockAddress, or ConstantInt depending on
// Preference), or null.
540 if (!Val)
541 return nullptr;
542
543 // Undef is "known" enough.
544 if (UndefValue *U = dyn_cast<UndefValue>(Val))
545 return U;
546
// For indirectbr threading we want a block address, looked through casts.
547 if (Preference == WantBlockAddress)
548 return dyn_cast<BlockAddress>(Val->stripPointerCasts());
549
// Otherwise only integer constants are interesting.
550 return dyn_cast<ConstantInt>(Val);
551}
552
553/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
554/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
555/// in any of our predecessors. If so, return the known list of value and pred
556/// BB in the result vector.
557///
558/// This returns true if there were any known values.
// NOTE(review): the first line of the signature was dropped by the
// extraction (numbering jumps 558->560); presumably
// bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( — verify
// against the original file. Per the doc comment above, this fills Result
// with (constant, predecessor) pairs for V at the end of each predecessor
// of BB, returning true if any were found.
560 Value *V, BasicBlock *BB, PredValueInfo &Result,
561 ConstantPreference Preference, SmallPtrSet<Value *, 4> &RecursionSet,
562 Instruction *CxtI) {
563 const DataLayout &DL = BB->getDataLayout();
564
565 // This method walks up use-def chains recursively. Because of this, we could
566 // get into an infinite loop going around loops in the use-def chain. To
567 // prevent this, keep track of what (value, block) pairs we've already visited
568 // and terminate the search if we loop back to them
569 if (!RecursionSet.insert(V).second)
570 return false;
571
572 // If V is a constant, then it is known in all predecessors.
573 if (Constant *KC = getKnownConstant(V, Preference)) {
574 for (BasicBlock *Pred : predecessors(BB))
575 Result.emplace_back(KC, Pred);
576
577 return !Result.empty();
578 }
579
580 // If V is a non-instruction value, or an instruction in a different block,
581 // then it can't be derived from a PHI.
582 Instruction *I = dyn_cast<Instruction>(V);
583 if (!I || I->getParent() != BB) {
584
585 // Okay, if this is a live-in value, see if it has a known value at the any
586 // edge from our predecessors.
587 for (BasicBlock *P : predecessors(BB)) {
588 using namespace PatternMatch;
589 // If the value is known by LazyValueInfo to be a constant in a
590 // predecessor, use that information to try to thread this block.
591 Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
592 // If I is a non-local compare-with-constant instruction, use more-rich
593 // 'getPredicateOnEdge' method. This would be able to handle value
594 // inequalities better, for example if the compare is "X < 4" and "X < 3"
595 // is known true but "X < 4" itself is not available.
// NOTE(review): original line 596 (presumably the declaration of the
// predicate variable 'Pred' matched below) is missing from this extract.
597 Value *Val;
598 Constant *Cst;
599 if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst)))) {
600 auto Res = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
601 if (Res != LazyValueInfo::Unknown)
602 PredCst = ConstantInt::getBool(V->getContext(), Res);
603 }
604 if (Constant *KC = getKnownConstant(PredCst, Preference))
605 Result.emplace_back(KC, P);
606 }
607
608 return !Result.empty();
609 }
610
611 /// If I is a PHI node, then we know the incoming values for any constants.
612 if (PHINode *PN = dyn_cast<PHINode>(I)) {
613 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
614 Value *InVal = PN->getIncomingValue(i);
615 if (Constant *KC = getKnownConstant(InVal, Preference)) {
616 Result.emplace_back(KC, PN->getIncomingBlock(i));
617 } else {
// Non-constant incoming value: ask LVI what it is on that edge.
618 Constant *CI = LVI->getConstantOnEdge(InVal,
619 PN->getIncomingBlock(i),
620 BB, CxtI);
621 if (Constant *KC = getKnownConstant(CI, Preference))
622 Result.emplace_back(KC, PN->getIncomingBlock(i));
623 }
624 }
625
626 return !Result.empty();
627 }
628
629 // Handle Cast instructions.
630 if (CastInst *CI = dyn_cast<CastInst>(I)) {
631 Value *Source = CI->getOperand(0);
632 PredValueInfoTy Vals;
633 computeValueKnownInPredecessorsImpl(Source, BB, Vals, Preference,
634 RecursionSet, CxtI);
635 if (Vals.empty())
636 return false;
637
638 // Convert the known values.
639 for (auto &Val : Vals)
640 if (Constant *Folded = ConstantFoldCastOperand(CI->getOpcode(), Val.first,
641 CI->getType(), DL))
642 Result.emplace_back(Folded, Val.second);
643
644 return !Result.empty();
645 }
646
// Freeze: propagate through, but only keep values proven not undef/poison.
647 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
648 Value *Source = FI->getOperand(0);
649 computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
650 RecursionSet, CxtI);
651
652 erase_if(Result, [](auto &Pair) {
653 return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
654 });
655
656 return !Result.empty();
657 }
658
659 // Handle some boolean conditions.
660 if (I->getType()->getPrimitiveSizeInBits() == 1) {
661 using namespace PatternMatch;
662 if (Preference != WantInteger)
663 return false;
664 // X | true -> true
665 // X & false -> false
666 Value *Op0, *Op1;
667 if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
668 match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
669 PredValueInfoTy LHSVals, RHSVals;
670
// NOTE(review): original lines 671 and 673 (the two recursive
// computeValueKnownInPredecessorsImpl calls filling LHSVals/RHSVals) are
// missing from this extract — only their trailing argument lines remain.
672 RecursionSet, CxtI);
674 RecursionSet, CxtI);
675
676 if (LHSVals.empty() && RHSVals.empty())
677 return false;
678
// The dominating value: 'true' for or, 'false' for and.
679 ConstantInt *InterestingVal;
680 if (match(I, m_LogicalOr()))
681 InterestingVal = ConstantInt::getTrue(I->getContext());
682 else
683 InterestingVal = ConstantInt::getFalse(I->getContext());
684
685 SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
686
687 // Scan for the sentinel. If we find an undef, force it to the
688 // interesting value: x|undef -> true and x&undef -> false.
689 for (const auto &LHSVal : LHSVals)
690 if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
691 Result.emplace_back(InterestingVal, LHSVal.second);
692 LHSKnownBBs.insert(LHSVal.second);
693 }
694 for (const auto &RHSVal : RHSVals)
695 if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
696 // If we already inferred a value for this block on the LHS, don't
697 // re-add it.
698 if (!LHSKnownBBs.count(RHSVal.second))
699 Result.emplace_back(InterestingVal, RHSVal.second);
700 }
701
702 return !Result.empty();
703 }
704
705 // Handle the NOT form of XOR.
706 if (I->getOpcode() == Instruction::Xor &&
707 isa<ConstantInt>(I->getOperand(1)) &&
708 cast<ConstantInt>(I->getOperand(1))->isOne()) {
709 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
710 WantInteger, RecursionSet, CxtI);
711 if (Result.empty())
712 return false;
713
714 // Invert the known values.
715 for (auto &R : Result)
716 R.first = ConstantExpr::getNot(R.first);
717
718 return true;
719 }
720
721 // Try to simplify some other binary operator values.
722 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
723 if (Preference != WantInteger)
724 return false;
725 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
726 PredValueInfoTy LHSVals;
727 computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
728 WantInteger, RecursionSet, CxtI);
729
730 // Try to use constant folding to simplify the binary operator.
731 for (const auto &LHSVal : LHSVals) {
732 Constant *V = LHSVal.first;
733 Constant *Folded =
734 ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
735
736 if (Constant *KC = getKnownConstant(Folded, WantInteger))
737 Result.emplace_back(KC, LHSVal.second);
738 }
739 }
740
741 return !Result.empty();
742 }
743
744 // Handle compare with phi operand, where the PHI is defined in this block.
745 if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
746 if (Preference != WantInteger)
747 return false;
748 Type *CmpType = Cmp->getType();
749 Value *CmpLHS = Cmp->getOperand(0);
750 Value *CmpRHS = Cmp->getOperand(1);
751 CmpInst::Predicate Pred = Cmp->getPredicate();
752
753 PHINode *PN = dyn_cast<PHINode>(CmpLHS);
754 if (!PN)
755 PN = dyn_cast<PHINode>(CmpRHS);
756 // Do not perform phi translation across a loop header phi, because this
757 // may result in comparison of values from two different loop iterations.
758 // FIXME: This check is broken if LoopHeaders is not populated.
759 if (PN && PN->getParent() == BB && !LoopHeaders.contains(BB)) {
760 const DataLayout &DL = PN->getDataLayout();
761 // We can do this simplification if any comparisons fold to true or false.
762 // See if any do.
763 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
764 BasicBlock *PredBB = PN->getIncomingBlock(i);
765 Value *LHS, *RHS;
766 if (PN == CmpLHS) {
767 LHS = PN->getIncomingValue(i);
768 RHS = CmpRHS->DoPHITranslation(BB, PredBB);
769 } else {
770 LHS = CmpLHS->DoPHITranslation(BB, PredBB);
771 RHS = PN->getIncomingValue(i);
772 }
773 Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
774 if (!Res) {
775 if (!isa<Constant>(RHS))
776 continue;
777
778 // getPredicateOnEdge call will make no sense if LHS is defined in BB.
779 auto LHSInst = dyn_cast<Instruction>(LHS);
780 if (LHSInst && LHSInst->getParent() == BB)
781 continue;
782
// NOTE(review): original line 783 (presumably the declaration of 'ResT')
// is missing from this extract.
784 ResT = LVI->getPredicateOnEdge(Pred, LHS,
785 cast<Constant>(RHS), PredBB, BB,
786 CxtI ? CxtI : Cmp);
787 if (ResT == LazyValueInfo::Unknown)
788 continue;
789 Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
790 }
791
792 if (Constant *KC = getKnownConstant(Res, WantInteger))
793 Result.emplace_back(KC, PredBB);
794 }
795
796 return !Result.empty();
797 }
798
799 // If comparing a live-in value against a constant, see if we know the
800 // live-in value on any predecessors.
801 if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
802 Constant *CmpConst = cast<Constant>(CmpRHS);
803
804 if (!isa<Instruction>(CmpLHS) ||
805 cast<Instruction>(CmpLHS)->getParent() != BB) {
806 for (BasicBlock *P : predecessors(BB)) {
807 // If the value is known by LazyValueInfo to be a constant in a
808 // predecessor, use that information to try to thread this block.
// NOTE(review): original line 809 (the declaration of 'Res' receiving this
// call) is missing from this extract.
810 LVI->getPredicateOnEdge(Pred, CmpLHS,
811 CmpConst, P, BB, CxtI ? CxtI : Cmp);
812 if (Res == LazyValueInfo::Unknown)
813 continue;
814
815 Constant *ResC = ConstantInt::get(CmpType, Res);
816 Result.emplace_back(ResC, P);
817 }
818
819 return !Result.empty();
820 }
821
822 // InstCombine can fold some forms of constant range checks into
823 // (icmp (add (x, C1)), C2). See if we have we have such a thing with
824 // x as a live-in.
825 {
826 using namespace PatternMatch;
827
828 Value *AddLHS;
829 ConstantInt *AddConst;
830 if (isa<ConstantInt>(CmpConst) &&
831 match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
832 if (!isa<Instruction>(AddLHS) ||
833 cast<Instruction>(AddLHS)->getParent() != BB) {
834 for (BasicBlock *P : predecessors(BB)) {
835 // If the value is known by LazyValueInfo to be a ConstantRange in
836 // a predecessor, use that information to try to thread this
837 // block.
// NOTE(review): original line 838 (the LVI call producing the range 'CR')
// is missing from this extract.
839 AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
840 // Propagate the range through the addition.
841 CR = CR.add(AddConst->getValue());
842
843 // Get the range where the compare returns true.
// NOTE(review): original line 844 (the ConstantRange::makeExactICmpRegion-
// style call producing 'CmpRange') is missing from this extract.
845 Pred, cast<ConstantInt>(CmpConst)->getValue());
846
847 Constant *ResC;
848 if (CmpRange.contains(CR))
849 ResC = ConstantInt::getTrue(CmpType);
850 else if (CmpRange.inverse().contains(CR))
851 ResC = ConstantInt::getFalse(CmpType);
852 else
853 continue;
854
855 Result.emplace_back(ResC, P);
856 }
857
858 return !Result.empty();
859 }
860 }
861 }
862
863 // Try to find a constant value for the LHS of a comparison,
864 // and evaluate it statically if we can.
865 PredValueInfoTy LHSVals;
866 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
867 WantInteger, RecursionSet, CxtI);
868
869 for (const auto &LHSVal : LHSVals) {
870 Constant *V = LHSVal.first;
871 Constant *Folded =
872 ConstantFoldCompareInstOperands(Pred, V, CmpConst, DL);
873 if (Constant *KC = getKnownConstant(Folded, WantInteger))
874 Result.emplace_back(KC, LHSVal.second);
875 }
876
877 return !Result.empty();
878 }
879 }
880
881 if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
882 // Handle select instructions where at least one operand is a known constant
883 // and we can figure out the condition value for any predecessor block.
884 Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
885 Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
886 PredValueInfoTy Conds;
887 if ((TrueVal || FalseVal) &&
888 computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
889 WantInteger, RecursionSet, CxtI)) {
890 for (auto &C : Conds) {
891 Constant *Cond = C.first;
892
893 // Figure out what value to use for the condition.
894 bool KnownCond;
895 if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
896 // A known boolean.
897 KnownCond = CI->isOne();
898 } else {
899 assert(isa<UndefValue>(Cond) && "Unexpected condition value");
900 // Either operand will do, so be sure to pick the one that's a known
901 // constant.
902 // FIXME: Do this more cleverly if both values are known constants?
903 KnownCond = (TrueVal != nullptr);
904 }
905
906 // See if the select has a known constant value for this predecessor.
907 if (Constant *Val = KnownCond ? TrueVal : FalseVal)
908 Result.emplace_back(Val, C.second);
909 }
910
911 return !Result.empty();
912 }
913 }
914
915 // If all else fails, see if LVI can figure out a constant value for us.
916 assert(CxtI->getParent() == BB && "CxtI should be in BB");
917 Constant *CI = LVI->getConstant(V, CxtI);
918 if (Constant *KC = getKnownConstant(CI, Preference)) {
919 for (BasicBlock *Pred : predecessors(BB))
920 Result.emplace_back(KC, Pred);
921 }
922
923 return !Result.empty();
924}
925
926/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
927/// in an undefined jump, decide which block is best to revector to.
928///
929/// Since we can pick an arbitrary destination, we pick the successor with the
930/// fewest predecessors. This should reduce the in-degree of the others.
// NOTE(review): the signature line was dropped by the extraction (numbering
// jumps 930->932). Per the doc comment above, this picks the successor
// index with the fewest predecessors for a branch on undef. Ties keep the
// earliest successor index (strict '<' below).
932 Instruction *BBTerm = BB->getTerminator();
933 unsigned MinSucc = 0;
934 BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
935 // Compute the successor with the minimum number of predecessors.
936 unsigned MinNumPreds = pred_size(TestBB);
937 for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
938 TestBB = BBTerm->getSuccessor(i);
939 unsigned NumPreds = pred_size(TestBB);
940 if (NumPreds < MinNumPreds) {
941 MinSucc = i;
942 MinNumPreds = NumPreds;
943 }
944 }
945
946 return MinSucc;
947}
948
// NOTE(review): the signature line was dropped by the extraction (numbering
// jumps 948->950); presumably a static predicate over BasicBlock *BB.
// Returns whether BB's address is taken AND the BlockAddress still has live
// (non-dead-constant) users.
950 if (!BB->hasAddressTaken()) return false;
951
952 // If the block has its address taken, it may be a tree of dead constants
953 // hanging off of it. These shouldn't keep the block alive.
// NOTE(review): original lines 954-955 (retrieving the BlockAddress 'BA'
// and pruning its dead constant users) are missing from this extract.
956 return !BA->use_empty();
957}
958
959/// processBlock - If there are any predecessors whose control can be threaded
960/// through to a successor, transform them now.
962 // If the block is trivially dead, just return and let the caller nuke it.
963 // This simplifies other transformations.
964 if (DTU->isBBPendingDeletion(BB) ||
965 (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
966 return false;
967
968 // If this block has a single predecessor, and if that pred has a single
969 // successor, merge the blocks. This encourages recursive jump threading
970 // because now the condition in this block can be threaded through
971 // predecessors of our predecessor block.
973 return true;
974
976 return true;
977
978 // Look if we can propagate guards to predecessors.
979 if (HasGuards && processGuards(BB))
980 return true;
981
982 // What kind of constant we're looking for.
983 ConstantPreference Preference = WantInteger;
984
985 // Look to see if the terminator is a conditional branch, switch or indirect
986 // branch, if not we can't thread it.
987 Value *Condition;
988 Instruction *Terminator = BB->getTerminator();
989 if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
990 // Can't thread an unconditional jump.
991 if (BI->isUnconditional()) return false;
992 Condition = BI->getCondition();
993 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
994 Condition = SI->getCondition();
995 } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
996 // Can't thread indirect branch with no successors.
997 if (IB->getNumSuccessors() == 0) return false;
998 Condition = IB->getAddress()->stripPointerCasts();
999 Preference = WantBlockAddress;
1000 } else {
1001 return false; // Must be an invoke or callbr.
1002 }
1003
1004 // Keep track if we constant folded the condition in this invocation.
1005 bool ConstantFolded = false;
1006
1007 // Run constant folding to see if we can reduce the condition to a simple
1008 // constant.
1009 if (Instruction *I = dyn_cast<Instruction>(Condition)) {
1010 Value *SimpleVal =
1012 if (SimpleVal) {
1013 I->replaceAllUsesWith(SimpleVal);
1014 if (isInstructionTriviallyDead(I, TLI))
1015 I->eraseFromParent();
1016 Condition = SimpleVal;
1017 ConstantFolded = true;
1018 }
1019 }
1020
1021 // If the terminator is branching on an undef or freeze undef, we can pick any
1022 // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
1023 auto *FI = dyn_cast<FreezeInst>(Condition);
1024 if (isa<UndefValue>(Condition) ||
1025 (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1026 unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1027 std::vector<DominatorTree::UpdateType> Updates;
1028
1029 // Fold the branch/switch.
1030 Instruction *BBTerm = BB->getTerminator();
1031 Updates.reserve(BBTerm->getNumSuccessors());
1032 for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1033 if (i == BestSucc) continue;
1034 BasicBlock *Succ = BBTerm->getSuccessor(i);
1035 Succ->removePredecessor(BB, true);
1036 Updates.push_back({DominatorTree::Delete, BB, Succ});
1037 }
1038
1039 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1040 << "' folding undef terminator: " << *BBTerm << '\n');
1041 Instruction *NewBI = BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm->getIterator());
1042 NewBI->setDebugLoc(BBTerm->getDebugLoc());
1043 ++NumFolds;
1044 BBTerm->eraseFromParent();
1045 DTU->applyUpdatesPermissive(Updates);
1046 if (FI)
1047 FI->eraseFromParent();
1048 return true;
1049 }
1050
1051 // If the terminator of this block is branching on a constant, simplify the
1052 // terminator to an unconditional branch. This can occur due to threading in
1053 // other blocks.
1054 if (getKnownConstant(Condition, Preference)) {
1055 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1056 << "' folding terminator: " << *BB->getTerminator()
1057 << '\n');
1058 ++NumFolds;
1059 ConstantFoldTerminator(BB, true, nullptr, DTU.get());
1060 if (auto *BPI = getBPI())
1061 BPI->eraseBlock(BB);
1062 return true;
1063 }
1064
1065 Instruction *CondInst = dyn_cast<Instruction>(Condition);
1066
1067 // All the rest of our checks depend on the condition being an instruction.
1068 if (!CondInst) {
1069 // FIXME: Unify this with code below.
1070 if (processThreadableEdges(Condition, BB, Preference, Terminator))
1071 return true;
1072 return ConstantFolded;
1073 }
1074
1075 // Some of the following optimization can safely work on the unfrozen cond.
1076 Value *CondWithoutFreeze = CondInst;
1077 if (auto *FI = dyn_cast<FreezeInst>(CondInst))
1078 CondWithoutFreeze = FI->getOperand(0);
1079
1080 if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
1081 // If we're branching on a conditional, LVI might be able to determine
1082 // it's value at the branch instruction. We only handle comparisons
1083 // against a constant at this time.
1084 if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
1086 LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1087 CondConst, BB->getTerminator(),
1088 /*UseBlockValue=*/false);
1089 if (Ret != LazyValueInfo::Unknown) {
1090 // We can safely replace *some* uses of the CondInst if it has
1091 // exactly one value as returned by LVI. RAUW is incorrect in the
1092 // presence of guards and assumes, that have the `Cond` as the use. This
1093 // is because we use the guards/assume to reason about the `Cond` value
1094 // at the end of block, but RAUW unconditionally replaces all uses
1095 // including the guards/assumes themselves and the uses before the
1096 // guard/assume.
1097 auto *CI = Ret == LazyValueInfo::True ?
1098 ConstantInt::getTrue(CondCmp->getType()) :
1099 ConstantInt::getFalse(CondCmp->getType());
1100 if (replaceFoldableUses(CondCmp, CI, BB))
1101 return true;
1102 }
1103
1104 // We did not manage to simplify this branch, try to see whether
1105 // CondCmp depends on a known phi-select pattern.
1106 if (tryToUnfoldSelect(CondCmp, BB))
1107 return true;
1108 }
1109 }
1110
1111 if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
1112 if (tryToUnfoldSelect(SI, BB))
1113 return true;
1114
1115 // Check for some cases that are worth simplifying. Right now we want to look
1116 // for loads that are used by a switch or by the condition for the branch. If
1117 // we see one, check to see if it's partially redundant. If so, insert a PHI
1118 // which can then be used to thread the values.
1119 Value *SimplifyValue = CondWithoutFreeze;
1120
1121 if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1122 if (isa<Constant>(CondCmp->getOperand(1)))
1123 SimplifyValue = CondCmp->getOperand(0);
1124
1125 // TODO: There are other places where load PRE would be profitable, such as
1126 // more complex comparisons.
1127 if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
1129 return true;
1130
1131 // Before threading, try to propagate profile data backwards:
1132 if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1133 if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1135
1136 // Handle a variety of cases where we are branching on something derived from
1137 // a PHI node in the current block. If we can prove that any predecessors
1138 // compute a predictable value based on a PHI node, thread those predecessors.
1139 if (processThreadableEdges(CondInst, BB, Preference, Terminator))
1140 return true;
1141
1142 // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1143 // the current block, see if we can simplify.
1144 PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
1145 if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1146 return processBranchOnPHI(PN);
1147
1148 // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1149 if (CondInst->getOpcode() == Instruction::Xor &&
1150 CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1151 return processBranchOnXOR(cast<BinaryOperator>(CondInst));
1152
1153 // Search for a stronger dominating condition that can be used to simplify a
1154 // conditional branch leaving BB.
1156 return true;
1157
1158 return false;
1159}
1160
1162 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1163 if (!BI || !BI->isConditional())
1164 return false;
1165
1166 Value *Cond = BI->getCondition();
1167 // Assuming that predecessor's branch was taken, if pred's branch condition
1168 // (V) implies Cond, Cond can be either true, undef, or poison. In this case,
1169 // freeze(Cond) is either true or a nondeterministic value.
1170 // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
1171 // without affecting other instructions.
1172 auto *FICond = dyn_cast<FreezeInst>(Cond);
1173 if (FICond && FICond->hasOneUse())
1174 Cond = FICond->getOperand(0);
1175 else
1176 FICond = nullptr;
1177
1178 BasicBlock *CurrentBB = BB;
1179 BasicBlock *CurrentPred = BB->getSinglePredecessor();
1180 unsigned Iter = 0;
1181
1182 auto &DL = BB->getDataLayout();
1183
1184 while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1185 auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1186 if (!PBI || !PBI->isConditional())
1187 return false;
1188 if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1189 return false;
1190
1191 bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1192 std::optional<bool> Implication =
1193 isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1194
1195 // If the branch condition of BB (which is Cond) and CurrentPred are
1196 // exactly the same freeze instruction, Cond can be folded into CondIsTrue.
1197 if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
1198 if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
1199 FICond->getOperand(0))
1200 Implication = CondIsTrue;
1201 }
1202
1203 if (Implication) {
1204 BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1205 BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1206 RemoveSucc->removePredecessor(BB);
1207 BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI->getIterator());
1208 UncondBI->setDebugLoc(BI->getDebugLoc());
1209 ++NumFolds;
1210 BI->eraseFromParent();
1211 if (FICond)
1212 FICond->eraseFromParent();
1213
1214 DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
1215 if (auto *BPI = getBPI())
1216 BPI->eraseBlock(BB);
1217 return true;
1218 }
1219 CurrentBB = CurrentPred;
1220 CurrentPred = CurrentBB->getSinglePredecessor();
1221 }
1222
1223 return false;
1224}
1225
1226/// Return true if Op is an instruction defined in the given block.
1228 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1229 if (OpInst->getParent() == BB)
1230 return true;
1231 return false;
1232}
1233
1234/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1235/// redundant load instruction, eliminate it by replacing it with a PHI node.
1236/// This is an important optimization that encourages jump threading, and needs
1237/// to be run interlaced with other jump threading tasks.
1239 // Don't hack volatile and ordered loads.
1240 if (!LoadI->isUnordered()) return false;
1241
1242 // If the load is defined in a block with exactly one predecessor, it can't be
1243 // partially redundant.
1244 BasicBlock *LoadBB = LoadI->getParent();
1245 if (LoadBB->getSinglePredecessor())
1246 return false;
1247
1248 // If the load is defined in an EH pad, it can't be partially redundant,
1249 // because the edges between the invoke and the EH pad cannot have other
1250 // instructions between them.
1251 if (LoadBB->isEHPad())
1252 return false;
1253
1254 Value *LoadedPtr = LoadI->getOperand(0);
1255
1256 // If the loaded operand is defined in the LoadBB and its not a phi,
1257 // it can't be available in predecessors.
1258 if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1259 return false;
1260
1261 // Scan a few instructions up from the load, to see if it is obviously live at
1262 // the entry to its block.
1263 BasicBlock::iterator BBIt(LoadI);
1264 bool IsLoadCSE;
1265 BatchAAResults BatchAA(*AA);
1266 // The dominator tree is updated lazily and may not be valid at this point.
1267 BatchAA.disableDominatorTree();
1268 if (Value *AvailableVal = FindAvailableLoadedValue(
1269 LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
1270 // If the value of the load is locally available within the block, just use
1271 // it. This frequently occurs for reg2mem'd allocas.
1272
1273 if (IsLoadCSE) {
1274 LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1275 combineMetadataForCSE(NLoadI, LoadI, false);
1276 LVI->forgetValue(NLoadI);
1277 };
1278
1279 // If the returned value is the load itself, replace with poison. This can
1280 // only happen in dead loops.
1281 if (AvailableVal == LoadI)
1282 AvailableVal = PoisonValue::get(LoadI->getType());
1283 if (AvailableVal->getType() != LoadI->getType()) {
1284 AvailableVal = CastInst::CreateBitOrPointerCast(
1285 AvailableVal, LoadI->getType(), "", LoadI->getIterator());
1286 cast<Instruction>(AvailableVal)->setDebugLoc(LoadI->getDebugLoc());
1287 }
1288 LoadI->replaceAllUsesWith(AvailableVal);
1289 LoadI->eraseFromParent();
1290 return true;
1291 }
1292
1293 // Otherwise, if we scanned the whole block and got to the top of the block,
1294 // we know the block is locally transparent to the load. If not, something
1295 // might clobber its value.
1296 if (BBIt != LoadBB->begin())
1297 return false;
1298
1299 // If all of the loads and stores that feed the value have the same AA tags,
1300 // then we can propagate them onto any newly inserted loads.
1301 AAMDNodes AATags = LoadI->getAAMetadata();
1302
1303 SmallPtrSet<BasicBlock*, 8> PredsScanned;
1304
1305 using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1306
1307 AvailablePredsTy AvailablePreds;
1308 BasicBlock *OneUnavailablePred = nullptr;
1310
1311 // If we got here, the loaded value is transparent through to the start of the
1312 // block. Check to see if it is available in any of the predecessor blocks.
1313 for (BasicBlock *PredBB : predecessors(LoadBB)) {
1314 // If we already scanned this predecessor, skip it.
1315 if (!PredsScanned.insert(PredBB).second)
1316 continue;
1317
1318 BBIt = PredBB->end();
1319 unsigned NumScanedInst = 0;
1320 Value *PredAvailable = nullptr;
1321 // NOTE: We don't CSE load that is volatile or anything stronger than
1322 // unordered, that should have been checked when we entered the function.
1323 assert(LoadI->isUnordered() &&
1324 "Attempting to CSE volatile or atomic loads");
1325 // If this is a load on a phi pointer, phi-translate it and search
1326 // for available load/store to the pointer in predecessors.
1327 Type *AccessTy = LoadI->getType();
1328 const auto &DL = LoadI->getDataLayout();
1329 MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
1330 LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
1331 AATags);
1332 PredAvailable = findAvailablePtrLoadStore(
1333 Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
1334 &BatchAA, &IsLoadCSE, &NumScanedInst);
1335
1336 // If PredBB has a single predecessor, continue scanning through the
1337 // single predecessor.
1338 BasicBlock *SinglePredBB = PredBB;
1339 while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1340 NumScanedInst < DefMaxInstsToScan) {
1341 SinglePredBB = SinglePredBB->getSinglePredecessor();
1342 if (SinglePredBB) {
1343 BBIt = SinglePredBB->end();
1344 PredAvailable = findAvailablePtrLoadStore(
1345 Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
1346 (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
1347 &NumScanedInst);
1348 }
1349 }
1350
1351 if (!PredAvailable) {
1352 OneUnavailablePred = PredBB;
1353 continue;
1354 }
1355
1356 if (IsLoadCSE)
1357 CSELoads.push_back(cast<LoadInst>(PredAvailable));
1358
1359 // If so, this load is partially redundant. Remember this info so that we
1360 // can create a PHI node.
1361 AvailablePreds.emplace_back(PredBB, PredAvailable);
1362 }
1363
1364 // If the loaded value isn't available in any predecessor, it isn't partially
1365 // redundant.
1366 if (AvailablePreds.empty()) return false;
1367
1368 // Okay, the loaded value is available in at least one (and maybe all!)
1369 // predecessors. If the value is unavailable in more than one unique
1370 // predecessor, we want to insert a merge block for those common predecessors.
1371 // This ensures that we only have to insert one reload, thus not increasing
1372 // code size.
1373 BasicBlock *UnavailablePred = nullptr;
1374
1375 // If the value is unavailable in one of predecessors, we will end up
1376 // inserting a new instruction into them. It is only valid if all the
1377 // instructions before LoadI are guaranteed to pass execution to its
1378 // successor, or if LoadI is safe to speculate.
1379 // TODO: If this logic becomes more complex, and we will perform PRE insertion
1380 // farther than to a predecessor, we need to reuse the code from GVN's PRE.
1381 // It requires domination tree analysis, so for this simple case it is an
1382 // overkill.
1383 if (PredsScanned.size() != AvailablePreds.size() &&
1385 for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
1387 return false;
1388
1389 // If there is exactly one predecessor where the value is unavailable, the
1390 // already computed 'OneUnavailablePred' block is it. If it ends in an
1391 // unconditional branch, we know that it isn't a critical edge.
1392 if (PredsScanned.size() == AvailablePreds.size()+1 &&
1393 OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1394 UnavailablePred = OneUnavailablePred;
1395 } else if (PredsScanned.size() != AvailablePreds.size()) {
1396 // Otherwise, we had multiple unavailable predecessors or we had a critical
1397 // edge from the one.
1398 SmallVector<BasicBlock*, 8> PredsToSplit;
1399 SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
1400
1401 for (const auto &AvailablePred : AvailablePreds)
1402 AvailablePredSet.insert(AvailablePred.first);
1403
1404 // Add all the unavailable predecessors to the PredsToSplit list.
1405 for (BasicBlock *P : predecessors(LoadBB)) {
1406 // If the predecessor is an indirect goto, we can't split the edge.
1407 if (isa<IndirectBrInst>(P->getTerminator()))
1408 return false;
1409
1410 if (!AvailablePredSet.count(P))
1411 PredsToSplit.push_back(P);
1412 }
1413
1414 // Split them out to their own block.
1415 UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1416 }
1417
1418 // If the value isn't available in all predecessors, then there will be
1419 // exactly one where it isn't available. Insert a load on that edge and add
1420 // it to the AvailablePreds list.
1421 if (UnavailablePred) {
1422 assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1423 "Can't handle critical edge here!");
1424 LoadInst *NewVal = new LoadInst(
1425 LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1426 LoadI->getName() + ".pr", false, LoadI->getAlign(),
1427 LoadI->getOrdering(), LoadI->getSyncScopeID(),
1428 UnavailablePred->getTerminator()->getIterator());
1429 NewVal->setDebugLoc(LoadI->getDebugLoc());
1430 if (AATags)
1431 NewVal->setAAMetadata(AATags);
1432
1433 AvailablePreds.emplace_back(UnavailablePred, NewVal);
1434 }
1435
1436 // Now we know that each predecessor of this block has a value in
1437 // AvailablePreds, sort them for efficient access as we're walking the preds.
1438 array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1439
1440 // Create a PHI node at the start of the block for the PRE'd load value.
1441 PHINode *PN = PHINode::Create(LoadI->getType(), pred_size(LoadBB), "");
1442 PN->insertBefore(LoadBB->begin());
1443 PN->takeName(LoadI);
1444 PN->setDebugLoc(LoadI->getDebugLoc());
1445
1446 // Insert new entries into the PHI for each predecessor. A single block may
1447 // have multiple entries here.
1448 for (BasicBlock *P : predecessors(LoadBB)) {
1449 AvailablePredsTy::iterator I =
1450 llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
1451
1452 assert(I != AvailablePreds.end() && I->first == P &&
1453 "Didn't find entry for predecessor!");
1454
1455 // If we have an available predecessor but it requires casting, insert the
1456 // cast in the predecessor and use the cast. Note that we have to update the
1457 // AvailablePreds vector as we go so that all of the PHI entries for this
1458 // predecessor use the same bitcast.
1459 Value *&PredV = I->second;
1460 if (PredV->getType() != LoadI->getType())
1462 PredV, LoadI->getType(), "", P->getTerminator()->getIterator());
1463
1464 PN->addIncoming(PredV, I->first);
1465 }
1466
1467 for (LoadInst *PredLoadI : CSELoads) {
1468 combineMetadataForCSE(PredLoadI, LoadI, true);
1469 LVI->forgetValue(PredLoadI);
1470 }
1471
1472 LoadI->replaceAllUsesWith(PN);
1473 LoadI->eraseFromParent();
1474
1475 return true;
1476}
1477
1478/// findMostPopularDest - The specified list contains multiple possible
1479/// threadable destinations. Pick the one that occurs the most frequently in
1480/// the list.
1481static BasicBlock *
1483 const SmallVectorImpl<std::pair<BasicBlock *,
1484 BasicBlock *>> &PredToDestList) {
1485 assert(!PredToDestList.empty());
1486
1487 // Determine popularity. If there are multiple possible destinations, we
1488 // explicitly choose to ignore 'undef' destinations. We prefer to thread
1489 // blocks with known and real destinations to threading undef. We'll handle
1490 // them later if interesting.
1491 MapVector<BasicBlock *, unsigned> DestPopularity;
1492
1493 // Populate DestPopularity with the successors in the order they appear in the
1494 // successor list. This way, we ensure determinism by iterating it in the
1495 // same order in llvm::max_element below. We map nullptr to 0 so that we can
1496 // return nullptr when PredToDestList contains nullptr only.
1497 DestPopularity[nullptr] = 0;
1498 for (auto *SuccBB : successors(BB))
1499 DestPopularity[SuccBB] = 0;
1500
1501 for (const auto &PredToDest : PredToDestList)
1502 if (PredToDest.second)
1503 DestPopularity[PredToDest.second]++;
1504
1505 // Find the most popular dest.
1506 auto MostPopular = llvm::max_element(DestPopularity, llvm::less_second());
1507
1508 // Okay, we have finally picked the most popular destination.
1509 return MostPopular->first;
1510}
1511
1512// Try to evaluate the value of V when the control flows from PredPredBB to
1513// BB->getSinglePredecessor() and then on to BB.
1515 BasicBlock *PredPredBB,
1516 Value *V,
1517 const DataLayout &DL) {
1518 BasicBlock *PredBB = BB->getSinglePredecessor();
1519 assert(PredBB && "Expected a single predecessor");
1520
1521 if (Constant *Cst = dyn_cast<Constant>(V)) {
1522 return Cst;
1523 }
1524
1525 // Consult LVI if V is not an instruction in BB or PredBB.
1526 Instruction *I = dyn_cast<Instruction>(V);
1527 if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1528 return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1529 }
1530
1531 // Look into a PHI argument.
1532 if (PHINode *PHI = dyn_cast<PHINode>(V)) {
1533 if (PHI->getParent() == PredBB)
1534 return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1535 return nullptr;
1536 }
1537
1538 // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
1539 if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1540 if (CondCmp->getParent() == BB) {
1541 Constant *Op0 =
1542 evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0), DL);
1543 Constant *Op1 =
1544 evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1), DL);
1545 if (Op0 && Op1) {
1546 return ConstantFoldCompareInstOperands(CondCmp->getPredicate(), Op0,
1547 Op1, DL);
1548 }
1549 }
1550 return nullptr;
1551 }
1552
1553 return nullptr;
1554}
1555
1557 ConstantPreference Preference,
1558 Instruction *CxtI) {
1559 // If threading this would thread across a loop header, don't even try to
1560 // thread the edge.
1561 if (LoopHeaders.count(BB))
1562 return false;
1563
1564 PredValueInfoTy PredValues;
1565 if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1566 CxtI)) {
1567 // We don't have known values in predecessors. See if we can thread through
1568 // BB and its sole predecessor.
1570 }
1571
1572 assert(!PredValues.empty() &&
1573 "computeValueKnownInPredecessors returned true with no values");
1574
1575 LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1576 for (const auto &PredValue : PredValues) {
1577 dbgs() << " BB '" << BB->getName()
1578 << "': FOUND condition = " << *PredValue.first
1579 << " for pred '" << PredValue.second->getName() << "'.\n";
1580 });
1581
1582 // Decide what we want to thread through. Convert our list of known values to
1583 // a list of known destinations for each pred. This also discards duplicate
1584 // predecessors and keeps track of the undefined inputs (which are represented
1585 // as a null dest in the PredToDestList).
1588
1589 BasicBlock *OnlyDest = nullptr;
1590 BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1591 Constant *OnlyVal = nullptr;
1592 Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1593
1594 for (const auto &PredValue : PredValues) {
1595 BasicBlock *Pred = PredValue.second;
1596 if (!SeenPreds.insert(Pred).second)
1597 continue; // Duplicate predecessor entry.
1598
1599 Constant *Val = PredValue.first;
1600
1601 BasicBlock *DestBB;
1602 if (isa<UndefValue>(Val))
1603 DestBB = nullptr;
1604 else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1605 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1606 DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1607 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1608 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1609 DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1610 } else {
1611 assert(isa<IndirectBrInst>(BB->getTerminator())
1612 && "Unexpected terminator");
1613 assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1614 DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1615 }
1616
1617 // If we have exactly one destination, remember it for efficiency below.
1618 if (PredToDestList.empty()) {
1619 OnlyDest = DestBB;
1620 OnlyVal = Val;
1621 } else {
1622 if (OnlyDest != DestBB)
1623 OnlyDest = MultipleDestSentinel;
1624 // It possible we have same destination, but different value, e.g. default
1625 // case in switchinst.
1626 if (Val != OnlyVal)
1627 OnlyVal = MultipleVal;
1628 }
1629
1630 // If the predecessor ends with an indirect goto, we can't change its
1631 // destination.
1632 if (isa<IndirectBrInst>(Pred->getTerminator()))
1633 continue;
1634
1635 PredToDestList.emplace_back(Pred, DestBB);
1636 }
1637
1638 // If all edges were unthreadable, we fail.
1639 if (PredToDestList.empty())
1640 return false;
1641
1642 // If all the predecessors go to a single known successor, we want to fold,
1643 // not thread. By doing so, we do not need to duplicate the current block and
1644 // also miss potential opportunities in case we dont/cant duplicate.
1645 if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1646 if (BB->hasNPredecessors(PredToDestList.size())) {
1647 bool SeenFirstBranchToOnlyDest = false;
1648 std::vector <DominatorTree::UpdateType> Updates;
1649 Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
1650 for (BasicBlock *SuccBB : successors(BB)) {
1651 if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1652 SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1653 } else {
1654 SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1655 Updates.push_back({DominatorTree::Delete, BB, SuccBB});
1656 }
1657 }
1658
1659 // Finally update the terminator.
1660 Instruction *Term = BB->getTerminator();
1661 Instruction *NewBI = BranchInst::Create(OnlyDest, Term->getIterator());
1662 NewBI->setDebugLoc(Term->getDebugLoc());
1663 ++NumFolds;
1664 Term->eraseFromParent();
1665 DTU->applyUpdatesPermissive(Updates);
1666 if (auto *BPI = getBPI())
1667 BPI->eraseBlock(BB);
1668
1669 // If the condition is now dead due to the removal of the old terminator,
1670 // erase it.
1671 if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1672 if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1673 CondInst->eraseFromParent();
1674 // We can safely replace *some* uses of the CondInst if it has
1675 // exactly one value as returned by LVI. RAUW is incorrect in the
1676 // presence of guards and assumes, that have the `Cond` as the use. This
1677 // is because we use the guards/assume to reason about the `Cond` value
1678 // at the end of block, but RAUW unconditionally replaces all uses
1679 // including the guards/assumes themselves and the uses before the
1680 // guard/assume.
1681 else if (OnlyVal && OnlyVal != MultipleVal)
1682 replaceFoldableUses(CondInst, OnlyVal, BB);
1683 }
1684 return true;
1685 }
1686 }
1687
1688 // Determine which is the most common successor. If we have many inputs and
1689 // this block is a switch, we want to start by threading the batch that goes
1690 // to the most popular destination first. If we only know about one
1691 // threadable destination (the common case) we can avoid this.
1692 BasicBlock *MostPopularDest = OnlyDest;
1693
1694 if (MostPopularDest == MultipleDestSentinel) {
1695 // Remove any loop headers from the Dest list, threadEdge conservatively
1696 // won't process them, but we might have other destination that are eligible
1697 // and we still want to process.
1698 erase_if(PredToDestList,
1699 [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1700 return LoopHeaders.contains(PredToDest.second);
1701 });
1702
1703 if (PredToDestList.empty())
1704 return false;
1705
1706 MostPopularDest = findMostPopularDest(BB, PredToDestList);
1707 }
1708
1709 // Now that we know what the most popular destination is, factor all
1710 // predecessors that will jump to it into a single predecessor.
1711 SmallVector<BasicBlock*, 16> PredsToFactor;
1712 for (const auto &PredToDest : PredToDestList)
1713 if (PredToDest.second == MostPopularDest) {
1714 BasicBlock *Pred = PredToDest.first;
1715
1716 // This predecessor may be a switch or something else that has multiple
1717 // edges to the block. Factor each of these edges by listing them
1718 // according to # occurrences in PredsToFactor.
1719 for (BasicBlock *Succ : successors(Pred))
1720 if (Succ == BB)
1721 PredsToFactor.push_back(Pred);
1722 }
1723
1724 // If the threadable edges are branching on an undefined value, we get to pick
1725 // the destination that these predecessors should get to.
1726 if (!MostPopularDest)
1727 MostPopularDest = BB->getTerminator()->
1728 getSuccessor(getBestDestForJumpOnUndef(BB));
1729
1730 // Ok, try to thread it!
1731 return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
1732}
1733
1734/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
1735/// a PHI node (or freeze PHI) in the current block. See if there are any
1736/// simplifications we can do based on inputs to the phi node.
1738 BasicBlock *BB = PN->getParent();
1739
1740 // TODO: We could make use of this to do it once for blocks with common PHI
1741 // values.
1743 PredBBs.resize(1);
1744
1745 // If any of the predecessor blocks end in an unconditional branch, we can
1746 // *duplicate* the conditional branch into that block in order to further
1747 // encourage jump threading and to eliminate cases where we have branch on a
1748 // phi of an icmp (branch on icmp is much better).
1749 // This is still beneficial when a frozen phi is used as the branch condition
1750 // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
1751 // to br(icmp(freeze ...)).
1752 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1753 BasicBlock *PredBB = PN->getIncomingBlock(i);
1754 if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
1755 if (PredBr->isUnconditional()) {
1756 PredBBs[0] = PredBB;
1757 // Try to duplicate BB into PredBB.
1758 if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1759 return true;
1760 }
1761 }
1762
1763 return false;
1764}
1765
1766/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
1767/// a xor instruction in the current block. See if there are any
1768/// simplifications we can do based on inputs to the xor.
     // NOTE(review): the signature line
     // (bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO))
     // appears to be missing from this extract — confirm upstream.
1770 BasicBlock *BB = BO->getParent();
1771
1772 // If either the LHS or RHS of the xor is a constant, don't do this
1773 // optimization.
1774 if (isa<ConstantInt>(BO->getOperand(0)) ||
1775 isa<ConstantInt>(BO->getOperand(1)))
1776 return false;
1777
1778 // If the first instruction in BB isn't a phi, we won't be able to infer
1779 // anything special about any particular predecessor.
1780 if (!isa<PHINode>(BB->front()))
1781 return false;
1782
1783 // If this BB is a landing pad, we won't be able to split the edge into it.
1784 if (BB->isEHPad())
1785 return false;
1786
1787 // If we have a xor as the branch input to this block, and we know that the
1788 // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1789 // the condition into the predecessor and fix that value to true, saving some
1790 // logical ops on that path and encouraging other paths to simplify.
1791 //
1792 // This copies something like this:
1793 //
1794 // BB:
1795 // %X = phi i1 [1], [%X']
1796 // %Y = icmp eq i32 %A, %B
1797 // %Z = xor i1 %X, %Y
1798 // br i1 %Z, ...
1799 //
1800 // Into:
1801 // BB':
1802 // %Y = icmp ne i32 %A, %B
1803 // br i1 %Y, ...
1804
1805 PredValueInfoTy XorOpValues;
     // isLHS records which xor operand the predecessor-known values refer to.
     // Try operand 0 first; if nothing is known about it, fall back to
     // operand 1.
1806 bool isLHS = true;
1807 if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1808 WantInteger, BO)) {
1809 assert(XorOpValues.empty());
1810 if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1811 WantInteger, BO))
1812 return false;
1813 isLHS = false;
1814 }
1815
1816 assert(!XorOpValues.empty() &&
1817 "computeValueKnownInPredecessors returned true with no values");
1818
1819 // Scan the information to see which is most popular: true or false. The
1820 // predecessors can be of the set true, false, or undef.
1821 unsigned NumTrue = 0, NumFalse = 0;
1822 for (const auto &XorOpValue : XorOpValues) {
1823 if (isa<UndefValue>(XorOpValue.first))
1824 // Ignore undefs for the count.
1825 continue;
1826 if (cast<ConstantInt>(XorOpValue.first)->isZero())
1827 ++NumFalse;
1828 else
1829 ++NumTrue;
1830 }
1831
1832 // Determine which value to split on, true, false, or undef if neither.
1833 ConstantInt *SplitVal = nullptr;
1834 if (NumTrue > NumFalse)
1835 SplitVal = ConstantInt::getTrue(BB->getContext());
1836 else if (NumTrue != 0 || NumFalse != 0)
1837 SplitVal = ConstantInt::getFalse(BB->getContext());
1838
1839 // Collect all of the blocks that this can be folded into so that we can
1840 // factor this once and clone it once.
1841 SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1842 for (const auto &XorOpValue : XorOpValues) {
1843 if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1844 continue;
1845
1846 BlocksToFoldInto.push_back(XorOpValue.second);
1847 }
1848
1849 // If we inferred a value for all of the predecessors, then duplication won't
1850 // help us. However, we can just replace the LHS or RHS with the constant.
1851 if (BlocksToFoldInto.size() ==
1852 cast<PHINode>(BB->front()).getNumIncomingValues()) {
1853 if (!SplitVal) {
1854 // If all preds provide undef, just nuke the xor, because it is undef too.
     // NOTE(review): a line replacing all uses of BO with undef (original
     // line 1855) appears to be missing from this extract; erasing BO
     // without it would be invalid if BO still had uses — confirm upstream.
1856 BO->eraseFromParent();
     // getOperand(isLHS): bool-to-index conversion selects the *other*
     // operand (the one whose value was not computed above).
1857 } else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) {
1858 // If all preds provide 0, replace the xor with the other input.
1859 BO->replaceAllUsesWith(BO->getOperand(isLHS));
1860 BO->eraseFromParent();
1861 } else {
1862 // If all preds provide 1, set the computed value to 1.
1863 BO->setOperand(!isLHS, SplitVal);
1864 }
1865
1866 return true;
1867 }
1868
1869 // If any of predecessors end with an indirect goto, we can't change its
1870 // destination.
1871 if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
1872 return isa<IndirectBrInst>(Pred->getTerminator());
1873 }))
1874 return false;
1875
1876 // Try to duplicate BB into PredBB.
1877 return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1878}
1879
1880/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1881/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1882/// NewPred using the entries from OldPred (suitably mapped).
     // NOTE(review): the first signature line (void addPHINodeEntriesForMappedBlock
     // taking BasicBlock *PHIBB) is missing from this extract.
1884 BasicBlock *OldPred,
1885 BasicBlock *NewPred,
     // NOTE(review): the final parameter line (presumably
     // ValueToValueMapTy &ValueMap, closing the signature) is also missing.
1887 for (PHINode &PN : PHIBB->phis()) {
1888 // Ok, we have a PHI node. Figure out what the incoming value was for the
1889 // DestBlock.
1890 Value *IV = PN.getIncomingValueForBlock(OldPred);
1891
1892 // Remap the value if necessary.
1893 if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
     // NOTE(review): the lookup into ValueMap (declaring the iterator I,
     // original line 1894) is missing from this extract — confirm upstream.
1895 if (I != ValueMap.end())
1896 IV = I->second;
1897 }
1898
     // Record the (possibly remapped) value as the incoming value from the
     // newly added predecessor.
1899 PN.addIncoming(IV, NewPred);
1900 }
1901}
1902
1903/// Merge basic block BB into its sole predecessor if possible.
     // NOTE(review): the function signature (bool JumpThreadingPass::
     // maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB)) is missing from this
     // extract.
1905 BasicBlock *SinglePred = BB->getSinglePredecessor();
1906 if (!SinglePred)
1907 return false;
1908
     // The predecessor must end in an ordinary single-successor terminator,
     // must not be BB itself (self-loop), and BB's address must not be taken.
1909 const Instruction *TI = SinglePred->getTerminator();
1910 if (TI->isSpecialTerminator() || TI->getNumSuccessors() != 1 ||
1911 SinglePred == BB || hasAddressTakenAndUsed(BB))
1912 return false;
1913
1914 // If SinglePred was a loop header, BB becomes one.
1915 if (LoopHeaders.erase(SinglePred))
1916 LoopHeaders.insert(BB);
1917
1918 LVI->eraseBlock(SinglePred);
1919 MergeBasicBlockIntoOnlyPred(BB, DTU.get());
1920
1921 // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
1922 // BB code within one basic block `BB`), we need to invalidate the LVI
1923 // information associated with BB, because the LVI information need not be
1924 // true for all of BB after the merge. For example,
1925 // Before the merge, LVI info and code is as follows:
1926 // SinglePred: <LVI info1 for %p val>
1927 // %y = use of %p
1928 // call @exit() // need not transfer execution to successor.
1929 // assume(%p) // from this point on %p is true
1930 // br label %BB
1931 // BB: <LVI info2 for %p val, i.e. %p is true>
1932 // %x = use of %p
1933 // br label exit
1934 //
1935 // Note that this LVI info for blocks BB and SinglPred is correct for %p
1936 // (info2 and info1 respectively). After the merge and the deletion of the
1937 // LVI info1 for SinglePred. We have the following code:
1938 // BB: <LVI info2 for %p val>
1939 // %y = use of %p
1940 // call @exit()
1941 // assume(%p)
1942 // %x = use of %p <-- LVI info2 is correct from here onwards.
1943 // br label exit
1944 // LVI info2 for BB is incorrect at the beginning of BB.
1945
1946 // Invalidate LVI information for BB if the LVI is not provably true for
1947 // all of BB.
     // NOTE(review): the guard condition the comment above describes
     // (presumably an if on isGuaranteedToTransferExecutionToSuccessor(BB),
     // original line 1948) is missing from this extract; as shown the erase
     // is unconditional — confirm upstream.
1949 LVI->eraseBlock(BB);
1950 return true;
1951}
1952
1953/// Update the SSA form. NewBB contains instructions that are copied from BB.
1954/// ValueMapping maps old values in BB to new ones in NewBB.
     // NOTE(review): the first signature line (void JumpThreadingPass::updateSSA
     // taking BasicBlock *BB and BasicBlock *NewBB) is missing from this extract.
1956 ValueToValueMapTy &ValueMapping) {
1957 // If there were values defined in BB that are used outside the block, then we
1958 // now have to update all uses of the value to use either the original value,
1959 // the cloned value, or some PHI derived value. This can require arbitrary
1960 // PHI insertion, of which we are prepared to do, clean these up now.
1961 SSAUpdater SSAUpdate;
1962 SmallVector<Use *, 16> UsesToRename;
     // NOTE(review): the declaration of DbgValues (presumably a
     // SmallVector<DbgValueInst *, 4>, original line 1963) is missing from
     // this extract — confirm upstream.
1964 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1965
1966 for (Instruction &I : *BB) {
1967 // Scan all uses of this instruction to see if it is used outside of its
1968 // block, and if so, record them in UsesToRename.
1969 for (Use &U : I.uses()) {
1970 Instruction *User = cast<Instruction>(U.getUser());
     // A use in a PHI counts as a use in the PHI's incoming block, not the
     // PHI's own block.
1971 if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1972 if (UserPN->getIncomingBlock(U) == BB)
1973 continue;
1974 } else if (User->getParent() == BB)
1975 continue;
1976
1977 UsesToRename.push_back(&U);
1978 }
1979
1980 // Find debug values outside of the block
1981 findDbgValues(DbgValues, &I, &DbgVariableRecords);
1982 llvm::erase_if(DbgValues, [&](const DbgValueInst *DbgVal) {
1983 return DbgVal->getParent() == BB;
1984 });
1985 llvm::erase_if(DbgVariableRecords, [&](const DbgVariableRecord *DbgVarRec) {
1986 return DbgVarRec->getParent() == BB;
1987 });
1988
1989 // If there are no uses outside the block, we're done with this instruction.
1990 if (UsesToRename.empty() && DbgValues.empty() && DbgVariableRecords.empty())
1991 continue;
1992 LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
1993
1994 // We found a use of I outside of BB. Rename all uses of I that are outside
1995 // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
1996 // with the two values we know.
1997 SSAUpdate.Initialize(I.getType(), I.getName());
1998 SSAUpdate.AddAvailableValue(BB, &I);
1999 SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
2000
2001 while (!UsesToRename.empty())
2002 SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
     // Also retarget any debug values that referenced I from outside BB.
2003 if (!DbgValues.empty() || !DbgVariableRecords.empty()) {
2004 SSAUpdate.UpdateDebugValues(&I, DbgValues);
2005 SSAUpdate.UpdateDebugValues(&I, DbgVariableRecords);
2006 DbgValues.clear();
2007 DbgVariableRecords.clear();
2008 }
2009
2010 LLVM_DEBUG(dbgs() << "\n");
2011 }
2012}
2013
2014/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
2015/// arguments that come from PredBB. Return the map from the variables in the
2016/// source basic block to the variables in the newly created basic block.
2017
     // NOTE(review): the first signature lines (void JumpThreadingPass::
     // cloneInstructions taking ValueToValueMapTy &ValueMapping and the
     // BasicBlock::iterator pair BI/BE, original lines 2018-2020) are missing
     // from this extract — confirm upstream.
2021 BasicBlock *NewBB,
2022 BasicBlock *PredBB) {
2023 // We are going to have to map operands from the source basic block to the new
2024 // copy of the block 'NewBB'. If there are PHI nodes in the source basic
2025 // block, evaluate them to account for entry from PredBB.
2026
2027 // Retargets llvm.dbg.value to any renamed variables.
2028 auto RetargetDbgValueIfPossible = [&](Instruction *NewInst) -> bool {
2029 auto DbgInstruction = dyn_cast<DbgValueInst>(NewInst);
2030 if (!DbgInstruction)
2031 return false;
2032
2033 SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
2034 for (auto DbgOperand : DbgInstruction->location_ops()) {
2035 auto DbgOperandInstruction = dyn_cast<Instruction>(DbgOperand);
2036 if (!DbgOperandInstruction)
2037 continue;
2038
2039 auto I = ValueMapping.find(DbgOperandInstruction);
2040 if (I != ValueMapping.end()) {
2041 OperandsToRemap.insert(
2042 std::pair<Value *, Value *>(DbgOperand, I->second));
2043 }
2044 }
2045
2046 for (auto &[OldOp, MappedOp] : OperandsToRemap)
2047 DbgInstruction->replaceVariableLocationOp(OldOp, MappedOp);
2048 return true;
2049 };
2050
2051 // Duplicate implementation of the above dbg.value code, using
2052 // DbgVariableRecords instead.
2053 auto RetargetDbgVariableRecordIfPossible = [&](DbgVariableRecord *DVR) {
2054 SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
2055 for (auto *Op : DVR->location_ops()) {
2056 Instruction *OpInst = dyn_cast<Instruction>(Op);
2057 if (!OpInst)
2058 continue;
2059
2060 auto I = ValueMapping.find(OpInst);
2061 if (I != ValueMapping.end())
2062 OperandsToRemap.insert({OpInst, I->second});
2063 }
2064
2065 for (auto &[OldOp, MappedOp] : OperandsToRemap)
2066 DVR->replaceVariableLocationOp(OldOp, MappedOp);
2067 };
2068
2069 BasicBlock *RangeBB = BI->getParent();
2070
2071 // Clone the phi nodes of the source basic block into NewBB. The resulting
2072 // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
2073 // might need to rewrite the operand of the cloned phi.
2074 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2075 PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
2076 NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
2077 ValueMapping[PN] = NewPN;
2078 }
2079
2080 // Clone noalias scope declarations in the threaded block. When threading a
2081 // loop exit, we would otherwise end up with two idential scope declarations
2082 // visible at the same time.
2083 SmallVector<MDNode *> NoAliasScopes;
2084 DenseMap<MDNode *, MDNode *> ClonedScopes;
2085 LLVMContext &Context = PredBB->getContext();
2086 identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
2087 cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
2088
2089 auto CloneAndRemapDbgInfo = [&](Instruction *NewInst, Instruction *From) {
2090 auto DVRRange = NewInst->cloneDebugInfoFrom(From);
2091 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
2092 RetargetDbgVariableRecordIfPossible(&DVR);
2093 };
2094
2095 // Clone the non-phi instructions of the source basic block into NewBB,
2096 // keeping track of the mapping and using it to remap operands in the cloned
2097 // instructions.
2098 for (; BI != BE; ++BI) {
2099 Instruction *New = BI->clone();
2100 New->setName(BI->getName());
2101 New->insertInto(NewBB, NewBB->end());
2102 ValueMapping[&*BI] = New;
2103 adaptNoAliasScopes(New, ClonedScopes, Context);
2104
2105 CloneAndRemapDbgInfo(New, &*BI);
2106
     // dbg.value intrinsics get their operands remapped via the debug-info
     // path above; skip the generic operand remap for them.
2107 if (RetargetDbgValueIfPossible(New))
2108 continue;
2109
2110 // Remap operands to patch up intra-block references.
2111 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2112 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2113 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
2114 if (I != ValueMapping.end())
2115 New->setOperand(i, I->second);
2116 }
2117 }
2118
2119 // There may be DbgVariableRecords on the terminator, clone directly from
2120 // marker to marker as there isn't an instruction there.
2121 if (BE != RangeBB->end() && BE->hasDbgRecords()) {
2122 // Dump them at the end.
2123 DbgMarker *Marker = RangeBB->getMarker(BE);
2124 DbgMarker *EndMarker = NewBB->createMarker(NewBB->end());
2125 auto DVRRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);
2126 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
2127 RetargetDbgVariableRecordIfPossible(&DVR);
2128 }
2129
2130 return;
2131}
2132
2133/// Attempt to thread through two successive basic blocks.
     // NOTE(review): the first signature line (bool JumpThreadingPass::
     // maybethreadThroughTwoBasicBlocks(BasicBlock *BB, ...)) is missing from
     // this extract.
2135 Value *Cond) {
2136 // Consider:
2137 //
2138 // PredBB:
2139 // %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
2140 // %tobool = icmp eq i32 %cond, 0
2141 // br i1 %tobool, label %BB, label ...
2142 //
2143 // BB:
2144 // %cmp = icmp eq i32* %var, null
2145 // br i1 %cmp, label ..., label ...
2146 //
2147 // We don't know the value of %var at BB even if we know which incoming edge
2148 // we take to BB. However, once we duplicate PredBB for each of its incoming
2149 // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
2150 // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
2151
2152 // Require that BB end with a Branch for simplicity.
2153 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2154 if (!CondBr)
2155 return false;
2156
2157 // BB must have exactly one predecessor.
2158 BasicBlock *PredBB = BB->getSinglePredecessor();
2159 if (!PredBB)
2160 return false;
2161
2162 // Require that PredBB end with a conditional Branch. If PredBB ends with an
2163 // unconditional branch, we should be merging PredBB and BB instead. For
2164 // simplicity, we don't deal with a switch.
2165 BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2166 if (!PredBBBranch || PredBBBranch->isUnconditional())
2167 return false;
2168
2169 // If PredBB has exactly one incoming edge, we don't gain anything by copying
2170 // PredBB.
2171 if (PredBB->getSinglePredecessor())
2172 return false;
2173
2174 // Don't thread through PredBB if it contains a successor edge to itself, in
2175 // which case we would infinite loop. Suppose we are threading an edge from
2176 // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
2177 // successor edge to itself. If we allowed jump threading in this case, we
2178 // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
2179 // PredBB.thread has a successor edge to PredBB, we would immediately come up
2180 // with another jump threading opportunity from PredBB.thread through PredBB
2181 // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
2182 // would keep peeling one iteration from PredBB.
2183 if (llvm::is_contained(successors(PredBB), PredBB))
2184 return false;
2185
2186 // Don't thread across a loop header.
2187 if (LoopHeaders.count(PredBB))
2188 return false;
2189
2190 // Avoid complication with duplicating EH pads.
2191 if (PredBB->isEHPad())
2192 return false;
2193
2194 // Find a predecessor that we can thread. For simplicity, we only consider a
2195 // successor edge out of BB to which we thread exactly one incoming edge into
2196 // PredBB.
2197 unsigned ZeroCount = 0;
2198 unsigned OneCount = 0;
2199 BasicBlock *ZeroPred = nullptr;
2200 BasicBlock *OnePred = nullptr;
2201 const DataLayout &DL = BB->getDataLayout();
2202 for (BasicBlock *P : predecessors(PredBB)) {
2203 // If PredPred ends with IndirectBrInst, we can't handle it.
2204 if (isa<IndirectBrInst>(P->getTerminator()))
2205 continue;
     // NOTE(review): the expression being dyn_cast'd here (presumably a call
     // evaluating Cond on the edge P->PredBB, original line 2207) is missing
     // from this extract — confirm upstream.
2206 if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
2208 if (CI->isZero()) {
2209 ZeroCount++;
2210 ZeroPred = P;
2211 } else if (CI->isOne()) {
2212 OneCount++;
2213 OnePred = P;
2214 }
2215 }
2216 }
2217
2218 // Disregard complicated cases where we have to thread multiple edges.
2219 BasicBlock *PredPredBB;
2220 if (ZeroCount == 1) {
2221 PredPredBB = ZeroPred;
2222 } else if (OneCount == 1) {
2223 PredPredBB = OnePred;
2224 } else {
2225 return false;
2226 }
2227
     // bool-to-index: a known-zero condition selects successor 1 (the false
     // edge); a known-one condition selects successor 0 (the true edge).
2228 BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
2229
2230 // If threading to the same block as we come from, we would infinite loop.
2231 if (SuccBB == BB) {
2232 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2233 << "' - would thread to self!\n");
2234 return false;
2235 }
2236
2237 // If threading this would thread across a loop header, don't thread the edge.
2238 // See the comments above findLoopHeaders for justifications and caveats.
2239 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2240 LLVM_DEBUG({
2241 bool BBIsHeader = LoopHeaders.count(BB);
2242 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2243 dbgs() << " Not threading across "
2244 << (BBIsHeader ? "loop header BB '" : "block BB '")
2245 << BB->getName() << "' to dest "
2246 << (SuccIsHeader ? "loop header BB '" : "block BB '")
2247 << SuccBB->getName()
2248 << "' - it might create an irreducible loop!\n";
2249 });
2250 return false;
2251 }
2252
2253 // Compute the cost of duplicating BB and PredBB.
2254 unsigned BBCost = getJumpThreadDuplicationCost(
2255 TTI, BB, BB->getTerminator(), BBDupThreshold);
2256 unsigned PredBBCost = getJumpThreadDuplicationCost(
2257 TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
2258
2259 // Give up if costs are too high. We need to check BBCost and PredBBCost
2260 // individually before checking their sum because getJumpThreadDuplicationCost
2261 // return (unsigned)~0 for those basic blocks that cannot be duplicated.
2262 if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
2263 BBCost + PredBBCost > BBDupThreshold) {
     // NOTE(review): the debug string "for BB\n" below lacks a leading space,
     // so the cost number and "for" run together in the output — cosmetic.
2264 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2265 << "' - Cost is too high: " << PredBBCost
2266 << " for PredBB, " << BBCost << "for BB\n");
2267 return false;
2268 }
2269
2270 // Now we are ready to duplicate PredBB.
2271 threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
2272 return true;
2273}
2274
     // NOTE(review): the first signature line (void JumpThreadingPass::
     // threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, ...)) is missing
     // from this extract.
2276 BasicBlock *PredBB,
2277 BasicBlock *BB,
2278 BasicBlock *SuccBB) {
2279 LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
2280 << BB->getName() << "'\n");
2281
2282 // Build BPI/BFI before any changes are made to IR.
2283 bool HasProfile = doesBlockHaveProfileData(BB);
2284 auto *BFI = getOrCreateBFI(HasProfile);
2285 auto *BPI = getOrCreateBPI(BFI != nullptr);
2286
2287 BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
2288 BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
2289
     // NewBB is the per-edge copy of PredBB reachable only from PredPredBB.
2290 BasicBlock *NewBB =
2291 BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
2292 PredBB->getParent(), PredBB);
2293 NewBB->moveAfter(PredBB);
2294
2295 // Set the block frequency of NewBB.
2296 if (BFI) {
2297 assert(BPI && "It's expected BPI to exist along with BFI");
2298 auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
2299 BPI->getEdgeProbability(PredPredBB, PredBB);
2300 BFI->setBlockFreq(NewBB, NewBBFreq);
2301 }
2302
2303 // We are going to have to map operands from the original BB block to the new
2304 // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
2305 // to account for entry from PredPredBB.
2306 ValueToValueMapTy ValueMapping;
2307 cloneInstructions(ValueMapping, PredBB->begin(), PredBB->end(), NewBB,
2308 PredPredBB);
2309
2310 // Copy the edge probabilities from PredBB to NewBB.
2311 if (BPI)
2312 BPI->copyEdgeProbabilities(PredBB, NewBB);
2313
2314 // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
2315 // This eliminates predecessors from PredPredBB, which requires us to simplify
2316 // any PHI nodes in PredBB.
2317 Instruction *PredPredTerm = PredPredBB->getTerminator();
2318 for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
2319 if (PredPredTerm->getSuccessor(i) == PredBB) {
2320 PredBB->removePredecessor(PredPredBB, true);
2321 PredPredTerm->setSuccessor(i, NewBB);
2322 }
2323
     // NewBB (the PredBB clone) now feeds both successors of PredBB's branch;
     // add PHI entries in each using the cloned values.
2324 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
2325 ValueMapping);
2326 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
2327 ValueMapping);
2328
2329 DTU->applyUpdatesPermissive(
2330 {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
2331 {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
2332 {DominatorTree::Insert, PredPredBB, NewBB},
2333 {DominatorTree::Delete, PredPredBB, PredBB}});
2334
2335 updateSSA(PredBB, NewBB, ValueMapping);
2336
2337 // Clean up things like PHI nodes with single operands, dead instructions,
2338 // etc.
2339 SimplifyInstructionsInBlock(NewBB, TLI);
2340 SimplifyInstructionsInBlock(PredBB, TLI);
2341
     // With PredPredBB->PredBB redirected through NewBB, thread the now-known
     // edge NewBB->BB straight to SuccBB.
2342 SmallVector<BasicBlock *, 1> PredsToFactor;
2343 PredsToFactor.push_back(NewBB);
2344 threadEdge(BB, PredsToFactor, SuccBB);
2345}
2346
2347/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
     // NOTE(review): the first signature line (bool JumpThreadingPass::
     // tryThreadEdge(, original line 2348) is missing from this extract.
2349 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
2350 BasicBlock *SuccBB) {
2351 // If threading to the same block as we come from, we would infinite loop.
2352 if (SuccBB == BB) {
2353 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2354 << "' - would thread to self!\n");
2355 return false;
2356 }
2357
2358 // If threading this would thread across a loop header, don't thread the edge.
2359 // See the comments above findLoopHeaders for justifications and caveats.
2360 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2361 LLVM_DEBUG({
2362 bool BBIsHeader = LoopHeaders.count(BB);
2363 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2364 dbgs() << " Not threading across "
2365 << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
2366 << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
2367 << SuccBB->getName() << "' - it might create an irreducible loop!\n";
2368 });
2369 return false;
2370 }
2371
     // Profitability gate: duplicating BB must fit within the configured
     // jump-threading-threshold.
2372 unsigned JumpThreadCost = getJumpThreadDuplicationCost(
2373 TTI, BB, BB->getTerminator(), BBDupThreshold);
2374 if (JumpThreadCost > BBDupThreshold) {
2375 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2376 << "' - Cost is too high: " << JumpThreadCost << "\n");
2377 return false;
2378 }
2379
2380 threadEdge(BB, PredBBs, SuccBB);
2381 return true;
2382}
2383
2384/// threadEdge - We have decided that it is safe and profitable to factor the
2385/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
2386/// across BB. Transform the IR to reflect this change.
     // NOTE(review): the first signature line (void JumpThreadingPass::
     // threadEdge(BasicBlock *BB, original line 2387) is missing from this
     // extract.
2388 const SmallVectorImpl<BasicBlock *> &PredBBs,
2389 BasicBlock *SuccBB) {
2390 assert(SuccBB != BB && "Don't create an infinite loop");
2391
2392 assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
2393 "Don't thread across loop headers");
2394
2395 // Build BPI/BFI before any changes are made to IR.
2396 bool HasProfile = doesBlockHaveProfileData(BB);
2397 auto *BFI = getOrCreateBFI(HasProfile);
2398 auto *BPI = getOrCreateBPI(BFI != nullptr);
2399
2400 // And finally, do it! Start by factoring the predecessors if needed.
2401 BasicBlock *PredBB;
2402 if (PredBBs.size() == 1)
2403 PredBB = PredBBs[0];
2404 else {
2405 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2406 << " common predecessors.\n");
2407 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2408 }
2409
2410 // And finally, do it!
2411 LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
2412 << "' to '" << SuccBB->getName()
2413 << ", across block:\n " << *BB << "\n");
2414
     // Tell LazyValueInfo about the edge retarget before mutating the CFG.
2415 LVI->threadEdge(PredBB, BB, SuccBB);
2416
     // NOTE(review): the first line of the NewBB creation (presumably
     // BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), original line
     // 2417) is missing from this extract — confirm upstream.
2418 BB->getName()+".thread",
2419 BB->getParent(), BB);
2420 NewBB->moveAfter(PredBB);
2421
2422 // Set the block frequency of NewBB.
2423 if (BFI) {
2424 assert(BPI && "It's expected BPI to exist along with BFI");
2425 auto NewBBFreq =
2426 BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
2427 BFI->setBlockFreq(NewBB, NewBBFreq);
2428 }
2429
2430 // Copy all the instructions from BB to NewBB except the terminator.
2431 ValueToValueMapTy ValueMapping;
2432 cloneInstructions(ValueMapping, BB->begin(), std::prev(BB->end()), NewBB,
2433 PredBB);
2434
2435 // We didn't copy the terminator from BB over to NewBB, because there is now
2436 // an unconditional jump to SuccBB. Insert the unconditional jump.
2437 BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
2438 NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
2439
2440 // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
2441 // PHI nodes for NewBB now.
2442 addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
2443
2444 // Update the terminator of PredBB to jump to NewBB instead of BB. This
2445 // eliminates predecessors from BB, which requires us to simplify any PHI
2446 // nodes in BB.
2447 Instruction *PredTerm = PredBB->getTerminator();
2448 for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
2449 if (PredTerm->getSuccessor(i) == BB) {
2450 BB->removePredecessor(PredBB, true);
2451 PredTerm->setSuccessor(i, NewBB);
2452 }
2453
2454 // Enqueue required DT updates.
2455 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
2456 {DominatorTree::Insert, PredBB, NewBB},
2457 {DominatorTree::Delete, PredBB, BB}});
2458
2459 updateSSA(BB, NewBB, ValueMapping);
2460
2461 // At this point, the IR is fully up to date and consistent. Do a quick scan
2462 // over the new instructions and zap any that are constants or dead. This
2463 // frequently happens because of phi translation.
2464 SimplifyInstructionsInBlock(NewBB, TLI);
2465
2466 // Update the edge weight from BB to SuccBB, which should be less than before.
2467 updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
2468
2469 // Threaded an edge!
2470 ++NumThreads;
2471}
2472
2473/// Create a new basic block that will be the predecessor of BB and successor of
2474/// all blocks in Preds. When profile data is available, update the frequency of
2475/// this new block.
2476 BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
     // NOTE(review): the Preds parameter line (presumably
     // const SmallVectorImpl<BasicBlock *> &Preds, original line 2477) is
     // missing from this extract.
2478 const char *Suffix) {
     // NOTE(review): the declaration of NewBBs (presumably a
     // SmallVector<BasicBlock *, 2>, original line 2479) is also missing.
2480
2481 // Collect the frequencies of all predecessors of BB, which will be used to
2482 // update the edge weight of the result of splitting predecessors.
     // NOTE(review): the declaration of FreqMap (a DenseMap from BasicBlock*
     // to BlockFrequency, original line 2483) is missing from this extract.
2484 auto *BFI = getBFI();
2485 if (BFI) {
2486 auto *BPI = getOrCreateBPI(true);
2487 for (auto *Pred : Preds)
2488 FreqMap.insert(std::make_pair(
2489 Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
2490 }
2491
2492 // In the case when BB is a LandingPad block we create 2 new predecessors
2493 // instead of just one.
2494 if (BB->isLandingPad()) {
2495 std::string NewName = std::string(Suffix) + ".split-lp";
2496 SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
2497 } else {
2498 NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
2499 }
2500
2501 std::vector<DominatorTree::UpdateType> Updates;
2502 Updates.reserve((2 * Preds.size()) + NewBBs.size());
2503 for (auto *NewBB : NewBBs) {
2504 BlockFrequency NewBBFreq(0);
2505 Updates.push_back({DominatorTree::Insert, NewBB, BB});
2506 for (auto *Pred : predecessors(NewBB)) {
2507 Updates.push_back({DominatorTree::Delete, Pred, BB});
2508 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
2509 if (BFI) // Update frequencies between Pred -> NewBB.
2510 NewBBFreq += FreqMap.lookup(Pred);
2511 }
2512 if (BFI) // Apply the summed frequency to NewBB.
2513 BFI->setBlockFreq(NewBB, NewBBFreq);
2514 }
2515
2516 DTU->applyUpdatesPermissive(Updates);
2517 return NewBBs[0];
2518}
2519
2520bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2521 const Instruction *TI = BB->getTerminator();
2522 if (!TI || TI->getNumSuccessors() < 2)
2523 return false;
2524
2525 return hasValidBranchWeightMD(*TI);
2526}
2527
2528/// Update the block frequency of BB and branch weight and the metadata on the
2529/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2530/// Freq(PredBB->BB) / Freq(BB->SuccBB).
2531 void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2532 BasicBlock *BB,
2533 BasicBlock *NewBB,
2534 BasicBlock *SuccBB,
2535 BlockFrequencyInfo *BFI,
     // NOTE(review): the BPI parameter line (presumably
     // BranchProbabilityInfo *BPI, original line 2536) is missing from this
     // extract.
2537 bool HasProfile) {
     // NOTE(review): the second conjunct below repeats !BFI; per the message
     // it presumably should be (!BFI && !BPI) — as written the assert cannot
     // catch a BPI-without-BFI mismatch. Confirm/fix upstream.
2538 assert(((BFI && BPI) || (!BFI && !BFI)) &&
2539 "Both BFI & BPI should either be set or unset");
2540
2541 if (!BFI) {
2542 assert(!HasProfile &&
2543 "It's expected to have BFI/BPI when profile info exists");
2544 return;
2545 }
2546
2547 // As the edge from PredBB to BB is deleted, we have to update the block
2548 // frequency of BB.
2549 auto BBOrigFreq = BFI->getBlockFreq(BB);
2550 auto NewBBFreq = BFI->getBlockFreq(NewBB);
2551 auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
2552 auto BBNewFreq = BBOrigFreq - NewBBFreq;
2553 BFI->setBlockFreq(BB, BBNewFreq);
2554
2555 // Collect updated outgoing edges' frequencies from BB and use them to update
2556 // edge probabilities.
2557 SmallVector<uint64_t, 4> BBSuccFreq;
2558 for (BasicBlock *Succ : successors(BB)) {
     // The threaded successor loses exactly the frequency diverted to NewBB;
     // all other successor frequencies are unchanged.
2559 auto SuccFreq = (Succ == SuccBB)
2560 ? BB2SuccBBFreq - NewBBFreq
2561 : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
2562 BBSuccFreq.push_back(SuccFreq.getFrequency());
2563 }
2564
2565 uint64_t MaxBBSuccFreq = *llvm::max_element(BBSuccFreq);
2566
     // NOTE(review): the declaration of BBSuccProbs (presumably a
     // SmallVector<BranchProbability, 4>, original line 2567) is missing from
     // this extract.
2568 if (MaxBBSuccFreq == 0)
     // All-zero frequencies: fall back to a uniform distribution.
2569 BBSuccProbs.assign(BBSuccFreq.size(),
2570 {1, static_cast<unsigned>(BBSuccFreq.size())});
2571 else {
2572 for (uint64_t Freq : BBSuccFreq)
2573 BBSuccProbs.push_back(
2574 BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2575 // Normalize edge probabilities so that they sum up to one.
     // NOTE(review): the first line of the normalization call (presumably
     // BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
     // original line 2576) is missing from this extract.
2577 BBSuccProbs.end());
2578 }
2579
2580 // Update edge probabilities in BPI.
2581 BPI->setEdgeProbability(BB, BBSuccProbs);
2582
2583 // Update the profile metadata as well.
2584 //
2585 // Don't do this if the profile of the transformed blocks was statically
2586 // estimated. (This could occur despite the function having an entry
2587 // frequency in completely cold parts of the CFG.)
2588 //
2589 // In this case we don't want to suggest to subsequent passes that the
2590 // calculated weights are fully consistent. Consider this graph:
2591 //
2592 // check_1
2593 // 50% / |
2594 // eq_1 | 50%
2595 // \ |
2596 // check_2
2597 // 50% / |
2598 // eq_2 | 50%
2599 // \ |
2600 // check_3
2601 // 50% / |
2602 // eq_3 | 50%
2603 // \ |
2604 //
2605 // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2606 // the overall probabilities are inconsistent; the total probability that the
2607 // value is either 1, 2 or 3 is 150%.
2608 //
2609 // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2610 // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2611 // the loop exit edge. Then based solely on static estimation we would assume
2612 // the loop was extremely hot.
2613 //
2614 // FIXME this locally as well so that BPI and BFI are consistent as well. We
2615 // shouldn't make edges extremely likely or unlikely based solely on static
2616 // estimation.
2617 if (BBSuccProbs.size() >= 2 && HasProfile) {
     // NOTE(review): the declaration of Weights (presumably a
     // SmallVector<uint32_t, 4>, original line 2618) is missing from this
     // extract.
2619 for (auto Prob : BBSuccProbs)
2620 Weights.push_back(Prob.getNumerator());
2621
2622 auto TI = BB->getTerminator();
2623 setBranchWeights(*TI, Weights, hasBranchWeightOrigin(*TI));
2624 }
2625}
2626
2627/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2628/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2629/// If we can duplicate the contents of BB up into PredBB do so now, this
2630/// improves the odds that the branch will be on an analyzable instruction like
2631/// a compare.
///
/// \param BB       Block ending in a conditional branch on a PHI defined in BB.
/// \param PredBBs  Non-empty set of predecessors to duplicate BB into; if more
///                 than one, they are first merged via splitBlockPreds.
/// \returns true if BB was duplicated into the predecessor, false if the
///          transform was rejected (loop header or duplication cost too high).
// NOTE(review): the extraction is missing the signature line here (doxygen
// numbering jumps 2631 -> 2633); upstream this is
// `bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(` — verify
// against the original JumpThreading.cpp.
2633 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2634 assert(!PredBBs.empty() && "Can't handle an empty set");
2635
2636 // If BB is a loop header, then duplicating this block outside the loop would
2637 // cause us to transform this into an irreducible loop, don't do this.
2638 // See the comments above findLoopHeaders for justifications and caveats.
2639 if (LoopHeaders.count(BB)) {
2640 LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2641 << "' into predecessor block '" << PredBBs[0]->getName()
2642 << "' - it might create an irreducible loop!\n");
2643 return false;
2644 }
2645
// Cost gate: refuse to duplicate blocks whose instruction cost exceeds the
// command-line controlled BBDupThreshold.
2646 unsigned DuplicationCost = getJumpThreadDuplicationCost(
2647 TTI, BB, BB->getTerminator(), BBDupThreshold);
2648 if (DuplicationCost > BBDupThreshold) {
2649 LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2650 << "' - Cost is too high: " << DuplicationCost << "\n");
2651 return false;
2652 }
2653
2654 // And finally, do it! Start by factoring the predecessors if needed.
2655 std::vector<DominatorTree::UpdateType> Updates;
2656 BasicBlock *PredBB;
2657 if (PredBBs.size() == 1)
2658 PredBB = PredBBs[0];
2659 else {
2660 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2661 << " common predecessors.\n");
2662 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2663 }
2664 Updates.push_back({DominatorTree::Delete, PredBB, BB});
2665
2666 // Okay, we decided to do this! Clone all the instructions in BB onto the end
2667 // of PredBB.
2668 LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2669 << "' into end of '" << PredBB->getName()
2670 << "' to eliminate branch on phi. Cost: "
2671 << DuplicationCost << " block is:" << *BB << "\n");
2672
2673 // Unless PredBB ends with an unconditional branch, split the edge so that we
2674 // can just clone the bits from BB into the end of the new PredBB.
2675 BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2676
2677 if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2678 BasicBlock *OldPredBB = PredBB;
2679 PredBB = SplitEdge(OldPredBB, BB);
2680 Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
2681 Updates.push_back({DominatorTree::Insert, PredBB, BB});
2682 Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
2683 OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2684 }
2685
2686 // We are going to have to map operands from the original BB block into the
2687 // PredBB block. Evaluate PHI nodes in BB.
2688 ValueToValueMapTy ValueMapping;
2689
// PHIs in BB collapse to the single incoming value contributed by PredBB.
2690 BasicBlock::iterator BI = BB->begin();
2691 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2692 ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2693 // Clone the non-phi instructions of BB into PredBB, keeping track of the
2694 // mapping and using it to remap operands in the cloned instructions.
2695 for (; BI != BB->end(); ++BI) {
2696 Instruction *New = BI->clone();
2697 New->insertInto(PredBB, OldPredBranch->getIterator());
2698
2699 // Remap operands to patch up intra-block references.
2700 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2701 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2702 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
2703 if (I != ValueMapping.end())
2704 New->setOperand(i, I->second);
2705 }
2706
2707 // Remap debug variable operands.
2708 remapDebugVariable(ValueMapping, New);
2709
2710 // If this instruction can be simplified after the operands are updated,
2711 // just use the simplified value instead. This frequently happens due to
2712 // phi translation.
// NOTE(review): line 2713 is missing from this extraction; upstream it opens
// the condition as `if (Value *IV = simplifyInstruction(` — confirm against
// the original source before relying on this text.
2714 New,
2715 {BB->getDataLayout(), TLI, nullptr, nullptr, New})) {
2716 ValueMapping[&*BI] = IV;
2717 if (!New->mayHaveSideEffects()) {
2718 New->eraseFromParent();
2719 New = nullptr;
2720 // Clone debug-info on the elided instruction to the destination
2721 // position.
2722 OldPredBranch->cloneDebugInfoFrom(&*BI, std::nullopt, true);
2723 }
2724 } else {
2725 ValueMapping[&*BI] = New;
2726 }
2727 if (New) {
2728 // Otherwise, insert the new instruction into the block.
2729 New->setName(BI->getName());
2730 // Clone across any debug-info attached to the old instruction.
2731 New->cloneDebugInfoFrom(&*BI);
2732 // Update Dominance from simplified New instruction operands.
2733 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2734 if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2735 Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
2736 }
2737 }
2738
2739 // Check to see if the targets of the branch had PHI nodes. If so, we need to
2740 // add entries to the PHI nodes for branch from PredBB now.
2741 BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2742 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2743 ValueMapping);
2744 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2745 ValueMapping);
2746
// Insert PHIs/rewrites for values defined in BB that are used outside it,
// now that each such value has a clone in PredBB.
2747 updateSSA(BB, PredBB, ValueMapping);
2748
2749 // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2750 // that we nuked.
2751 BB->removePredecessor(PredBB, true);
2752
2753 // Remove the unconditional branch at the end of the PredBB block.
2754 OldPredBranch->eraseFromParent();
2755 if (auto *BPI = getBPI())
2756 BPI->copyEdgeProbabilities(BB, PredBB);
2757 DTU->applyUpdatesPermissive(Updates);
2758
2759 ++NumDupes;
2760 return true;
2761}
2762
2763// Pred is a predecessor of BB with an unconditional branch to BB. SI is
2764// a Select instruction in Pred. BB has other predecessors and SI is used in
2765// a PHI node in BB. SI has no other use.
2766// A new basic block, NewBB, is created and SI is converted to compare and
2767// conditional branch. SI is erased from parent.
//
// Params: Pred/BB as described above; SIUse is the PHI in BB that consumes
// SI; Idx is SIUse's incoming-value index for the edge from Pred.
// NOTE(review): the signature line (doxygen line 2768) is missing from this
// extraction; upstream it is
// `void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,`.
2769 SelectInst *SI, PHINode *SIUse,
2770 unsigned Idx) {
2771 // Expand the select.
2772 //
2773 // Pred --
2774 // | v
2775 // | NewBB
2776 // | |
2777 // |-----
2778 // v
2779 // BB
2780 BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
2781 BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2782 BB->getParent(), BB);
2783 // Move the unconditional branch to NewBB.
2784 PredTerm->removeFromParent();
2785 PredTerm->insertInto(NewBB, NewBB->end());
2786 // Create a conditional branch and update PHI nodes.
2787 auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
2788 BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2789 BI->copyMetadata(*SI, {LLVMContext::MD_prof});
// Pred's edge now carries the select's false value; the true value arrives
// via the new NewBB edge (Pred branches to NewBB when the condition is true).
2790 SIUse->setIncomingValue(Idx, SI->getFalseValue());
2791 SIUse->addIncoming(SI->getTrueValue(), NewBB);
2792
2793 uint64_t TrueWeight = 1;
2794 uint64_t FalseWeight = 1;
2795 // Copy probabilities from 'SI' to created conditional branch in 'Pred'.
2796 if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
2797 (TrueWeight + FalseWeight) != 0) {
// NOTE(review): lines 2798-2799 and 2801 are missing from this extraction;
// upstream they declare `SmallVector<BranchProbability, 2> BP;` and the two
// `BP.emplace_back(BranchProbability::getBranchProbability(` openings.
2800 TrueWeight, TrueWeight + FalseWeight));
2802 FalseWeight, TrueWeight + FalseWeight));
2803 // Update BPI if exists.
2804 if (auto *BPI = getBPI())
2805 BPI->setEdgeProbability(Pred, BP);
2806 }
2807 // Set the block frequency of NewBB.
2808 if (auto *BFI = getBFI()) {
// Degenerate (all-zero) weights fall back to a 50/50 split.
2809 if ((TrueWeight + FalseWeight) == 0) {
2810 TrueWeight = 1;
2811 FalseWeight = 1;
2812 }
// NOTE(review): line 2813 is missing; upstream it opens
// `auto PredToNewBBProb = BranchProbability::getBranchProbability(`.
2814 TrueWeight, TrueWeight + FalseWeight);
2815 auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
2816 BFI->setBlockFreq(NewBB, NewBBFreq);
2817 }
2818
2819 // The select is now dead.
2820 SI->eraseFromParent();
2821 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
2822 {DominatorTree::Insert, Pred, NewBB}});
2823
2824 // Update any other PHI nodes in BB.
// NewBB is a new predecessor of BB, so every other PHI needs an incoming
// entry for it; reuse the value those PHIs already had for Pred.
2825 for (BasicBlock::iterator BI = BB->begin();
2826 PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2827 if (Phi != SIUse)
2828 Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2829}
2830
// Unfold a select feeding this terminator's PHI condition.
// NOTE(review): the signature line (doxygen line 2831) is missing from this
// extraction; the nearby comment referencing the
// `tryToUnfoldSelect(CmpInst *, BasicBlock *)` overload suggests this is the
// `bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB)`
// overload — confirm against the original source.
// Returns true if one incoming select was unfolded (at most one per call).
2832 PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2833
// Only handle the case where the condition is a PHI defined in BB itself.
2834 if (!CondPHI || CondPHI->getParent() != BB)
2835 return false;
2836
2837 for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2838 BasicBlock *Pred = CondPHI->getIncomingBlock(I);
2839 SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
2840
2841 // The second and third condition can be potentially relaxed. Currently
2842 // the conditions help to simplify the code and allow us to reuse existing
2843 // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
2844 if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
2845 continue;
2846
// Unfolding requires Pred to end in an unconditional branch to BB.
2847 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2848 if (!PredTerm || !PredTerm->isUnconditional())
2849 continue;
2850
2851 unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
2852 return true;
2853 }
2854 return false;
2855}
2856
2857/// tryToUnfoldSelect - Look for blocks of the form
2858/// bb1:
2859/// %a = select
2860/// br bb2
2861///
2862/// bb2:
2863/// %p = phi [%a, %bb1] ...
2864/// %c = icmp %p
2865/// br i1 %c
2866///
2867/// And expand the select into a branch structure if one of its arms allows %c
2868/// to be folded. This later enables threading from bb1 over bb2.
///
/// \returns true if one such select was unfolded.
// NOTE(review): the signature line (doxygen line 2869) is missing from this
// extraction; upstream this is
// `bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {`.
2870 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2871 PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
2872 Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2873
// Require: BB ends in a conditional branch, and the compare's LHS is a PHI
// defined in BB (the pattern sketched in the header comment).
2874 if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2875 CondLHS->getParent() != BB)
2876 return false;
2877
2878 for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2879 BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2880 SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2881
2882 // Look if one of the incoming values is a select in the corresponding
2883 // predecessor.
2884 if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2885 continue;
2886
2887 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2888 if (!PredTerm || !PredTerm->isUnconditional())
2889 continue;
2890
2891 // Now check if one of the select values would allow us to constant fold the
2892 // terminator in BB. We don't do the transform if both sides fold, those
2893 // cases will be threaded in any case.
2894 LazyValueInfo::Tristate LHSFolds =
2895 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2896 CondRHS, Pred, BB, CondCmp);
2897 LazyValueInfo::Tristate RHSFolds =
2898 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2899 CondRHS, Pred, BB, CondCmp);
// Unfold only when exactly one arm folds (LHSFolds != RHSFolds rules out
// the both-fold case; the Unknown checks rule out the neither-fold case).
2900 if ((LHSFolds != LazyValueInfo::Unknown ||
2901 RHSFolds != LazyValueInfo::Unknown) &&
2902 LHSFolds != RHSFolds) {
2903 unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
2904 return true;
2905 }
2906 }
2907 return false;
2908}
2909
2910/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2911/// same BB in the form
2912/// bb:
2913/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2914/// %s = select %p, trueval, falseval
2915///
2916/// or
2917///
2918/// bb:
2919/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2920/// %c = cmp %p, 0
2921/// %s = select %c, trueval, falseval
2922///
2923/// And expand the select into a branch structure. This later enables
2924/// jump-threading over bb in this pass.
2925///
2926/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2927/// select if the associated PHI has at least one constant. If the unfolded
2928/// select is not jump-threaded, it will be folded again in the later
2929/// optimizations.
///
/// \returns true if one select was expanded (at most one per call).
// NOTE(review): the signature line (doxygen line 2930) is missing from this
// extraction; upstream it is
// `bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {`.
2931 // This transform would reduce the quality of msan diagnostics.
2932 // Disable this transform under MemorySanitizer.
2933 if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
2934 return false;
2935
2936 // If threading this would thread across a loop header, don't thread the edge.
2937 // See the comments above findLoopHeaders for justifications and caveats.
2938 if (LoopHeaders.count(BB))
2939 return false;
2940
2941 for (BasicBlock::iterator BI = BB->begin();
2942 PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2943 // Look for a Phi having at least one constant incoming value.
2944 if (llvm::all_of(PN->incoming_values(),
2945 [](Value *V) { return !isa<ConstantInt>(V); }))
2946 continue;
2947
// Predicate: SI lives in BB, its condition is exactly V and is i1, and SI
// is not a logical and/or (select-form and/or must keep its short-circuit
// poison semantics, so it is excluded here).
2948 auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2949 using namespace PatternMatch;
2950
2951 // Check if SI is in BB and use V as condition.
2952 if (SI->getParent() != BB)
2953 return false;
2954 Value *Cond = SI->getCondition();
2955 bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
2956 return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
2957 };
2958
// Find either PHI -> icmp-with-constant -> select, or PHI -> select.
2959 SelectInst *SI = nullptr;
2960 for (Use &U : PN->uses()) {
2961 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2962 // Look for a ICmp in BB that compares PN with a constant and is the
2963 // condition of a Select.
2964 if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2965 isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2966 if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2967 if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2968 SI = SelectI;
2969 break;
2970 }
2971 } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2972 // Look for a Select in BB that uses PN as condition.
2973 if (isUnfoldCandidate(SelectI, U.get())) {
2974 SI = SelectI;
2975 break;
2976 }
2977 }
2978 }
2979
2980 if (!SI)
2981 continue;
2982 // Expand the select.
// Freeze the condition if it may be undef/poison: branching on undef is UB,
// while the original select was not.
2983 Value *Cond = SI->getCondition();
2984 if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))
2985 Cond = new FreezeInst(Cond, "cond.fr", SI->getIterator());
2986 MDNode *BranchWeights = getBranchWeightMDNode(*SI);
2987 Instruction *Term =
2988 SplitBlockAndInsertIfThen(Cond, SI, false, BranchWeights);
2989 BasicBlock *SplitBB = SI->getParent();
2990 BasicBlock *NewBB = Term->getParent();
// Replace the select with a PHI merging the two arms in SplitBB.
2991 PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI->getIterator());
2992 NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2993 NewPN->addIncoming(SI->getFalseValue(), BB);
2994 NewPN->setDebugLoc(SI->getDebugLoc());
2995 SI->replaceAllUsesWith(NewPN);
2996 SI->eraseFromParent();
2997 // NewBB and SplitBB are newly created blocks which require insertion.
2998 std::vector<DominatorTree::UpdateType> Updates;
2999 Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
3000 Updates.push_back({DominatorTree::Insert, BB, SplitBB});
3001 Updates.push_back({DominatorTree::Insert, BB, NewBB});
3002 Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
3003 // BB's successors were moved to SplitBB, update DTU accordingly.
3004 for (auto *Succ : successors(SplitBB)) {
3005 Updates.push_back({DominatorTree::Delete, BB, Succ});
3006 Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
3007 }
3008 DTU->applyUpdatesPermissive(Updates);
3009 return true;
3010 }
3011 return false;
3012}
3013
3014/// Try to propagate a guard from the current BB into one of its predecessors
3015/// in case if another branch of execution implies that the condition of this
3016/// guard is always true. Currently we only process the simplest case that
3017/// looks like:
3018///
3019/// Start:
3020/// %cond = ...
3021/// br i1 %cond, label %T1, label %F1
3022/// T1:
3023/// br label %Merge
3024/// F1:
3025/// br label %Merge
3026/// Merge:
3027/// %condGuard = ...
3028/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
3029///
3030/// And cond either implies condGuard or !condGuard. In this case all the
3031/// instructions before the guard can be duplicated in both branches, and the
3032/// guard is then threaded to one of them.
///
/// \returns true if a guard in BB was threaded via threadGuard().
// NOTE(review): the signature line (doxygen line 3033) is missing from this
// extraction; upstream it is
// `bool JumpThreadingPass::processGuards(BasicBlock *BB) {`.
3034 using namespace PatternMatch;
3035
3036 // We only want to deal with two predecessors.
// Count predecessors by stepping the iterator: bail unless exactly two
// distinct blocks precede BB.
3037 BasicBlock *Pred1, *Pred2;
3038 auto PI = pred_begin(BB), PE = pred_end(BB);
3039 if (PI == PE)
3040 return false;
3041 Pred1 = *PI++;
3042 if (PI == PE)
3043 return false;
3044 Pred2 = *PI++;
3045 if (PI != PE)
3046 return false;
3047 if (Pred1 == Pred2)
3048 return false;
3049
3050 // Try to thread one of the guards of the block.
3051 // TODO: Look up deeper than to immediate predecessor?
// Both predecessors must form a diamond under a single common parent.
3052 auto *Parent = Pred1->getSinglePredecessor();
3053 if (!Parent || Parent != Pred2->getSinglePredecessor())
3054 return false;
3055
3056 if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
3057 for (auto &I : *BB)
3058 if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
3059 return true;
3060
3061 return false;
3062}
3063
3064/// Try to propagate the guard from BB which is the lower block of a diamond
3065/// to one of its branches, in case if diamond's condition implies guard's
3066/// condition.
///
/// \param BB    Merge block of the diamond, containing the guard.
/// \param Guard The llvm.experimental.guard call (first arg is the condition).
/// \param BI    The conditional branch at the top of the diamond.
/// \returns true if the guard was duplicated into the unproven branch and
///          removed from BB.
// NOTE(review): the signature line (doxygen line 3067) is missing from this
// extraction; upstream it is
// `bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,`.
3068 BranchInst *BI) {
3069 assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
3070 assert(BI->isConditional() && "Unconditional branch has 2 successors?");
3071 Value *GuardCond = Guard->getArgOperand(0);
3072 Value *BranchCond = BI->getCondition();
3073 BasicBlock *TrueDest = BI->getSuccessor(0);
3074 BasicBlock *FalseDest = BI->getSuccessor(1);
3075
3076 auto &DL = BB->getDataLayout();
3077 bool TrueDestIsSafe = false;
3078 bool FalseDestIsSafe = false;
3079
3080 // True dest is safe if BranchCond => GuardCond.
3081 auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
3082 if (Impl && *Impl)
3083 TrueDestIsSafe = true;
3084 else {
3085 // False dest is safe if !BranchCond => GuardCond.
3086 Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
3087 if (Impl && *Impl)
3088 FalseDestIsSafe = true;
3089 }
3090
3091 if (!TrueDestIsSafe && !FalseDestIsSafe)
3092 return false;
3093
// The "unguarded" side is the one where the guard condition is implied; the
// guard itself only needs to be duplicated into the other ("guarded") side.
3094 BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3095 BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3096
3097 ValueToValueMapTy UnguardedMapping, GuardedMapping;
3098 Instruction *AfterGuard = Guard->getNextNode();
3099 unsigned Cost =
3100 getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
3101 if (Cost > BBDupThreshold)
3102 return false;
3103 // Duplicate all instructions before the guard and the guard itself to the
3104 // branch where implication is not proved.
// NOTE(review): line 3105 is missing from this extraction; upstream it opens
// `BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(`.
3106 BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3107 assert(GuardedBlock && "Could not create the guarded block?");
3108 // Duplicate all instructions before the guard in the unguarded branch.
3109 // Since we have successfully duplicated the guarded block and this block
3110 // has fewer instructions, we expect it to succeed.
// NOTE(review): line 3111 is missing; upstream it opens
// `BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(`.
3112 BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3113 assert(UnguardedBlock && "Could not create the unguarded block?");
3114 LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3115 << GuardedBlock->getName() << "\n");
3116 // Some instructions before the guard may still have uses. For them, we need
3117 // to create Phi nodes merging their copies in both guarded and unguarded
3118 // branches. Those instructions that have no uses can be just removed.
// NOTE(review): line 3119 is missing; upstream it declares the ToRemove
// vector, e.g. `SmallVector<Instruction *, 4> ToRemove;`.
3120 for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3121 if (!isa<PHINode>(&*BI))
3122 ToRemove.push_back(&*BI);
3123
3124 BasicBlock::iterator InsertionPoint = BB->getFirstInsertionPt();
3125 assert(InsertionPoint != BB->end() && "Empty block?");
3126 // Substitute with Phis & remove.
// Reverse order so an instruction is erased only after its users (which
// appear later in the block) have already been erased or rewritten.
3127 for (auto *Inst : reverse(ToRemove)) {
3128 if (!Inst->use_empty()) {
3129 PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
3130 NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3131 NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
3132 NewPN->setDebugLoc(Inst->getDebugLoc());
3133 NewPN->insertBefore(InsertionPoint);
3134 Inst->replaceAllUsesWith(NewPN);
3135 }
3136 Inst->dropDbgRecords();
3137 Inst->eraseFromParent();
3138 }
3139 return true;
3140}
3141
// Build the PreservedAnalyses set this pass reports after making changes.
3142PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {
// NOTE(review): lines 3143-3145 are missing from this extraction; upstream
// they declare `PreservedAnalyses PA;` and preserve the analyses this pass
// keeps up to date (LazyValueAnalysis, DominatorTreeAnalysis) — confirm
// against the original source.
3146
3147 // TODO: We would like to preserve BPI/BFI. Enable once all paths update them.
3148 // TODO: Would be nice to verify BPI/BFI consistency as well.
3149 return PA;
3150}
3151
// Re-run an "external" analysis (one not maintained incrementally by this
// pass, e.g. BPI/BFI) through the FunctionAnalysisManager, first invalidating
// everything this pass has made stale. Returns the fresh analysis result.
3152template <typename AnalysisT>
3153typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
3154 assert(FAM && "Can't run external analysis without FunctionAnalysisManager");
3155
3156 // If there were no changes since last call to 'runExternalAnalysis' then all
3157 // analysis is either up to date or explicitly invalidated. Just go ahead and
3158 // run the "external" analysis.
3159 if (!ChangedSinceLastAnalysisUpdate) {
3160 assert(!DTU->hasPendingUpdates() &&
3161 "Lost update of 'ChangedSinceLastAnalysisUpdate'?");
3162 // Run the "external" analysis.
3163 return &FAM->getResult<AnalysisT>(*F);
3164 }
3165 ChangedSinceLastAnalysisUpdate = false;
3166
3167 auto PA = getPreservedAnalysis();
3168 // TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI
3169 // as preserved.
3170 PA.preserve<BranchProbabilityAnalysis>();
3171 PA.preserve<BlockFrequencyAnalysis>();
3172 // Report everything except explicitly preserved as invalid.
3173 FAM->invalidate(*F, PA);
3174 // Update DT/PDT.
3175 DTU->flush();
3176 // Make sure DT/PDT are valid before running "external" analysis.
3177 assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
3178 assert((!DTU->hasPostDomTree() ||
3179 DTU->getPostDomTree().verify(
// NOTE(review): line 3180 is missing from this extraction; upstream it
// closes the assert with `PostDominatorTree::VerificationLevel::Fast)));`.
3181 // Run the "external" analysis.
3182 auto *Result = &FAM->getResult<AnalysisT>(*F);
3183 // Update analysis JumpThreading depends on and not explicitly preserved.
3184 TTI = &FAM->getResult<TargetIRAnalysis>(*F);
3185 TLI = &FAM->getResult<TargetLibraryAnalysis>(*F);
3186 AA = &FAM->getResult<AAManager>(*F);
3187
3188 return Result;
3189}
3190
// Return the cached BranchProbabilityInfo, fetching the FAM's cached result
// on first use. May return nullptr if no cached BPI exists (the optional is
// populated either way, so subsequent calls skip the lookup).
3191BranchProbabilityInfo *JumpThreadingPass::getBPI() {
3192 if (!BPI) {
3193 assert(FAM && "Can't create BPI without FunctionAnalysisManager");
// NOTE(review): line 3194 is missing from this extraction; upstream it is
// `BPI = FAM->getCachedResult<BranchProbabilityAnalysis>(*F);`.
3195 }
3196 return *BPI;
3197}
3198
// Return the cached BlockFrequencyInfo, fetching the FAM's cached result on
// first use. May return nullptr if no cached BFI exists.
3199BlockFrequencyInfo *JumpThreadingPass::getBFI() {
3200 if (!BFI) {
3201 assert(FAM && "Can't create BFI without FunctionAnalysisManager");
// NOTE(review): line 3202 is missing from this extraction; upstream it is
// `BFI = FAM->getCachedResult<BlockFrequencyAnalysis>(*F);`.
3203 }
3204 return *BFI;
3205}
3206
3207// Important note on validity of BPI/BFI. JumpThreading tries to preserve
3208// BPI/BFI as it goes. Thus if cached instance exists it will be updated.
3209// Otherwise, new instance of BPI/BFI is created (up to date by definition).
3207// Important note on validity of BPI/BFI. JumpThreading tries to preserve
3208// BPI/BFI as it goes. Thus if cached instance exists it will be updated.
3209// Otherwise, new instance of BPI/BFI is created (up to date by definition).
// Returns the cached BPI if present; otherwise computes a fresh one via
// runExternalAnalysis() when Force is set. Without Force, returns whatever
// getBPI() stored (possibly nullptr).
3210BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) {
3211 auto *Res = getBPI();
3212 if (Res)
3213 return Res;
3214
3215 if (Force)
3216 BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
3217
3218 return *BPI;
3219}
3220
// Mirror of getOrCreateBPI() for BlockFrequencyInfo: return the cached BFI if
// present; otherwise compute a fresh one via runExternalAnalysis() when Force
// is set.
3221BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) {
3222 auto *Res = getBFI();
3223 if (Res)
3224 return Res;
3225
3226 if (Force)
3227 BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
3228
3229 return *BFI;
3230}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
ReachingDefAnalysis InstSet & ToRemove
static const Function * getParent(const Value *V)
BlockVerifier::State From
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Size
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM IR.
static unsigned getBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForBranchOnUndef - If we determine that the specified block ends in an undefined jump,...
static cl::opt< unsigned > PhiDuplicateThreshold("jump-threading-phi-threshold", cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), cl::Hidden)
static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, BasicBlock *KnownAtEndOfBB)
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
static cl::opt< bool > ThreadAcrossLoopHeaders("jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), cl::init(false), cl::Hidden)
static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, ValueToValueMapTy &ValueMap)
addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new predecessor to the PHIBB block.
static BasicBlock * findMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &PredToDestList)
findMostPopularDest - The specified list contains multiple possible threadable destinations.
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
static bool hasAddressTakenAndUsed(BasicBlock *BB)
See the comments on JumpThreadingPass.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:528
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
#define P(N)
ppc ctr loops verify
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:424
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:451
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:438
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:507
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:414
DbgMarker * createMarker(Instruction *I)
Attach a DbgMarker to the given instruction.
Definition: BasicBlock.cpp:52
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:648
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:168
const Instruction & front() const
Definition: BasicBlock.h:461
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:202
void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
Definition: BasicBlock.cpp:285
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:479
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:457
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:209
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:294
DbgMarker * getMarker(InstListType::iterator It)
Return the DbgMarker for the position given by It, so that DbgRecords can be inserted there.
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:167
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:677
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:665
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:514
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
void disableDominatorTree()
Disable the use of the dominator tree during alias analysis queries.
The address of a basic block.
Definition: Constants.h:890
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1833
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
uint32_t getNumerator() const
BranchProbability getCompl() const
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1410
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:747
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2555
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:206
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:864
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
This is an important base class in LLVM.
Definition: Constant.h:41
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:723
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Per-instruction record of debug-info.
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(DbgMarker *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere, bool InsertAtHead=false)
Clone all DbgMarkers from From into this marker.
const BasicBlock * getParent() const
This represents the llvm.dbg.value instruction.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
This class represents a freeze function that returns random concrete value if an operand is either a ...
const BasicBlock & getEntryBlock() const
Definition: Function.h:800
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:690
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
Indirect Branch Instruction.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:78
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:476
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1720
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1706
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:473
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
bool isSpecialTerminator() const
Definition: Instruction.h:284
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
bool simplifyPartiallyRedundantLoad(LoadInst *LI)
simplifyPartiallyRedundantLoad - If LoadI is an obviously partially redundant load instruction,...
bool processBranchOnXOR(BinaryOperator *BO)
processBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
bool processGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case if another branch o...
void updateSSA(BasicBlock *BB, BasicBlock *NewBB, ValueToValueMapTy &ValueMapping)
Update the SSA form.
bool computeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
void findLoopHeaders(Function &F)
findLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB)
Merge basic block BB into its sole predecessor if possible.
void cloneInstructions(ValueToValueMapTy &ValueMapping, BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB)
Clone instructions in range [BI, BE) to NewBB.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, FunctionAnalysisManager *FAM, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, LazyValueInfo *LVI, AAResults *AA, std::unique_ptr< DomTreeUpdater > DTU, std::optional< BlockFrequencyInfo * > BFI, std::optional< BranchProbabilityInfo * > BPI)
Constant * evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *cond, const DataLayout &DL)
bool processBranchOnPHI(PHINode *PN)
processBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node (or freeze PH...
bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond)
Attempt to thread through two successive basic blocks.
bool computeValueKnownInPredecessorsImpl(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, SmallPtrSet< Value *, 4 > &RecursionSet, Instruction *CxtI=nullptr)
computeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx)
DomTreeUpdater * getDomTreeUpdater() const
bool processThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
bool processBlock(BasicBlock *BB)
processBlock - If there are any predecessors whose control can be threaded through to a successor,...
bool processImpliedCondition(BasicBlock *BB)
bool duplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs)
duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB)
bool tryThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
tryThreadEdge - Thread an edge if it's safe and profitable to do so.
bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
tryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2
bool tryToUnfoldSelectInCurrBB(BasicBlock *BB)
tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
void threadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
threadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches,...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Analysis to compute lazy value information.
This pass computes, caches, and vends lazy value constraint information.
Definition: LazyValueInfo.h:33
void eraseBlock(BasicBlock *BB)
Inform the analysis cache that we have erased a block.
void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc)
Inform the analysis cache that we have threaded an edge from PredBB to OldSucc to be from PredBB to N...
Tristate
This is used to return true/false/dunno results.
Definition: LazyValueInfo.h:63
Constant * getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value is known to be a constant on the specified edge.
ConstantRange getConstantRangeOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Return the ConstantRange constraint that is known to hold for the specified value on the specified edg...
Tristate getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value comparison with a constant is known to be true or false on the ...
Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C, Instruction *CxtI, bool UseBlockValue)
Determine whether the specified value comparison with a constant is known to be true or false at the ...
Constant * getConstant(Value *V, Instruction *CxtI)
Determine whether the specified value is known to be a constant at the specified instruction.
void forgetValue(Value *V)
Remove information related to this value from the cache.
An instruction for reading from memory.
Definition: Instructions.h:173
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:217
bool isUnordered() const
Definition: Instructions.h:246
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:227
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:208
static LocationSize precise(uint64_t Value)
Metadata node.
Definition: Metadata.h:1067
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
Representation for a specific memory location.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:193
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1814
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:40
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
Definition: SSAUpdater.cpp:188
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Definition: SSAUpdater.cpp:53
void UpdateDebugValues(Instruction *I)
Rewrite debug value intrinsics to conform to a new SSA form.
Definition: SSAUpdater.cpp:200
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Definition: SSAUpdater.cpp:70
This class represents the LLVM 'select' instruction.
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
'undef' values are things that do not have specified contents.
Definition: Constants.h:1385
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1795
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
iterator find(const KeyT &Val)
Definition: ValueMap.h:155
iterator end()
Definition: ValueMap.h:135
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:1066
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1042
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:105
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:130
unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
Definition: Local.cpp:3472
auto successors(const MachineBasicBlock *BB)
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
Value * findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
Definition: Loads.cpp:584
void remapDebugVariable(ValueToValueMapTy &Mapping, Instruction *Inst)
Remap the operands of the debug records attached to Inst, and the operands of Inst itself if it's a d...
Definition: Local.cpp:3694
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:731
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:455
bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: DebugInfo.cpp:138
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1120
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
bool hasValidBranchWeightMD(const Instruction &I)
Checks if an instructions has valid Branch Weight Metadata.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void cloneNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, DenseMap< MDNode *, MDNode * > &ClonedScopes, StringRef Ext, LLVMContext &Context)
Duplicate the specified list of noalias decl scopes.
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3347
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!...
Definition: Local.cpp:771
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1954
Value * simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
void adaptNoAliasScopes(llvm::Instruction *I, const DenseMap< MDNode *, MDNode * > &ClonedScopes, LLVMContext &Context)
Adapt the metadata for the specified instruction according to the provided mapping.
auto max_element(R &&Range)
Definition: STLExtras.h:1986
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1607
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
unsigned pred_size(const MachineBasicBlock *BB)
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock * > > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them.
Definition: CFG.cpp:34
std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
Function object to check whether the second component of a container supported by std::get (like std:...
Definition: STLExtras.h:1459