LLVM 23.0.0git
JumpThreading.cpp
Go to the documentation of this file.
1//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Jump Threading pass.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/MapVector.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
20#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/CFG.h"
30#include "llvm/Analysis/Loads.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Dominators.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/InstrTypes.h"
47#include "llvm/IR/Instruction.h"
50#include "llvm/IR/Intrinsics.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/MDBuilder.h"
53#include "llvm/IR/Metadata.h"
54#include "llvm/IR/Module.h"
55#include "llvm/IR/PassManager.h"
58#include "llvm/IR/Type.h"
59#include "llvm/IR/Use.h"
60#include "llvm/IR/Value.h"
65#include "llvm/Support/Debug.h"
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <memory>
76#include <utility>
77
78using namespace llvm;
79using namespace jumpthreading;
80
81#define DEBUG_TYPE "jump-threading"
82
83STATISTIC(NumThreads, "Number of jumps threaded");
84STATISTIC(NumFolds, "Number of terminators folded");
85STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
86
88BBDuplicateThreshold("jump-threading-threshold",
89 cl::desc("Max block size to duplicate for jump threading"),
91
94 "jump-threading-implication-search-threshold",
95 cl::desc("The number of predecessors to search for a stronger "
96 "condition to use to thread over a weaker condition"),
98
100 "jump-threading-phi-threshold",
101 cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
102 cl::Hidden);
103
105 "jump-threading-across-loop-headers",
106 cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
107 cl::init(false), cl::Hidden);
108
109namespace llvm {
111}
112
114 DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
115}
116
117// Update branch probability information according to conditional
118// branch probability. This is usually made possible for cloned branches
119// in inline instances by the context specific profile in the caller.
120// For instance,
121//
122// [Block PredBB]
123// [Branch PredBr]
124// if (t) {
125// Block A;
126// } else {
127// Block B;
128// }
129//
130// [Block BB]
131// cond = PN([true, %A], [..., %B]); // PHI node
132// [Branch CondBr]
133// if (cond) {
134// ... // P(cond == true) = 1%
135// }
136//
137// Here we know that when block A is taken, cond must be true, which means
138// P(cond == true | A) = 1
139//
140// Given that P(cond == true) = P(cond == true | A) * P(A) +
141// P(cond == true | B) * P(B)
142// we get:
143// P(cond == true ) = P(A) + P(cond == true | B) * P(B)
144//
145// which gives us:
146// P(A) is no greater than P(cond == true), i.e.
147// P(t == true) <= P(cond == true)
148//
149// In other words, if we know P(cond == true) is unlikely, we know
150// that P(t == true) is also unlikely.
151//
154 if (!CondBr)
155 return;
156
157 uint64_t TrueWeight, FalseWeight;
158 if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))
159 return;
160
161 if (TrueWeight + FalseWeight == 0)
162 // Zero branch_weights do not give a hint for getting branch probabilities.
163 // Technically it would result in division by zero denominator, which is
164 // TrueWeight + FalseWeight.
165 return;
166
167 // Returns the outgoing edge of the dominating predecessor block
168 // that leads to the PhiNode's incoming block:
169 auto GetPredOutEdge =
170 [](BasicBlock *IncomingBB,
171 BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
172 auto *PredBB = IncomingBB;
173 auto *SuccBB = PhiBB;
175 while (true) {
176 if (isa<CondBrInst>(PredBB->getTerminator()))
177 return {PredBB, SuccBB};
178 Visited.insert(PredBB);
179 auto *SinglePredBB = PredBB->getSinglePredecessor();
180 if (!SinglePredBB)
181 return {nullptr, nullptr};
182
183 // Stop searching when SinglePredBB has been visited. It means we see
184 // an unreachable loop.
185 if (Visited.count(SinglePredBB))
186 return {nullptr, nullptr};
187
188 SuccBB = PredBB;
189 PredBB = SinglePredBB;
190 }
191 };
192
193 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
194 Value *PhiOpnd = PN->getIncomingValue(i);
195 ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
196
197 if (!CI || !CI->getType()->isIntegerTy(1))
198 continue;
199
202 TrueWeight, TrueWeight + FalseWeight)
204 FalseWeight, TrueWeight + FalseWeight));
205
206 auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
207 if (!PredOutEdge.first)
208 return;
209
210 BasicBlock *PredBB = PredOutEdge.first;
211 CondBrInst *PredBr = dyn_cast<CondBrInst>(PredBB->getTerminator());
212 if (!PredBr)
213 return;
214
215 uint64_t PredTrueWeight, PredFalseWeight;
216 // FIXME: We currently only set the profile data when it is missing.
217 // With PGO, this can be used to refine even existing profile data with
218 // context information. This needs to be done after more performance
219 // testing.
220 if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))
221 continue;
222
223 // We can not infer anything useful when BP >= 50%, because BP is the
224 // upper bound probability value.
225 if (BP >= BranchProbability(50, 100))
226 continue;
227
228 uint32_t Weights[2];
229 if (PredBr->getSuccessor(0) == PredOutEdge.second) {
230 Weights[0] = BP.getNumerator();
231 Weights[1] = BP.getCompl().getNumerator();
232 } else {
233 Weights[0] = BP.getCompl().getNumerator();
234 Weights[1] = BP.getNumerator();
235 }
236 setBranchWeights(*PredBr, Weights, hasBranchWeightOrigin(*PredBr));
237 }
238}
239
242 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
243 // Jump threading makes no sense for targets with divergent control flow.
244 if (TTI.hasBranchDivergence(&F))
245 return PreservedAnalyses::all();
246 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
247 auto &LVI = AM.getResult<LazyValueAnalysis>(F);
248 auto &AA = AM.getResult<AAManager>(F);
249 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
250
251 bool Changed =
252 runImpl(F, &AM, &TLI, &TTI, &LVI, &AA,
253 std::make_unique<DomTreeUpdater>(
254 &DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy),
255 nullptr, nullptr);
256
257 if (!Changed)
258 return PreservedAnalyses::all();
259
260
262
263#if defined(EXPENSIVE_CHECKS)
265 DominatorTree::VerificationLevel::Full) &&
266 "DT broken after JumpThreading");
267 assert((!getDomTreeUpdater()->hasPostDomTree() ||
268 getDomTreeUpdater()->getPostDomTree().verify(
269 PostDominatorTree::VerificationLevel::Full)) &&
270 "PDT broken after JumpThreading");
271#else
273 DominatorTree::VerificationLevel::Fast) &&
274 "DT broken after JumpThreading");
275 assert((!getDomTreeUpdater()->hasPostDomTree() ||
276 getDomTreeUpdater()->getPostDomTree().verify(
277 PostDominatorTree::VerificationLevel::Fast)) &&
278 "PDT broken after JumpThreading");
279#endif
280
281 return getPreservedAnalysis();
282}
283
285 TargetLibraryInfo *TLI_,
287 AliasAnalysis *AA_,
288 std::unique_ptr<DomTreeUpdater> DTU_,
289 BlockFrequencyInfo *BFI_,
290 BranchProbabilityInfo *BPI_) {
291 LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");
292 F = &F_;
293 FAM = FAM_;
294 TLI = TLI_;
295 TTI = TTI_;
296 LVI = LVI_;
297 AA = AA_;
298 DTU = std::move(DTU_);
299 BFI = BFI_;
300 BPI = BPI_;
301 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
302 F->getParent(), Intrinsic::experimental_guard);
303 HasGuards = GuardDecl && !GuardDecl->use_empty();
304
305 // Reduce the number of instructions duplicated when optimizing strictly for
306 // size.
307 if (BBDuplicateThreshold.getNumOccurrences())
308 BBDupThreshold = BBDuplicateThreshold;
309 else if (F->hasMinSize())
310 BBDupThreshold = 3;
311 else
312 BBDupThreshold = DefaultBBDupThreshold;
313
314 assert(DTU && "DTU isn't passed into JumpThreading before using it.");
315 assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
316 DominatorTree &DT = DTU->getDomTree();
317
318 Unreachable.clear();
319 for (auto &BB : *F)
320 if (!DT.isReachableFromEntry(&BB))
321 Unreachable.insert(&BB);
322
324 findLoopHeaders(*F);
325
326 bool EverChanged = false;
327 bool Changed;
328 do {
329 Changed = false;
330 for (auto &BB : *F) {
331 if (Unreachable.count(&BB))
332 continue;
333 while (processBlock(&BB)) // Thread all of the branches we can over BB.
334 Changed = ChangedSinceLastAnalysisUpdate = true;
335
336 // Stop processing BB if it's the entry or is now deleted. The following
337 // routines attempt to eliminate BB and locating a suitable replacement
338 // for the entry is non-trivial.
339 if (&BB == &F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))
340 continue;
341
342 if (pred_empty(&BB)) {
343 // When processBlock makes BB unreachable it doesn't bother to fix up
344 // the instructions in it. We must remove BB to prevent invalid IR.
345 LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
346 << "' with terminator: " << *BB.getTerminator()
347 << '\n');
348 LoopHeaders.erase(&BB);
349 LVI->eraseBlock(&BB);
350 DeleteDeadBlock(&BB, DTU.get());
351 Changed = ChangedSinceLastAnalysisUpdate = true;
352 continue;
353 }
354
355 // processBlock doesn't thread BBs with unconditional TIs. However, if BB
356 // is "almost empty", we attempt to merge BB with its sole successor.
357 if (auto *BI = dyn_cast<UncondBrInst>(BB.getTerminator())) {
358 BasicBlock *Succ = BI->getSuccessor();
359 if (
360 // The terminator must be the only non-phi instruction in BB.
361 BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
362 // Don't alter Loop headers and latches to ensure another pass can
363 // detect and transform nested loops later.
364 !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
366 // BB is valid for cleanup here because we passed in DTU. F remains
367 // BB's parent until a DTU->getDomTree() event.
368 LVI->eraseBlock(&BB);
369 Changed = ChangedSinceLastAnalysisUpdate = true;
370 }
371 }
372 }
373 EverChanged |= Changed;
374 } while (Changed);
375
376 // Jump threading may have introduced redundant debug values into F which
377 // should be removed.
378 if (EverChanged)
379 for (auto &BB : *F) {
381 }
382
383 LoopHeaders.clear();
384 return EverChanged;
385}
386
387// Replace uses of Cond with ToVal when safe to do so. If all uses are
388// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
389// because we may incorrectly replace uses when guards/assumes are uses of
390// `Cond` and we used the guards/assume to reason about the `Cond` value
391// at the end of block. RAUW unconditionally replaces all uses
392// including the guards/assumes themselves and the uses before the
393// guard/assume.
395 BasicBlock *KnownAtEndOfBB) {
396 bool Changed = false;
397 assert(Cond->getType() == ToVal->getType());
398 // We can unconditionally replace all uses in non-local blocks (i.e. uses
399 // strictly dominated by BB), since LVI information is true from the
400 // terminator of BB.
401 if (Cond->getParent() == KnownAtEndOfBB)
403 for (Instruction &I : reverse(*KnownAtEndOfBB)) {
404 // Replace any debug-info record users of Cond with ToVal.
405 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
406 DVR.replaceVariableLocationOp(Cond, ToVal, true);
407
408 // Reached the Cond whose uses we are trying to replace, so there are no
409 // more uses.
410 if (&I == Cond)
411 break;
412 // We only replace uses in instructions that are guaranteed to reach the end
413 // of BB, where we know Cond is ToVal.
415 break;
416 Changed |= I.replaceUsesOfWith(Cond, ToVal);
417 }
418 if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
419 Cond->eraseFromParent();
420 Changed = true;
421 }
422 return Changed;
423}
424
425/// Return the cost of duplicating a piece of this block from first non-phi
426/// and before StopAt instruction to thread across it. Stop scanning the block
427/// when exceeding the threshold. If duplication is impossible, returns ~0U.
429 BasicBlock *BB,
431 unsigned Threshold) {
432 assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
433
434 // Do not duplicate the BB if it has a lot of PHI nodes.
435 // If a threadable chain is too long then the number of PHI nodes can add up,
436 // leading to a substantial increase in compile time when rewriting the SSA.
437 unsigned PhiCount = 0;
438 Instruction *FirstNonPHI = nullptr;
439 for (Instruction &I : *BB) {
440 if (!isa<PHINode>(&I)) {
441 FirstNonPHI = &I;
442 break;
443 }
444 if (++PhiCount > PhiDuplicateThreshold)
445 return ~0U;
446 }
447
448 /// Ignore PHI nodes, these will be flattened when duplication happens.
449 BasicBlock::const_iterator I(FirstNonPHI);
450
451 // FIXME: THREADING will delete values that are just used to compute the
452 // branch, so they shouldn't count against the duplication cost.
453
454 unsigned Bonus = 0;
455 if (BB->getTerminator() == StopAt) {
456 // Threading through a switch statement is particularly profitable. If this
457 // block ends in a switch, decrease its cost to make it more likely to
458 // happen.
460 Bonus = 6;
461
462 // The same holds for indirect branches, but slightly more so.
464 Bonus = 8;
465 }
466
467 // Bump the threshold up so the early exit from the loop doesn't skip the
468 // terminator-based Size adjustment at the end.
469 Threshold += Bonus;
470
471 // Sum up the cost of each instruction until we get to the terminator. Don't
472 // include the terminator because the copy won't include it.
473 unsigned Size = 0;
474 for (; &*I != StopAt; ++I) {
475
476 // Stop scanning the block if we've reached the threshold.
477 if (Size > Threshold)
478 return Size;
479
480 // Bail out if this instruction gives back a token type, it is not possible
481 // to duplicate it if it is used outside this BB.
482 if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
483 return ~0U;
484
485 // Blocks with NoDuplicate are modelled as having infinite cost, so they
486 // are never duplicated.
487 if (const CallInst *CI = dyn_cast<CallInst>(I))
488 if (CI->cannotDuplicate() || CI->isConvergent())
489 return ~0U;
490
491 if (TTI->getInstructionCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) ==
493 continue;
494
495 // All other instructions count for at least one unit.
496 ++Size;
497
498 // Calls are more expensive. If they are non-intrinsic calls, we model them
499 // as having cost of 4. If they are a non-vector intrinsic, we model them
500 // as having cost of 2 total, and if they are a vector intrinsic, we model
501 // them as having cost 1.
502 if (const CallInst *CI = dyn_cast<CallInst>(I)) {
503 if (!isa<IntrinsicInst>(CI))
504 Size += 3;
505 else if (!CI->getType()->isVectorTy())
506 Size += 1;
507 }
508 }
509
510 return Size > Bonus ? Size - Bonus : 0;
511}
512
513/// findLoopHeaders - We do not want jump threading to turn proper loop
514/// structures into irreducible loops. Doing this breaks up the loop nesting
515/// hierarchy and pessimizes later transformations. To prevent this from
516/// happening, we first have to find the loop headers. Here we approximate this
517/// by finding targets of backedges in the CFG.
518///
519/// Note that there definitely are cases when we want to allow threading of
520/// edges across a loop header. For example, threading a jump from outside the
521/// loop (the preheader) to an exit block of the loop is definitely profitable.
522/// It is also almost always profitable to thread backedges from within the loop
523/// to exit blocks, and is often profitable to thread backedges to other blocks
524/// within the loop (forming a nested loop). This simple analysis is not rich
525/// enough to track all of these properties and keep it up-to-date as the CFG
526/// mutates, so we don't allow any of these transformations.
532
533/// getKnownConstant - Helper method to determine if we can thread over a
534/// terminator with the given value as its condition, and if so what value to
535/// use for that. What kind of value this is depends on whether we want an
536/// integer or a block address, but an undef is always accepted.
537/// Returns null if Val is null or not an appropriate constant.
539 if (!Val)
540 return nullptr;
541
542 // Undef is "known" enough.
543 if (UndefValue *U = dyn_cast<UndefValue>(Val))
544 return U;
545
546 if (Preference == WantBlockAddress)
548
549 return dyn_cast<ConstantInt>(Val);
550}
551
552/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
553/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
554/// in any of our predecessors. If so, return the known list of value and pred
555/// BB in the result vector.
556///
557/// This returns true if there were any known values.
559 Value *V, BasicBlock *BB, PredValueInfo &Result,
560 ConstantPreference Preference, SmallPtrSet<Value *, 4> &RecursionSet,
561 Instruction *CxtI) {
562 const DataLayout &DL = BB->getDataLayout();
563
564 // This method walks up use-def chains recursively. Because of this, we could
565 // get into an infinite loop going around loops in the use-def chain. To
566 // prevent this, keep track of which values we've already visited
567 // and terminate the search if we loop back to one of them.
568 if (!RecursionSet.insert(V).second)
569 return false;
570
571 // If V is a constant, then it is known in all predecessors.
572 if (Constant *KC = getKnownConstant(V, Preference)) {
573 for (BasicBlock *Pred : predecessors(BB))
574 Result.emplace_back(KC, Pred);
575
576 return !Result.empty();
577 }
578
579 // If V is a non-instruction value, or an instruction in a different block,
580 // then it can't be derived from a PHI.
582 if (!I || I->getParent() != BB) {
583
584 // Okay, if this is a live-in value, see if it has a known value at any
585 // edge from our predecessors.
586 for (BasicBlock *P : predecessors(BB)) {
587 using namespace PatternMatch;
588 // If the value is known by LazyValueInfo to be a constant in a
589 // predecessor, use that information to try to thread this block.
590 Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
591 // If I is a non-local compare-with-constant instruction, use more-rich
592 // 'getPredicateOnEdge' method. This would be able to handle value
593 // inequalities better, for example if the compare is "X < 4" and "X < 3"
594 // is known true but "X < 4" itself is not available.
595 CmpPredicate Pred;
596 Value *Val;
597 Constant *Cst;
598 if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst))))
599 PredCst = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
600 if (Constant *KC = getKnownConstant(PredCst, Preference))
601 Result.emplace_back(KC, P);
602 }
603
604 return !Result.empty();
605 }
606
607 /// If I is a PHI node, then we know the incoming values for any constants.
608 if (PHINode *PN = dyn_cast<PHINode>(I)) {
609 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
610 Value *InVal = PN->getIncomingValue(i);
611 if (Constant *KC = getKnownConstant(InVal, Preference)) {
612 Result.emplace_back(KC, PN->getIncomingBlock(i));
613 } else {
614 Constant *CI = LVI->getConstantOnEdge(InVal,
615 PN->getIncomingBlock(i),
616 BB, CxtI);
617 if (Constant *KC = getKnownConstant(CI, Preference))
618 Result.emplace_back(KC, PN->getIncomingBlock(i));
619 }
620 }
621
622 return !Result.empty();
623 }
624
625 // Handle Cast instructions.
626 if (CastInst *CI = dyn_cast<CastInst>(I)) {
627 Value *Source = CI->getOperand(0);
628 PredValueInfoTy Vals;
629 computeValueKnownInPredecessorsImpl(Source, BB, Vals, Preference,
630 RecursionSet, CxtI);
631 if (Vals.empty())
632 return false;
633
634 // Convert the known values.
635 for (auto &Val : Vals)
636 if (Constant *Folded = ConstantFoldCastOperand(CI->getOpcode(), Val.first,
637 CI->getType(), DL))
638 Result.emplace_back(Folded, Val.second);
639
640 return !Result.empty();
641 }
642
643 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
644 Value *Source = FI->getOperand(0);
645 computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
646 RecursionSet, CxtI);
647
648 erase_if(Result, [](auto &Pair) {
649 return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
650 });
651
652 return !Result.empty();
653 }
654
655 // Handle some boolean conditions.
656 if (I->getType()->getPrimitiveSizeInBits() == 1) {
657 using namespace PatternMatch;
658 if (Preference != WantInteger)
659 return false;
660 // X | true -> true
661 // X & false -> false
662 Value *Op0, *Op1;
663 if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
664 match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
665 PredValueInfoTy LHSVals, RHSVals;
666
668 RecursionSet, CxtI);
670 RecursionSet, CxtI);
671
672 if (LHSVals.empty() && RHSVals.empty())
673 return false;
674
675 ConstantInt *InterestingVal;
676 if (match(I, m_LogicalOr()))
677 InterestingVal = ConstantInt::getTrue(I->getContext());
678 else
679 InterestingVal = ConstantInt::getFalse(I->getContext());
680
681 SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
682
683 // Scan for the sentinel. If we find an undef, force it to the
684 // interesting value: x|undef -> true and x&undef -> false.
685 for (const auto &LHSVal : LHSVals)
686 if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
687 Result.emplace_back(InterestingVal, LHSVal.second);
688 LHSKnownBBs.insert(LHSVal.second);
689 }
690 for (const auto &RHSVal : RHSVals)
691 if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
692 // If we already inferred a value for this block on the LHS, don't
693 // re-add it.
694 if (!LHSKnownBBs.count(RHSVal.second))
695 Result.emplace_back(InterestingVal, RHSVal.second);
696 }
697
698 return !Result.empty();
699 }
700
701 // Handle the NOT form of XOR.
702 if (I->getOpcode() == Instruction::Xor &&
703 isa<ConstantInt>(I->getOperand(1)) &&
704 cast<ConstantInt>(I->getOperand(1))->isOne()) {
705 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
706 WantInteger, RecursionSet, CxtI);
707 if (Result.empty())
708 return false;
709
710 // Invert the known values.
711 for (auto &R : Result)
712 R.first = ConstantExpr::getNot(R.first);
713
714 return true;
715 }
716
717 // Try to simplify some other binary operator values.
718 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
719 if (Preference != WantInteger)
720 return false;
721 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
722 PredValueInfoTy LHSVals;
723 computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
724 WantInteger, RecursionSet, CxtI);
725
726 // Try to use constant folding to simplify the binary operator.
727 for (const auto &LHSVal : LHSVals) {
728 Constant *V = LHSVal.first;
729 Constant *Folded =
730 ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
731
732 if (Constant *KC = getKnownConstant(Folded, WantInteger))
733 Result.emplace_back(KC, LHSVal.second);
734 }
735 }
736
737 return !Result.empty();
738 }
739
740 // Handle compare with phi operand, where the PHI is defined in this block.
741 if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
742 if (Preference != WantInteger)
743 return false;
744 Type *CmpType = Cmp->getType();
745 Value *CmpLHS = Cmp->getOperand(0);
746 Value *CmpRHS = Cmp->getOperand(1);
747 CmpInst::Predicate Pred = Cmp->getPredicate();
748
749 PHINode *PN = dyn_cast<PHINode>(CmpLHS);
750 if (!PN)
751 PN = dyn_cast<PHINode>(CmpRHS);
752 // Do not perform phi translation across a loop header phi, because this
753 // may result in comparison of values from two different loop iterations.
754 // FIXME: This check is broken if LoopHeaders is not populated.
755 if (PN && PN->getParent() == BB && !LoopHeaders.contains(BB)) {
756 const DataLayout &DL = PN->getDataLayout();
757 // We can do this simplification if any comparisons fold to true or false.
758 // See if any do.
759 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
760 BasicBlock *PredBB = PN->getIncomingBlock(i);
761 Value *LHS, *RHS;
762 if (PN == CmpLHS) {
763 LHS = PN->getIncomingValue(i);
764 RHS = CmpRHS->DoPHITranslation(BB, PredBB);
765 } else {
766 LHS = CmpLHS->DoPHITranslation(BB, PredBB);
767 RHS = PN->getIncomingValue(i);
768 }
769 Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
770 if (!Res) {
771 if (!isa<Constant>(RHS))
772 continue;
773
774 // getPredicateOnEdge call will make no sense if LHS is defined in BB.
775 auto LHSInst = dyn_cast<Instruction>(LHS);
776 if (LHSInst && LHSInst->getParent() == BB)
777 continue;
778
779 Res = LVI->getPredicateOnEdge(Pred, LHS, cast<Constant>(RHS), PredBB,
780 BB, CxtI ? CxtI : Cmp);
781 }
782
783 if (Constant *KC = getKnownConstant(Res, WantInteger))
784 Result.emplace_back(KC, PredBB);
785 }
786
787 return !Result.empty();
788 }
789
790 // If comparing a live-in value against a constant, see if we know the
791 // live-in value on any predecessors.
792 if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
793 Constant *CmpConst = cast<Constant>(CmpRHS);
794
795 if (!isa<Instruction>(CmpLHS) ||
796 cast<Instruction>(CmpLHS)->getParent() != BB) {
797 for (BasicBlock *P : predecessors(BB)) {
798 // If the value is known by LazyValueInfo to be a constant in a
799 // predecessor, use that information to try to thread this block.
800 Constant *Res = LVI->getPredicateOnEdge(Pred, CmpLHS, CmpConst, P, BB,
801 CxtI ? CxtI : Cmp);
802 if (Constant *KC = getKnownConstant(Res, WantInteger))
803 Result.emplace_back(KC, P);
804 }
805
806 return !Result.empty();
807 }
808
809 // InstCombine can fold some forms of constant range checks into
810 // (icmp (add (x, C1)), C2). See if we have such a thing with
811 // x as a live-in.
812 {
813 using namespace PatternMatch;
814
815 Value *AddLHS;
816 ConstantInt *AddConst;
817 if (isa<ConstantInt>(CmpConst) &&
818 match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
819 if (!isa<Instruction>(AddLHS) ||
820 cast<Instruction>(AddLHS)->getParent() != BB) {
821 for (BasicBlock *P : predecessors(BB)) {
822 // If the value is known by LazyValueInfo to be a ConstantRange in
823 // a predecessor, use that information to try to thread this
824 // block.
825 ConstantRange CR = LVI->getConstantRangeOnEdge(
826 AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
827 // Propagate the range through the addition.
828 CR = CR.add(AddConst->getValue());
829
830 // Get the range where the compare returns true.
832 Pred, cast<ConstantInt>(CmpConst)->getValue());
833
834 Constant *ResC;
835 if (CmpRange.contains(CR))
836 ResC = ConstantInt::getTrue(CmpType);
837 else if (CmpRange.inverse().contains(CR))
838 ResC = ConstantInt::getFalse(CmpType);
839 else
840 continue;
841
842 Result.emplace_back(ResC, P);
843 }
844
845 return !Result.empty();
846 }
847 }
848 }
849
850 // Try to find a constant value for the LHS of a comparison,
851 // and evaluate it statically if we can.
852 PredValueInfoTy LHSVals;
853 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
854 WantInteger, RecursionSet, CxtI);
855
856 for (const auto &LHSVal : LHSVals) {
857 Constant *V = LHSVal.first;
858 Constant *Folded =
859 ConstantFoldCompareInstOperands(Pred, V, CmpConst, DL);
860 if (Constant *KC = getKnownConstant(Folded, WantInteger))
861 Result.emplace_back(KC, LHSVal.second);
862 }
863
864 return !Result.empty();
865 }
866 }
867
869 // Handle select instructions where at least one operand is a known constant
870 // and we can figure out the condition value for any predecessor block.
871 Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
872 Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
873 PredValueInfoTy Conds;
874 if ((TrueVal || FalseVal) &&
875 computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
876 WantInteger, RecursionSet, CxtI)) {
877 for (auto &C : Conds) {
878 Constant *Cond = C.first;
879
880 // Figure out what value to use for the condition.
881 bool KnownCond;
883 // A known boolean.
884 KnownCond = CI->isOne();
885 } else {
886 assert(isa<UndefValue>(Cond) && "Unexpected condition value");
887 // Either operand will do, so be sure to pick the one that's a known
888 // constant.
889 // FIXME: Do this more cleverly if both values are known constants?
890 KnownCond = (TrueVal != nullptr);
891 }
892
893 // See if the select has a known constant value for this predecessor.
894 if (Constant *Val = KnownCond ? TrueVal : FalseVal)
895 Result.emplace_back(Val, C.second);
896 }
897
898 return !Result.empty();
899 }
900 }
901
902 // If all else fails, see if LVI can figure out a constant value for us.
903 assert(CxtI->getParent() == BB && "CxtI should be in BB");
904 Constant *CI = LVI->getConstant(V, CxtI);
905 if (Constant *KC = getKnownConstant(CI, Preference)) {
906 for (BasicBlock *Pred : predecessors(BB))
907 Result.emplace_back(KC, Pred);
908 }
909
910 return !Result.empty();
911}
912
913/// getBestDestForJumpOnUndef - If we determine that the specified block ends
914/// in an undefined jump, decide which block is best to revector to.
915///
916/// Since we can pick an arbitrary destination, we pick the successor with the
917/// fewest predecessors. This should reduce the in-degree of the others.
919 Instruction *BBTerm = BB->getTerminator();
920 unsigned MinSucc = 0;
921 BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
922 // Compute the successor with the minimum number of predecessors.
923 unsigned MinNumPreds = pred_size(TestBB);
924 for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
925 TestBB = BBTerm->getSuccessor(i);
926 unsigned NumPreds = pred_size(TestBB);
927 if (NumPreds < MinNumPreds) {
928 MinSucc = i;
929 MinNumPreds = NumPreds;
930 }
931 }
932
933 return MinSucc;
934}
935
937 if (!BB->hasAddressTaken()) return false;
938
939 // If the block has its address taken, it may be a tree of dead constants
940 // hanging off of it. These shouldn't keep the block alive.
943 return !BA->use_empty();
944}
945
946/// processBlock - If there are any predecessors whose control can be threaded
947/// through to a successor, transform them now.
///
/// Every visible `return true` below follows a simplification that changed the
/// IR. `false` is returned when the block is dead or pending deletion, when
/// its terminator cannot be threaded, or when none of the attempts succeeded.
/// NOTE(review): the signature (listing line 948) is absent from this extract.
949 // If the block is trivially dead, just return and let the caller nuke it.
950 // This simplifies other transformations.
951 if (DTU->isBBPendingDeletion(BB) ||
952 (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
953 return false;
954
955 // If this block has a single predecessor, and if that pred has a single
956 // successor, merge the blocks. This encourages recursive jump threading
957 // because now the condition in this block can be threaded through
958 // predecessors of our predecessor block.
// NOTE(review): the `if` guarding this return (listing line 959) is missing
// from this extract.
960 return true;
961
// NOTE(review): the `if` guarding this return (listing line 962) is missing
// from this extract.
963 return true;
964
965 // Look if we can propagate guards to predecessors.
966 if (HasGuards && processGuards(BB))
967 return true;
968
969 // What kind of constant we're looking for.
970 ConstantPreference Preference = WantInteger;
971
972 // Look to see if the terminator is a conditional branch, switch or indirect
973 // branch, if not we can't thread it.
974 Value *Condition;
975 Instruction *Terminator = BB->getTerminator();
976 if (CondBrInst *BI = dyn_cast<CondBrInst>(Terminator)) {
977 Condition = BI->getCondition();
978 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
979 Condition = SI->getCondition();
980 } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
981 // Can't thread indirect branch with no successors.
982 if (IB->getNumSuccessors() == 0) return false;
983 Condition = IB->getAddress()->stripPointerCasts();
984 Preference = WantBlockAddress;
985 } else {
986 return false; // Must be an invoke or callbr.
987 }
988
989 // Keep track if we constant folded the condition in this invocation.
990 bool ConstantFolded = false;
991
992 // Run constant folding to see if we can reduce the condition to a simple
993 // constant.
994 if (Instruction *I = dyn_cast<Instruction>(Condition)) {
// NOTE(review): the initializer of SimpleVal (listing line 996) is missing
// from this extract.
995 Value *SimpleVal =
997 if (SimpleVal) {
998 I->replaceAllUsesWith(SimpleVal);
// NOTE(review): listing line 999 is missing from this extract.
1000 I->eraseFromParent();
1001 Condition = SimpleVal;
1002 ConstantFolded = true;
1003 }
1004 }
1005
1006 // If the terminator is branching on an undef or freeze undef, we can pick any
1007 // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
1008 auto *FI = dyn_cast<FreezeInst>(Condition);
1009 if (isa<UndefValue>(Condition) ||
1010 (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1011 unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1012 std::vector<DominatorTree::UpdateType> Updates;
1013
1014 // Fold the branch/switch.
1015 Instruction *BBTerm = BB->getTerminator();
1016 Updates.reserve(BBTerm->getNumSuccessors());
1017 for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1018 if (i == BestSucc) continue;
1019 BasicBlock *Succ = BBTerm->getSuccessor(i);
1020 Succ->removePredecessor(BB, true);
1021 Updates.push_back({DominatorTree::Delete, BB, Succ});
1022 }
1023
1024 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1025 << "' folding undef terminator: " << *BBTerm << '\n');
1026 Instruction *NewBI = UncondBrInst::Create(BBTerm->getSuccessor(BestSucc),
1027 BBTerm->getIterator());
1028 NewBI->setDebugLoc(BBTerm->getDebugLoc());
1029 ++NumFolds;
1030 BBTerm->eraseFromParent();
1031 DTU->applyUpdatesPermissive(Updates);
// The freeze is erased only after the terminator, which was its single user
// (checked via hasOneUse above), has been removed.
1032 if (FI)
1033 FI->eraseFromParent();
1034 return true;
1035 }
1036
1037 // If the terminator of this block is branching on a constant, simplify the
1038 // terminator to an unconditional branch. This can occur due to threading in
1039 // other blocks.
1040 if (getKnownConstant(Condition, Preference)) {
1041 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1042 << "' folding terminator: " << *BB->getTerminator()
1043 << '\n');
1044 ++NumFolds;
1045 ConstantFoldTerminator(BB, true, nullptr, DTU.get());
1046 if (auto *BPI = getBPI())
1047 BPI->eraseBlock(BB);
1048 return true;
1049 }
1050
1051 Instruction *CondInst = dyn_cast<Instruction>(Condition);
1052
1053 // All the rest of our checks depend on the condition being an instruction.
1054 if (!CondInst) {
1055 // FIXME: Unify this with code below.
1056 if (processThreadableEdges(Condition, BB, Preference, Terminator))
1057 return true;
// Report a change if the condition was constant folded earlier in this call.
1058 return ConstantFolded;
1059 }
1060
1061 // Some of the following optimizations can safely work on the unfrozen cond.
1062 Value *CondWithoutFreeze = CondInst;
1063 if (auto *FI = dyn_cast<FreezeInst>(CondInst))
1064 CondWithoutFreeze = FI->getOperand(0);
1065
1066 if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
1067 // If we're branching on a conditional, LVI might be able to determine
1068 // its value at the branch instruction. We only handle comparisons
1069 // against a constant at this time.
1070 if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
1071 Constant *Res =
1072 LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1073 CondConst, BB->getTerminator(),
1074 /*UseBlockValue=*/false);
1075 if (Res) {
1076 // We can safely replace *some* uses of the CondInst if it has
1077 // exactly one value as returned by LVI. RAUW is incorrect in the
1078 // presence of guards and assumes, that have the `Cond` as the use. This
1079 // is because we use the guards/assume to reason about the `Cond` value
1080 // at the end of block, but RAUW unconditionally replaces all uses
1081 // including the guards/assumes themselves and the uses before the
1082 // guard/assume.
1083 if (replaceFoldableUses(CondCmp, Res, BB))
1084 return true;
1085 }
1086
1087 // We did not manage to simplify this branch, try to see whether
1088 // CondCmp depends on a known phi-select pattern.
1089 if (tryToUnfoldSelect(CondCmp, BB))
1090 return true;
1091 }
1092 }
1093
// NOTE(review): the statement introducing `SI` (listing line 1094) is missing
// from this extract.
1095 if (tryToUnfoldSelect(SI, BB))
1096 return true;
1097
1098 // Check for some cases that are worth simplifying. Right now we want to look
1099 // for loads that are used by a switch or by the condition for the branch. If
1100 // we see one, check to see if it's partially redundant. If so, insert a PHI
1101 // which can then be used to thread the values.
1102 Value *SimplifyValue = CondWithoutFreeze;
1103
1104 if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1105 if (isa<Constant>(CondCmp->getOperand(1)))
1106 SimplifyValue = CondCmp->getOperand(0);
1107
1108 // TODO: There are other places where load PRE would be profitable, such as
1109 // more complex comparisons.
1110 if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
// NOTE(review): the nested condition on LoadI (listing line 1111) is missing
// from this extract.
1112 return true;
1113
1114 // Before threading, try to propagate profile data backwards:
1115 if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1116 if (PN->getParent() == BB && isa<CondBrInst>(BB->getTerminator()))
// NOTE(review): the statement controlled by the two `if`s above (listing line
// 1117) is missing from this extract.
1118
1119 // Handle a variety of cases where we are branching on something derived from
1120 // a PHI node in the current block. If we can prove that any predecessors
1121 // compute a predictable value based on a PHI node, thread those predecessors.
1122 if (processThreadableEdges(CondInst, BB, Preference, Terminator))
1123 return true;
1124
1125 // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1126 // the current block, see if we can simplify.
1127 PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
1128 if (PN && PN->getParent() == BB && isa<CondBrInst>(BB->getTerminator()))
1129 return processBranchOnPHI(PN);
1130
1131 // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1132 if (CondInst->getOpcode() == Instruction::Xor &&
1133 CondInst->getParent() == BB && isa<CondBrInst>(BB->getTerminator()))
1134 return processBranchOnXOR(cast<BinaryOperator>(CondInst));
1135
1136 // Search for a stronger dominating condition that can be used to simplify a
1137 // conditional branch leaving BB.
// NOTE(review): the `if` guarding this return (listing line 1138) is missing
// from this extract.
1139 return true;
1140
1141 return false;
1142}
1143
// Try to fold the conditional branch that ends BB by walking up the chain of
// single predecessors (at most ImplicationSearchThreshold steps). If the
// branch taken in a predecessor implies that BB's condition is true or false,
// BB's terminator is replaced by an unconditional branch to the surviving
// successor, the dominator tree updater and BPI are notified, and true is
// returned. Returns false when BB does not end in a conditional branch or no
// implying predecessor is found.
// NOTE(review): the signature (listing line 1144) is absent from this extract.
1145 auto *BI = dyn_cast<CondBrInst>(BB->getTerminator());
1146 if (!BI)
1147 return false;
1148
1149 Value *Cond = BI->getCondition();
1150 // Assuming that predecessor's branch was taken, if pred's branch condition
1151 // (V) implies Cond, Cond can be either true, undef, or poison. In this case,
1152 // freeze(Cond) is either true or a nondeterministic value.
1153 // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
1154 // without affecting other instructions.
1155 auto *FICond = dyn_cast<FreezeInst>(Cond);
1156 if (FICond && FICond->hasOneUse())
1157 Cond = FICond->getOperand(0);
1158 else
1159 FICond = nullptr;
1160
1161 BasicBlock *CurrentBB = BB;
1162 BasicBlock *CurrentPred = BB->getSinglePredecessor();
1163 unsigned Iter = 0;
1164
1165 auto &DL = BB->getDataLayout();
1166
1167 while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1168 auto *PBI = dyn_cast<CondBrInst>(CurrentPred->getTerminator());
1169 if (!PBI)
1170 return false;
1171 if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1172 return false;
1173
// True when CurrentBB is reached through the predecessor's first successor
// (index 0).
1174 bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1175 std::optional<bool> Implication =
1176 isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1177
1178 // If the branch condition of BB (which is Cond) and CurrentPred are
1179 // exactly the same freeze instruction, Cond can be folded into CondIsTrue.
1180 if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
1181 if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
1182 FICond->getOperand(0))
1183 Implication = CondIsTrue;
1184 }
1185
1186 if (Implication) {
// Keep successor 0 when the condition is implied true, successor 1 otherwise.
1187 BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1188 BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1189 RemoveSucc->removePredecessor(BB);
1190 UncondBrInst *UncondBI =
1191 UncondBrInst::Create(KeepSucc, BI->getIterator());
1192 UncondBI->setDebugLoc(BI->getDebugLoc());
1193 ++NumFolds;
1194 BI->eraseFromParent();
// FICond is non-null only when the freeze had a single use (the branch that
// was just erased).
1195 if (FICond)
1196 FICond->eraseFromParent();
1197
1198 DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
1199 if (auto *BPI = getBPI())
1200 BPI->eraseBlock(BB);
1201 return true;
1202 }
// No implication from this predecessor; move one step further up the chain.
1203 CurrentBB = CurrentPred;
1204 CurrentPred = CurrentBB->getSinglePredecessor();
1205 }
1206
1207 return false;
1208}
1209
1210/// Return true if Op is an instruction defined in the given block.
/// Returns false when Op is not an Instruction at all, or when its parent
/// block differs from BB.
/// NOTE(review): the signature (listing line 1211) is absent from this extract.
1212 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1213 if (OpInst->getParent() == BB)
1214 return true;
1215 return false;
1216}
1217
1218/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1219/// redundant load instruction, eliminate it by replacing it with a PHI node.
1220/// This is an important optimization that encourages jump threading, and needs
1221/// to be run interlaced with other jump threading tasks.
///
/// Returns true when LoadI was replaced (either by a value already available
/// in its own block or by a newly created PHI) and erased; returns false when
/// the load was left untouched.
/// NOTE(review): the signature (listing line 1222) is absent from this extract.
1223 // Don't hack volatile and ordered loads.
1224 if (!LoadI->isUnordered()) return false;
1225
1226 // If the load is defined in a block with exactly one predecessor, it can't be
1227 // partially redundant.
1228 BasicBlock *LoadBB = LoadI->getParent();
1229 if (LoadBB->getSinglePredecessor())
1230 return false;
1231
1232 // If the load is defined in an EH pad, it can't be partially redundant,
1233 // because the edges between the invoke and the EH pad cannot have other
1234 // instructions between them.
1235 if (LoadBB->isEHPad())
1236 return false;
1237
1238 Value *LoadedPtr = LoadI->getOperand(0);
1239
1240 // If the loaded operand is defined in the LoadBB and it's not a phi,
1241 // it can't be available in predecessors.
1242 if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1243 return false;
1244
1245 // Scan a few instructions up from the load, to see if it is obviously live at
1246 // the entry to its block.
1247 BasicBlock::iterator BBIt(LoadI);
1248 bool IsLoadCSE;
1249 BatchAAResults BatchAA(*AA);
1250 // The dominator tree is updated lazily and may not be valid at this point.
1251 BatchAA.disableDominatorTree();
1252 if (Value *AvailableVal = FindAvailableLoadedValue(
1253 LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
1254 // If the value of the load is locally available within the block, just use
1255 // it. This frequently occurs for reg2mem'd allocas.
1256
1257 if (IsLoadCSE) {
1258 LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1259 combineMetadataForCSE(NLoadI, LoadI, false);
1260 LVI->forgetValue(NLoadI);
1261 };
1262
1263 // If the returned value is the load itself, replace with poison. This can
1264 // only happen in dead loops.
1265 if (AvailableVal == LoadI)
1266 AvailableVal = PoisonValue::get(LoadI->getType());
1267 if (AvailableVal->getType() != LoadI->getType()) {
1268 AvailableVal = CastInst::CreateBitOrPointerCast(
1269 AvailableVal, LoadI->getType(), "", LoadI->getIterator());
1270 cast<Instruction>(AvailableVal)->setDebugLoc(LoadI->getDebugLoc());
1271 }
1272 LoadI->replaceAllUsesWith(AvailableVal);
1273 LoadI->eraseFromParent();
1274 return true;
1275 }
1276
1277 // Otherwise, if we scanned the whole block and got to the top of the block,
1278 // we know the block is locally transparent to the load. If not, something
1279 // might clobber its value.
1280 if (BBIt != LoadBB->begin())
1281 return false;
1282
1283 // If all of the loads and stores that feed the value have the same AA tags,
1284 // then we can propagate them onto any newly inserted loads.
1285 AAMDNodes AATags = LoadI->getAAMetadata();
1286
1287 SmallPtrSet<BasicBlock*, 8> PredsScanned;
1288
1289 using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1290
1291 AvailablePredsTy AvailablePreds;
1292 BasicBlock *OneUnavailablePred = nullptr;
// NOTE(review): listing line 1293, presumably the declaration of `CSELoads`
// used below, is missing from this extract -- confirm upstream.
1294
1295 // If we got here, the loaded value is transparent through to the start of the
1296 // block. Check to see if it is available in any of the predecessor blocks.
1297 for (BasicBlock *PredBB : predecessors(LoadBB)) {
1298 // If we already scanned this predecessor, skip it.
1299 if (!PredsScanned.insert(PredBB).second)
1300 continue;
1301
1302 BBIt = PredBB->end();
1303 unsigned NumScanedInst = 0;
1304 Value *PredAvailable = nullptr;
1305 // NOTE: We don't CSE load that is volatile or anything stronger than
1306 // unordered, that should have been checked when we entered the function.
1307 assert(LoadI->isUnordered() &&
1308 "Attempting to CSE volatile or atomic loads");
1309 // If this is a load on a phi pointer, phi-translate it and search
1310 // for available load/store to the pointer in predecessors.
1311 Type *AccessTy = LoadI->getType();
1312 const auto &DL = LoadI->getDataLayout();
1313 MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
1314 LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
1315 AATags);
1316 PredAvailable = findAvailablePtrLoadStore(
1317 Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
1318 &BatchAA, &IsLoadCSE, &NumScanedInst);
1319
1320 // If PredBB has a single predecessor, continue scanning through the
1321 // single predecessor.
1322 BasicBlock *SinglePredBB = PredBB;
1323 while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1324 NumScanedInst < DefMaxInstsToScan) {
1325 SinglePredBB = SinglePredBB->getSinglePredecessor();
1326 if (SinglePredBB) {
1327 BBIt = SinglePredBB->end();
// Only the remaining part of the scan budget is passed on to this block.
1328 PredAvailable = findAvailablePtrLoadStore(
1329 Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
1330 (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
1331 &NumScanedInst);
1332 }
1333 }
1334
1335 if (!PredAvailable) {
1336 OneUnavailablePred = PredBB;
1337 continue;
1338 }
1339
1340 if (IsLoadCSE)
1341 CSELoads.push_back(cast<LoadInst>(PredAvailable));
1342
1343 // If so, this load is partially redundant. Remember this info so that we
1344 // can create a PHI node.
1345 AvailablePreds.emplace_back(PredBB, PredAvailable);
1346 }
1347
1348 // If the loaded value isn't available in any predecessor, it isn't partially
1349 // redundant.
1350 if (AvailablePreds.empty()) return false;
1351
1352 // Okay, the loaded value is available in at least one (and maybe all!)
1353 // predecessors. If the value is unavailable in more than one unique
1354 // predecessor, we want to insert a merge block for those common predecessors.
1355 // This ensures that we only have to insert one reload, thus not increasing
1356 // code size.
1357 BasicBlock *UnavailablePred = nullptr;
1358
1359 // If the value is unavailable in one of predecessors, we will end up
1360 // inserting a new instruction into them. It is only valid if all the
1361 // instructions before LoadI are guaranteed to pass execution to its
1362 // successor, or if LoadI is safe to speculate.
1363 // TODO: If this logic becomes more complex, and we will perform PRE insertion
1364 // farther than to a predecessor, we need to reuse the code from GVN's PRE.
1365 // It requires domination tree analysis, so for this simple case it is an
1366 // overkill.
// GuaranteedToTransfer caches the block-wide check so it is computed at most
// once, on the first predecessor that needs it.
1367 std::optional<bool> GuaranteedToTransfer;
1368 auto CanSpeculateInto = [&](const BasicBlock *Pred) {
1369 if (isSafeToSpeculativelyExecute(LoadI, Pred->getTerminator()))
1370 return true;
1371
1372 if (!GuaranteedToTransfer)
1373 GuaranteedToTransfer = isGuaranteedToTransferExecutionToSuccessor(
1374 LoadBB->begin(), LoadI->getIterator());
1375 return *GuaranteedToTransfer;
1376 };
1377
1378 // If there is exactly one predecessor where the value is unavailable, the
1379 // already computed 'OneUnavailablePred' block is it. If it ends in an
1380 // unconditional branch, we know that it isn't a critical edge.
1381 if (PredsScanned.size() == AvailablePreds.size()+1 &&
1382 OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1383 UnavailablePred = OneUnavailablePred;
1384 if (!CanSpeculateInto(UnavailablePred))
1385 return false;
1386 } else if (PredsScanned.size() != AvailablePreds.size()) {
1387 // Otherwise, we had multiple unavailable predecessors or we had a critical
1388 // edge from the one.
1389 SmallVector<BasicBlock*, 8> PredsToSplit;
1390 SmallPtrSet<BasicBlock *, 8> AvailablePredSet(
1391 llvm::from_range, llvm::make_first_range(AvailablePreds));
1392
1393 // Add all the unavailable predecessors to the PredsToSplit list.
1394 for (BasicBlock *P : predecessors(LoadBB)) {
1395 // If the predecessor is an indirect goto, we can't split the edge.
1396 if (isa<IndirectBrInst>(P->getTerminator()))
1397 return false;
1398
1399 if (!AvailablePredSet.count(P)) {
1400 if (!CanSpeculateInto(P))
1401 return false;
1402 PredsToSplit.push_back(P);
1403 }
1404 }
1405
1406 // Split them out to their own block.
1407 UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1408 }
1409
1410 // If the value isn't available in all predecessors, then there will be
1411 // exactly one where it isn't available. Insert a load on that edge and add
1412 // it to the AvailablePreds list.
1413 if (UnavailablePred) {
1414 assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1415 "Can't handle critical edge here!");
1416 LoadInst *NewVal = new LoadInst(
1417 LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1418 LoadI->getName() + ".pr", false, LoadI->getAlign(),
1419 LoadI->getOrdering(), LoadI->getSyncScopeID(),
1420 UnavailablePred->getTerminator()->getIterator());
1421 NewVal->setDebugLoc(LoadI->getDebugLoc());
1422 if (AATags)
1423 NewVal->setAAMetadata(AATags);
1424
1425 AvailablePreds.emplace_back(UnavailablePred, NewVal);
1426 }
1427
1428 // Now we know that each predecessor of this block has a value in
1429 // AvailablePreds, sort them for efficient access as we're walking the preds.
1430 array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1431
1432 // Create a PHI node at the start of the block for the PRE'd load value.
1433 PHINode *PN = PHINode::Create(LoadI->getType(), pred_size(LoadBB), "");
1434 PN->insertBefore(LoadBB->begin());
1435 PN->takeName(LoadI);
1436 PN->setDebugLoc(LoadI->getDebugLoc());
1437
1438 // Insert new entries into the PHI for each predecessor. A single block may
1439 // have multiple entries here.
1440 for (BasicBlock *P : predecessors(LoadBB)) {
1441 AvailablePredsTy::iterator I =
1442 llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
1443
1444 assert(I != AvailablePreds.end() && I->first == P &&
1445 "Didn't find entry for predecessor!");
1446
1447 // If we have an available predecessor but it requires casting, insert the
1448 // cast in the predecessor and use the cast. Note that we have to update the
1449 // AvailablePreds vector as we go so that all of the PHI entries for this
1450 // predecessor use the same bitcast.
1451 Value *&PredV = I->second;
1452 if (PredV->getType() != LoadI->getType()) {
// NOTE(review): the start of this statement (listing line 1453) is missing
// from this extract; only its argument list survives.
1454 PredV, LoadI->getType(), "", P->getTerminator()->getIterator());
1455 // The new cast is producing the value used to replace the load
1456 // instruction, so uses the load's debug location. If P does not always
1457 // branch to the load BB however then the debug location must be dropped,
1458 // as it is hoisted past a conditional branch.
1459 DebugLoc DL = P->getTerminator()->getNumSuccessors() == 1
1460 ? LoadI->getDebugLoc()
// NOTE(review): the `:` arm of this conditional (listing line 1461) is missing
// from this extract.
1462 cast<CastInst>(PredV)->setDebugLoc(DL);
1463 }
1464
1465 PN->addIncoming(PredV, I->first);
1466 }
1467
1468 for (LoadInst *PredLoadI : CSELoads) {
1469 combineMetadataForCSE(PredLoadI, LoadI, true);
1470 LVI->forgetValue(PredLoadI);
1471 }
1472
1473 LoadI->replaceAllUsesWith(PN);
1474 LoadI->eraseFromParent();
1475
1476 return true;
1477}
1478
1479/// findMostPopularDest - The specified list contains multiple possible
1480/// threadable destinations. Pick the one that occurs the most frequently in
1481/// the list.
///
/// PredToDestList must be non-empty (asserted). Entries whose destination is
/// null are not counted; null is returned only when no non-null destination
/// was counted.
/// NOTE(review): the line carrying the function name and first parameter
/// (listing line 1483) is absent from this extract.
1482static BasicBlock *
1484 const SmallVectorImpl<std::pair<BasicBlock *,
1485 BasicBlock *>> &PredToDestList) {
1486 assert(!PredToDestList.empty());
1487
1488 // Determine popularity. If there are multiple possible destinations, we
1489 // explicitly choose to ignore 'undef' destinations. We prefer to thread
1490 // blocks with known and real destinations to threading undef. We'll handle
1491 // them later if interesting.
1492 MapVector<BasicBlock *, unsigned> DestPopularity;
1493
1494 // Populate DestPopularity with the successors in the order they appear in the
1495 // successor list. This way, we ensure determinism by iterating it in the
1496 // same order in llvm::max_element below. We map nullptr to 0 so that we can
1497 // return nullptr when PredToDestList contains nullptr only.
1498 DestPopularity[nullptr] = 0;
1499 for (auto *SuccBB : successors(BB))
1500 DestPopularity[SuccBB] = 0;
1501
// Count one vote per list entry that names a real (non-null) destination.
1502 for (const auto &PredToDest : PredToDestList)
1503 if (PredToDest.second)
1504 DestPopularity[PredToDest.second]++;
1505
1506 // Find the most popular dest.
1507 auto MostPopular = llvm::max_element(DestPopularity, llvm::less_second());
1508
1509 // Okay, we have finally picked the most popular destination.
1510 return MostPopular->first;
1511}
1512
1513// Try to evaluate the value of V when the control flows from PredPredBB to
1514// BB->getSinglePredecessor() and then on to BB.
// This entry point only forwards to the overload that takes a `Visited` set.
// NOTE(review): the first signature line (listing line 1515) and the
// declaration of `Visited` (listing line 1519) are missing from this extract.
1516 BasicBlock *PredPredBB,
1517 Value *V,
1518 const DataLayout &DL) {
1520 return evaluateOnPredecessorEdge(BB, PredPredBB, V, DL, Visited);
1521}
1522
// Recursive worker for the evaluation of V along the edge PredPredBB ->
// BB->getSinglePredecessor() -> BB. `Visited` holds the values currently being
// evaluated; re-entering one of them yields nullptr, and each value is removed
// from the set again when its evaluation returns. Returns a Constant on
// success and nullptr when no constant can be determined.
// NOTE(review): the first signature line (listing line 1523) is absent from
// this extract.
1524 BasicBlock *BB, BasicBlock *PredPredBB, Value *V, const DataLayout &DL,
1525 SmallPtrSet<Value *, 8> &Visited) {
1526 if (!Visited.insert(V).second)
1527 return nullptr;
1528 llvm::scope_exit _([&Visited, V]() { Visited.erase(V); });
1529
1530 BasicBlock *PredBB = BB->getSinglePredecessor();
1531 assert(PredBB && "Expected a single predecessor");
1532
1533 if (Constant *Cst = dyn_cast<Constant>(V)) {
1534 return Cst;
1535 }
1536
1537 // Consult LVI if V is not an instruction in BB or PredBB.
// NOTE(review): the declaration of `I` (listing line 1538) is missing from
// this extract.
1539 if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1540 return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1541 }
1542
1543 // Look into a PHI argument.
1544 if (PHINode *PHI = dyn_cast<PHINode>(V)) {
1545 if (PHI->getParent() == PredBB)
1546 return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1547 return nullptr;
1548 }
1549
1550 // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
1551 // Note that during the execution of the pass, phi nodes may become constant
1552 // and may be removed, which can lead to self-referencing instructions in
1553 // code that becomes unreachable. Consequently, we need to handle those
1554 // instructions in unreachable code and check before going into recursion.
1555 if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1556 if (CondCmp->getParent() == BB) {
// NOTE(review): the starts of the two statements that define `Op0` and `Op1`
// (listing lines 1557 and 1559) are missing from this extract; only their
// argument lists survive.
1558 BB, PredPredBB, CondCmp->getOperand(0), DL, Visited);
1560 BB, PredPredBB, CondCmp->getOperand(1), DL, Visited);
1561 if (Op0 && Op1) {
1562 return ConstantFoldCompareInstOperands(CondCmp->getPredicate(), Op0,
1563 Op1, DL);
1564 }
1565 }
1566 return nullptr;
1567 }
1568
1569 return nullptr;
1570}
1571
// Use the values that Cond is known to take in BB's predecessors to either
// fold BB's terminator (when every predecessor leads to the same successor)
// or thread the predecessors that share the most popular destination.
// Returns false right away when BB is a loop header, and false when no
// predecessor edge can be redirected.
// NOTE(review): the first signature line (listing line 1572) is absent from
// this extract.
1573 ConstantPreference Preference,
1574 Instruction *CxtI) {
1575 // If threading this would thread across a loop header, don't even try to
1576 // thread the edge.
1577 if (LoopHeaders.count(BB))
1578 return false;
1579
1580 PredValueInfoTy PredValues;
1581 if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1582 CxtI)) {
1583 // We don't have known values in predecessors. See if we can thread through
1584 // BB and its sole predecessor.
// NOTE(review): the statement described by the comment above (listing line
// 1585) is missing from this extract.
1586 }
1587
1588 assert(!PredValues.empty() &&
1589 "computeValueKnownInPredecessors returned true with no values");
1590
1591 LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1592 for (const auto &PredValue : PredValues) {
1593 dbgs() << " BB '" << BB->getName()
1594 << "': FOUND condition = " << *PredValue.first
1595 << " for pred '" << PredValue.second->getName() << "'.\n";
1596 });
1597
1598 // Decide what we want to thread through. Convert our list of known values to
1599 // a list of known destinations for each pred. This also discards duplicate
1600 // predecessors and keeps track of the undefined inputs (which are represented
1601 // as a null dest in the PredToDestList).
// NOTE(review): listing lines 1602-1603, presumably the declarations of
// `SeenPreds` and `PredToDestList` used below, are missing from this extract.
1604
// The all-ones pointers below act as sentinels meaning "more than one distinct
// destination / value was seen".
1605 BasicBlock *OnlyDest = nullptr;
1606 BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1607 Constant *OnlyVal = nullptr;
1608 Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1609
1610 for (const auto &PredValue : PredValues) {
1611 BasicBlock *Pred = PredValue.second;
1612 if (!SeenPreds.insert(Pred).second)
1613 continue; // Duplicate predecessor entry.
1614
1615 Constant *Val = PredValue.first;
1616
1617 BasicBlock *DestBB;
1618 if (isa<UndefValue>(Val))
1619 DestBB = nullptr;
1620 else if (CondBrInst *BI = dyn_cast<CondBrInst>(BB->getTerminator())) {
1621 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
// isZero() yields successor index 1 for a false condition and 0 otherwise.
1622 DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1623 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1624 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1625 DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1626 } else {
// NOTE(review): the start of this assertion (listing line 1627) is missing
// from this extract.
1628 && "Unexpected terminator");
1629 assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1630 DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1631 }
1632
1633 // If we have exactly one destination, remember it for efficiency below.
1634 if (PredToDestList.empty()) {
1635 OnlyDest = DestBB;
1636 OnlyVal = Val;
1637 } else {
1638 if (OnlyDest != DestBB)
1639 OnlyDest = MultipleDestSentinel;
1640 // It is possible that we have the same destination but a different value,
1641 // e.g. the default case in a switchinst.
1642 if (Val != OnlyVal)
1643 OnlyVal = MultipleVal;
1644 }
1645
1646 // If the predecessor ends with an indirect goto, we can't change its
1647 // destination.
1648 if (isa<IndirectBrInst>(Pred->getTerminator()))
1649 continue;
1650
1651 PredToDestList.emplace_back(Pred, DestBB);
1652 }
1653
1654 // If all edges were unthreadable, we fail.
1655 if (PredToDestList.empty())
1656 return false;
1657
1658 // If all the predecessors go to a single known successor, we want to fold,
1659 // not thread. By doing so, we do not need to duplicate the current block and
1660 // also miss potential opportunities in case we don't/can't duplicate.
1661 if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1662 if (BB->hasNPredecessors(PredToDestList.size())) {
1663 bool SeenFirstBranchToOnlyDest = false;
1664 std::vector <DominatorTree::UpdateType> Updates;
1665 Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
1666 for (BasicBlock *SuccBB : successors(BB)) {
1667 if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1668 SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1669 } else {
1670 SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1671 Updates.push_back({DominatorTree::Delete, BB, SuccBB});
1672 }
1673 }
1674
1675 // Finally update the terminator.
1676 Instruction *Term = BB->getTerminator();
1677 Instruction *NewBI = UncondBrInst::Create(OnlyDest, Term->getIterator());
1678 NewBI->setDebugLoc(Term->getDebugLoc());
1679 ++NumFolds;
1680 Term->eraseFromParent();
1681 DTU->applyUpdatesPermissive(Updates);
1682 if (auto *BPI = getBPI())
1683 BPI->eraseBlock(BB);
1684
1685 // If the condition is now dead due to the removal of the old terminator,
1686 // erase it.
1687 if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1688 if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1689 CondInst->eraseFromParent();
1690 // We can safely replace *some* uses of the CondInst if it has
1691 // exactly one value as returned by LVI. RAUW is incorrect in the
1692 // presence of guards and assumes, that have the `Cond` as the use. This
1693 // is because we use the guards/assume to reason about the `Cond` value
1694 // at the end of block, but RAUW unconditionally replaces all uses
1695 // including the guards/assumes themselves and the uses before the
1696 // guard/assume.
1697 else if (OnlyVal && OnlyVal != MultipleVal)
1698 replaceFoldableUses(CondInst, OnlyVal, BB);
1699 }
1700 return true;
1701 }
1702 }
1703
1704 // Determine which is the most common successor. If we have many inputs and
1705 // this block is a switch, we want to start by threading the batch that goes
1706 // to the most popular destination first. If we only know about one
1707 // threadable destination (the common case) we can avoid this.
1708 BasicBlock *MostPopularDest = OnlyDest;
1709
1710 if (MostPopularDest == MultipleDestSentinel) {
1711 // Remove any loop headers from the Dest list, threadEdge conservatively
1712 // won't process them, but we might have other destination that are eligible
1713 // and we still want to process.
1714 erase_if(PredToDestList,
1715 [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1716 return LoopHeaders.contains(PredToDest.second);
1717 });
1718
1719 if (PredToDestList.empty())
1720 return false;
1721
1722 MostPopularDest = findMostPopularDest(BB, PredToDestList);
1723 }
1724
1725 // Now that we know what the most popular destination is, factor all
1726 // predecessors that will jump to it into a single predecessor.
1727 SmallVector<BasicBlock*, 16> PredsToFactor;
1728 for (const auto &PredToDest : PredToDestList)
1729 if (PredToDest.second == MostPopularDest) {
1730 BasicBlock *Pred = PredToDest.first;
1731
1732 // This predecessor may be a switch or something else that has multiple
1733 // edges to the block. Factor each of these edges by listing them
1734 // according to # occurrences in PredsToFactor.
1735 for (BasicBlock *Succ : successors(Pred))
1736 if (Succ == BB)
1737 PredsToFactor.push_back(Pred);
1738 }
1739
1740 // If the threadable edges are branching on an undefined value, we get to pick
1741 // the destination that these predecessors should get to.
1742 if (!MostPopularDest)
1743 MostPopularDest = BB->getTerminator()->
1744 getSuccessor(getBestDestForJumpOnUndef(BB));
1745
1746 // Ok, try to thread it!
1747 return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
1748}
1749
1750/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
1751/// a PHI node (or freeze PHI) in the current block. See if there are any
1752/// simplifications we can do based on inputs to the phi node.
///
/// Returns true as soon as the block was duplicated into one predecessor that
/// ends in an unconditional branch; returns false when no predecessor
/// qualified or every duplication attempt failed.
/// NOTE(review): the signature (listing line 1753) is absent from this extract.
1754 BasicBlock *BB = PN->getParent();
1755
1756 // TODO: We could make use of this to do it once for blocks with common PHI
1757 // values.
// NOTE(review): the declaration of `PredBBs` (listing line 1758) is missing
// from this extract; it is sized to hold one candidate predecessor at a time.
1759 PredBBs.resize(1);
1760
1761 // If any of the predecessor blocks end in an unconditional branch, we can
1762 // *duplicate* the conditional branch into that block in order to further
1763 // encourage jump threading and to eliminate cases where we have branch on a
1764 // phi of an icmp (branch on icmp is much better).
1765 // This is still beneficial when a frozen phi is used as the branch condition
1766 // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
1767 // to br(icmp(freeze ...)).
1768 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1769 BasicBlock *PredBB = PN->getIncomingBlock(i);
1770 if (isa<UncondBrInst>(PredBB->getTerminator())) {
1771 PredBBs[0] = PredBB;
1772 // Try to duplicate BB into PredBB.
1773 if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1774 return true;
1775 }
1776 }
1777
1778 return false;
1779}
1780
1781/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
1782/// a xor instruction in the current block. See if there are any
1783/// simplifications we can do based on inputs to the xor.
///
/// Returns true if the xor was simplified in place or BB was duplicated into
/// some predecessors, and false if nothing was changed.
1785 BasicBlock *BB = BO->getParent();
1786
1787 // If either the LHS or RHS of the xor is a constant, don't do this
1788 // optimization.
1789 if (isa<ConstantInt>(BO->getOperand(0)) ||
1791 return false;
1792
1793 // If the first instruction in BB isn't a phi, we won't be able to infer
1794 // anything special about any particular predecessor.
1795 if (!isa<PHINode>(BB->front()))
1796 return false;
1797
1798 // If this BB is a landing pad, we won't be able to split the edge into it.
1799 if (BB->isEHPad())
1800 return false;
1801
1802 // If we have a xor as the branch input to this block, and we know that the
1803 // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1804 // the condition into the predecessor and fix that value to true, saving some
1805 // logical ops on that path and encouraging other paths to simplify.
1806 //
1807 // This copies something like this:
1808 //
1809 // BB:
1810 // %X = phi i1 [1], [%X']
1811 // %Y = icmp eq i32 %A, %B
1812 // %Z = xor i1 %X, %Y
1813 // br i1 %Z, ...
1814 //
1815 // Into:
1816 // BB':
1817 // %Y = icmp ne i32 %A, %B
1818 // br i1 %Y, ...
1819
// Try the LHS first; only if nothing is known about it fall back to the RHS.
// isLHS records which operand XorOpValues describes.
1820 PredValueInfoTy XorOpValues;
1821 bool isLHS = true;
1822 if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1823 WantInteger, BO)) {
1824 assert(XorOpValues.empty());
1825 if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1826 WantInteger, BO))
1827 return false;
1828 isLHS = false;
1829 }
1830
1831 assert(!XorOpValues.empty() &&
1832 "computeValueKnownInPredecessors returned true with no values");
1833
1834 // Scan the information to see which is most popular: true or false. The
1835 // predecessors can be of the set true, false, or undef.
1836 unsigned NumTrue = 0, NumFalse = 0;
1837 for (const auto &XorOpValue : XorOpValues) {
1838 if (isa<UndefValue>(XorOpValue.first))
1839 // Ignore undefs for the count.
1840 continue;
1841 if (cast<ConstantInt>(XorOpValue.first)->isZero())
1842 ++NumFalse;
1843 else
1844 ++NumTrue;
1845 }
1846
1847 // Determine which value to split on, true, false, or undef if neither.
// A tie between true and false resolves to false; SplitVal stays null only
// when every known value was undef.
1848 ConstantInt *SplitVal = nullptr;
1849 if (NumTrue > NumFalse)
1850 SplitVal = ConstantInt::getTrue(BB->getContext());
1851 else if (NumTrue != 0 || NumFalse != 0)
1852 SplitVal = ConstantInt::getFalse(BB->getContext());
1853
1854 // Collect all of the blocks that this can be folded into so that we can
1855 // factor this once and clone it once.
// Predecessors providing undef are included as well, since undef may be
// treated as SplitVal.
1856 SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1857 for (const auto &XorOpValue : XorOpValues) {
1858 if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1859 continue;
1860
1861 BlocksToFoldInto.push_back(XorOpValue.second);
1862 }
1863
1864 // If we inferred a value for all of the predecessors, then duplication won't
1865 // help us. However, we can just replace the LHS or RHS with the constant.
1866 if (BlocksToFoldInto.size() ==
1867 cast<PHINode>(BB->front()).getNumIncomingValues()) {
1868 if (!SplitVal) {
1869 // If all preds provide undef, just nuke the xor, because it is undef too.
1871 BO->eraseFromParent();
1872 } else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) {
1873 // If all preds provide 0, replace the xor with the other input.
// getOperand(isLHS) is the operand that was NOT analysed. The guard above
// skips this path when that operand is the xor itself.
1874 BO->replaceAllUsesWith(BO->getOperand(isLHS));
1875 BO->eraseFromParent();
1876 } else {
1877 // If all preds provide 1, set the computed value to 1.
// Operand index !isLHS is the analysed operand; it is overwritten with the
// constant.
1878 BO->setOperand(!isLHS, SplitVal);
1879 }
1880
1881 return true;
1882 }
1883
1884 // If any of predecessors end with an indirect goto, we can't change its
1885 // destination.
1886 if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
1887 return isa<IndirectBrInst>(Pred->getTerminator());
1888 }))
1889 return false;
1890
1891 // Try to duplicate BB into PredBB.
1892 return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1893}
1894
1895/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1896/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1897/// NewPred using the entries from OldPred (suitably mapped).
///
/// Each PHI in PHIBB receives one extra incoming entry for NewPred. The value
/// is the one it already has for OldPred, replaced by its ValueMap entry when
/// that value is an instruction with a mapping.
1899 BasicBlock *OldPred,
1900 BasicBlock *NewPred,
1902 for (PHINode &PN : PHIBB->phis()) {
1903 // Ok, we have a PHI node. Figure out what the incoming value was for the
1904 // DestBlock.
1905 Value *IV = PN.getIncomingValueForBlock(OldPred);
1906
1907 // Remap the value if necessary.
// Only instructions are looked up; other values are used unchanged.
1908 if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
1910 if (I != ValueMap.end())
1911 IV = I->second;
1912 }
1913
1914 PN.addIncoming(IV, NewPred);
1915 }
1916}
1917
1918/// Merge basic block BB into its sole predecessor if possible.
///
/// Returns true if the merge was performed and false if any of the
/// preconditions checked below failed.
1920 BasicBlock *SinglePred = BB->getSinglePredecessor();
1921 if (!SinglePred)
1922 return false;
1923
// The predecessor must fall straight into BB: no special terminator, exactly
// one successor, not BB itself, and BB's address must not be taken and used.
1924 const Instruction *TI = SinglePred->getTerminator();
1925 if (TI->isSpecialTerminator() || TI->getNumSuccessors() != 1 ||
1926 SinglePred == BB || hasAddressTakenAndUsed(BB))
1927 return false;
1928
1929 // MergeBasicBlockIntoOnlyPred may delete SinglePred, we need to avoid
1930 // deleting a BB pointer from Unreachable.
1931 if (Unreachable.count(SinglePred))
1932 return false;
1933
1934 // Don't merge if both the basic block and the predecessor contain loop or
1935 // entry convergent intrinsics, since there may only be one convergence token
1936 // per block.
1939 return false;
1940
1941 // If SinglePred was a loop header, BB becomes one.
1942 if (LoopHeaders.erase(SinglePred))
1943 LoopHeaders.insert(BB);
1944
1945 LVI->eraseBlock(SinglePred);
1946 MergeBasicBlockIntoOnlyPred(BB, DTU.get());
1947
1948 // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
1949 // BB code within one basic block `BB`), we need to invalidate the LVI
1950 // information associated with BB, because the LVI information need not be
1951 // true for all of BB after the merge. For example,
1952 // Before the merge, LVI info and code is as follows:
1953 // SinglePred: <LVI info1 for %p val>
1954 // %y = use of %p
1955 // call @exit() // need not transfer execution to successor.
1956 // assume(%p) // from this point on %p is true
1957 // br label %BB
1958 // BB: <LVI info2 for %p val, i.e. %p is true>
1959 // %x = use of %p
1960 // br label exit
1961 //
1962 // Note that this LVI info for blocks BB and SinglePred is correct for %p
1963 // (info2 and info1 respectively). After the merge and the deletion of the
1964 // LVI info1 for SinglePred, we have the following code:
1965 // BB: <LVI info2 for %p val>
1966 // %y = use of %p
1967 // call @exit()
1968 // assume(%p)
1969 // %x = use of %p <-- LVI info2 is correct from here onwards.
1970 // br label exit
1971 // LVI info2 for BB is incorrect at the beginning of BB.
1972
1973 // Invalidate LVI information for BB if the LVI is not provably true for
1974 // all of BB.
1976 LVI->eraseBlock(BB);
1977 return true;
1978}
1979
1980/// Update the SSA form. NewBB contains instructions that are copied from BB.
1981/// ValueMapping maps old values in BB to new ones in NewBB.
///
/// For every instruction in BB that has uses (or debug records) outside BB,
/// those uses are rewritten through SSAUpdater so they see either the original
/// value, the clone in NewBB, or a PHI merging the two.
1983 ValueToValueMapTy &ValueMapping) {
1984 // If there were values defined in BB that are used outside the block, then we
1985 // now have to update all uses of the value to use either the original value,
1986 // the cloned value, or some PHI derived value. This can require arbitrary
1987 // PHI insertion, which we are prepared to do; clean these up now.
1988 SSAUpdater SSAUpdate;
1989 SmallVector<Use *, 16> UsesToRename;
1990 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1991
1992 for (Instruction &I : *BB) {
1993 // Scan all uses of this instruction to see if it is used outside of its
1994 // block, and if so, record them in UsesToRename.
1995
// Uses by lifetime markers are always queued for renaming, even when the
// marker lives in BB, and are additionally remembered in LifetimeMarkers. A
// PHI use counts as local when its incoming block for this use is BB; any
// other use counts as local when the user is in BB.
1996 SmallVector<Instruction *> LifetimeMarkers;
1997 for (Use &U : I.uses()) {
1998 Instruction *User = cast<Instruction>(U.getUser());
1999 if (User->isLifetimeStartOrEnd()) {
2000 LifetimeMarkers.push_back(User);
2001 } else {
2002 if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
2003 if (UserPN->getIncomingBlock(U) == BB)
2004 continue;
2005 } else if (User->getParent() == BB)
2006 continue;
2007 }
2008 UsesToRename.push_back(&U);
2009 }
2010
2011 // Find debug values outside of the block
2012 findDbgValues(&I, DbgVariableRecords);
2013 llvm::erase_if(DbgVariableRecords, [&](const DbgVariableRecord *DbgVarRec) {
2014 return DbgVarRec->getParent() == BB;
2015 });
2016
2017 // If there are no uses outside the block, we're done with this instruction.
2018 if (UsesToRename.empty() && DbgVariableRecords.empty())
2019 continue;
2020 LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
2021
2022 // We found a use of I outside of BB. Rename all uses of I that are outside
2023 // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
2024 // with the two values we know.
2025 SSAUpdate.Initialize(I.getType(), I.getName());
2026 SSAUpdate.AddAvailableValue(BB, &I);
2027 SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
2028
// Both work lists are drained here, so they start empty for the next
// instruction.
2029 while (!UsesToRename.empty())
2030 SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
2031 if (!DbgVariableRecords.empty()) {
2032 SSAUpdate.UpdateDebugValues(&I, DbgVariableRecords);
2033 DbgVariableRecords.clear();
2034 }
2035
2036 // Lifetime markers cannot be rewritten through PHIs. If threading leaves
2037 // one of them pointing at a PHI, drop the whole set.
2038 bool HasPhiArg = any_of(LifetimeMarkers, [](Instruction *User) {
2039 return isa<PHINode>(cast<CallBase>(User)->getOperand(0));
2040 });
2041 if (HasPhiArg) {
2042 for (Instruction *User : LifetimeMarkers)
2043 User->eraseFromParent();
2044 }
2045 LLVM_DEBUG(dbgs() << "\n");
2046 }
2047}
2048
// Call RemapSourceAtom with VM on every element of [Begin, End). This is
// skipped entirely when VM.AtomMap is empty.
2051 if (VM.AtomMap.empty())
2052 return;
2053 for (auto It = Begin; It != End; ++It)
2054 RemapSourceAtom(&*It, VM);
2055}
2056
2057/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
2058/// arguments that come from PredBB. Return the map from the variables in the
2059/// source basic block to the variables in the newly created basic block.
///
/// The map is filled in through the ValueMapping argument that the body
/// writes to.
2060
2064 BasicBlock *NewBB,
2065 BasicBlock *PredBB) {
2066 // We are going to have to map operands from the source basic block to the new
2067 // copy of the block 'NewBB'. If there are PHI nodes in the source basic
2068 // block, evaluate them to account for entry from PredBB.
2069
2070 // Retargets dbg.value to any renamed variables.
// The remaps are collected in a set first and applied afterwards, so the
// location operands are not modified while they are being iterated.
2071 auto RetargetDbgVariableRecordIfPossible = [&](DbgVariableRecord *DVR) {
2072 SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
2073 for (auto *Op : DVR->location_ops()) {
2075 if (!OpInst)
2076 continue;
2077
2078 auto I = ValueMapping.find(OpInst);
2079 if (I != ValueMapping.end())
2080 OperandsToRemap.insert({OpInst, I->second});
2081 }
2082
2083 for (auto &[OldOp, MappedOp] : OperandsToRemap)
2084 DVR->replaceVariableLocationOp(OldOp, MappedOp);
2085 };
2086
// Remember the source block now; BI is advanced by the loops below.
2087 BasicBlock *RangeBB = BI->getParent();
2088
2089 // Clone the phi nodes of the source basic block into NewBB. The resulting
2090 // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
2091 // might need to rewrite the operand of the cloned phi.
2092 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2093 PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
2094 NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
2095 ValueMapping[PN] = NewPN;
2096 if (const DebugLoc &DL = PN->getDebugLoc())
2097 mapAtomInstance(DL, ValueMapping);
2098 }
2099
2100 // Clone noalias scope declarations in the threaded block. When threading a
2101 // loop exit, we would otherwise end up with two identical scope declarations
2102 // visible at the same time.
2103 SmallVector<MDNode *> NoAliasScopes;
2104 DenseMap<MDNode *, MDNode *> ClonedScopes;
2105 LLVMContext &Context = PredBB->getContext();
2106 identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
2107 cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
2108
// Copies the debug records attached to From onto NewInst and retargets the
// variable records among them to the cloned values.
2109 auto CloneAndRemapDbgInfo = [&](Instruction *NewInst, Instruction *From) {
2110 auto DVRRange = NewInst->cloneDebugInfoFrom(From);
2111 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
2112 RetargetDbgVariableRecordIfPossible(&DVR);
2113 };
2114
2115 // Clone the non-phi instructions of the source basic block into NewBB,
2116 // keeping track of the mapping and using it to remap operands in the cloned
2117 // instructions.
2118 for (; BI != BE; ++BI) {
2119 Instruction *New = BI->clone();
2120 New->setName(BI->getName());
2121 New->insertInto(NewBB, NewBB->end());
2122 ValueMapping[&*BI] = New;
2123 adaptNoAliasScopes(New, ClonedScopes, Context);
2124
2125 CloneAndRemapDbgInfo(New, &*BI);
2126 if (const DebugLoc &DL = New->getDebugLoc())
2127 mapAtomInstance(DL, ValueMapping);
2128
2129 // Remap operands to patch up intra-block references.
2130 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2131 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2132 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
2133 if (I != ValueMapping.end())
2134 New->setOperand(i, I->second);
2135 }
2136 }
2137
2138 // There may be DbgVariableRecords on the terminator, clone directly from
2139 // marker to marker as there isn't an instruction there.
2140 if (BE != RangeBB->end() && BE->hasDbgRecords()) {
2141 // Dump them at the end.
2142 DbgMarker *Marker = RangeBB->getMarker(BE);
2143 DbgMarker *EndMarker = NewBB->createMarker(NewBB->end());
2144 auto DVRRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);
2145 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
2146 RetargetDbgVariableRecordIfPossible(&DVR);
2147 }
2148}
2149
2150/// Attempt to thread through two successive basic blocks.
///
/// Returns true if threadThroughTwoBasicBlocks was invoked, and false if any
/// of the legality or cost checks below rejected the transformation.
2152 Value *Cond) {
2153 // Consider:
2154 //
2155 // PredBB:
2156 // %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
2157 // %tobool = icmp eq i32 %cond, 0
2158 // br i1 %tobool, label %BB, label ...
2159 //
2160 // BB:
2161 // %cmp = icmp eq i32* %var, null
2162 // br i1 %cmp, label ..., label ...
2163 //
2164 // We don't know the value of %var at BB even if we know which incoming edge
2165 // we take to BB. However, once we duplicate PredBB for each of its incoming
2166 // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
2167 // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
2168
2169 // Require that BB end with a Branch for simplicity.
2171 if (!CondBr)
2172 return false;
2173
2174 // BB must have exactly one predecessor.
2175 BasicBlock *PredBB = BB->getSinglePredecessor();
2176 if (!PredBB)
2177 return false;
2178
2179 // Require that PredBB end with a conditional Branch. If PredBB ends with an
2180 // unconditional branch, we should be merging PredBB and BB instead. For
2181 // simplicity, we don't deal with a switch.
2182 CondBrInst *PredBBBranch = dyn_cast<CondBrInst>(PredBB->getTerminator());
2183 if (!PredBBBranch)
2184 return false;
2185
2186 // If PredBB has exactly one incoming edge, we don't gain anything by copying
2187 // PredBB.
2188 if (PredBB->getSinglePredecessor())
2189 return false;
2190
2191 // Don't thread through PredBB if it contains a successor edge to itself, in
2192 // which case we would infinite loop. Suppose we are threading an edge from
2193 // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
2194 // successor edge to itself. If we allowed jump threading in this case, we
2195 // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
2196 // PredBB.thread has a successor edge to PredBB, we would immediately come up
2197 // with another jump threading opportunity from PredBB.thread through PredBB
2198 // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
2199 // would keep peeling one iteration from PredBB.
2200 if (llvm::is_contained(successors(PredBB), PredBB))
2201 return false;
2202
2203 // Don't thread across a loop header.
2204 if (LoopHeaders.count(PredBB))
2205 return false;
2206
2207 // Avoid complication with duplicating EH pads.
2208 if (PredBB->isEHPad())
2209 return false;
2210
2211 // Find a predecessor that we can thread. For simplicity, we only consider a
2212 // successor edge out of BB to which we thread exactly one incoming edge into
2213 // PredBB.
// ZeroPred/OnePred remember the last predecessor seen for which the condition
// is 0 or 1; the counts say how many such predecessors there were.
2214 unsigned ZeroCount = 0;
2215 unsigned OneCount = 0;
2216 BasicBlock *ZeroPred = nullptr;
2217 BasicBlock *OnePred = nullptr;
2218 const DataLayout &DL = BB->getDataLayout();
2219 for (BasicBlock *P : predecessors(PredBB)) {
2220 // If PredPred ends with IndirectBrInst, we can't handle it.
2221 if (isa<IndirectBrInst>(P->getTerminator()))
2222 continue;
2225 if (CI->isZero()) {
2226 ZeroCount++;
2227 ZeroPred = P;
2228 } else if (CI->isOne()) {
2229 OneCount++;
2230 OnePred = P;
2231 }
2232 }
2233 }
2234
2235 // Disregard complicated cases where we have to thread multiple edges.
// The zero case is preferred when both values have exactly one predecessor.
2236 BasicBlock *PredPredBB;
2237 if (ZeroCount == 1) {
2238 PredPredBB = ZeroPred;
2239 } else if (OneCount == 1) {
2240 PredPredBB = OnePred;
2241 } else {
2242 return false;
2243 }
2244
// Successor 1 is taken when the condition is 0, successor 0 when it is 1.
2245 BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
2246
2247 // If threading to the same block as we come from, we would infinite loop.
2248 if (SuccBB == BB) {
2249 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2250 << "' - would thread to self!\n");
2251 return false;
2252 }
2253
2254 // If threading this would thread across a loop header, don't thread the edge.
2255 // See the comments above findLoopHeaders for justifications and caveats.
2256 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2257 LLVM_DEBUG({
2258 bool BBIsHeader = LoopHeaders.count(BB);
2259 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2260 dbgs() << " Not threading across "
2261 << (BBIsHeader ? "loop header BB '" : "block BB '")
2262 << BB->getName() << "' to dest "
2263 << (SuccIsHeader ? "loop header BB '" : "block BB '")
2264 << SuccBB->getName()
2265 << "' - it might create an irreducible loop!\n";
2266 });
2267 return false;
2268 }
2269
2270 // Compute the cost of duplicating BB and PredBB.
2271 unsigned BBCost = getJumpThreadDuplicationCost(
2272 TTI, BB, BB->getTerminator(), BBDupThreshold);
2273 unsigned PredBBCost = getJumpThreadDuplicationCost(
2274 TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
2275
2276 // Give up if costs are too high. We need to check BBCost and PredBBCost
2277 // individually before checking their sum because getJumpThreadDuplicationCost
2278 // returns (unsigned)~0 for those basic blocks that cannot be duplicated.
2279 if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
2280 BBCost + PredBBCost > BBDupThreshold) {
2281 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2282 << "' - Cost is too high: " << PredBBCost
2283 << " for PredBB, " << BBCost << " for BB\n");
2284 return false;
2285 }
2286
2287 // Now we are ready to duplicate PredBB.
2288 threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
2289 return true;
2290}
2291
2293 BasicBlock *PredBB,
2294 BasicBlock *BB,
2295 BasicBlock *SuccBB) {
// Duplicate PredBB into a new block reached only from PredPredBB, then thread
// the edge from that copy across BB to SuccBB via threadEdge.
2296 LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
2297 << BB->getName() << "'\n");
2298
2299 // Build BPI/BFI before any changes are made to IR.
2300 bool HasProfile = doesBlockHaveProfileData(BB);
2301 auto *BFI = getOrCreateBFI(HasProfile);
2302 auto *BPI = getOrCreateBPI(BFI != nullptr);
2303
2304 CondBrInst *CondBr = cast<CondBrInst>(BB->getTerminator());
2305 CondBrInst *PredBBBranch = cast<CondBrInst>(PredBB->getTerminator());
2306
2307 BasicBlock *NewBB =
2308 BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
2309 PredBB->getParent(), PredBB);
2310 NewBB->moveAfter(PredBB);
2311
2312 // Set the block frequency of NewBB.
2313 if (BFI) {
2314 assert(BPI && "It's expected BPI to exist along with BFI");
2315 auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
2316 BPI->getEdgeProbability(PredPredBB, PredBB);
2317 BFI->setBlockFreq(NewBB, NewBBFreq);
2318 }
2319
2320 // We are going to have to map operands from the original BB block to the new
2321 // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
2322 // to account for entry from PredPredBB.
// The whole of PredBB is cloned, terminator included.
2323 ValueToValueMapTy ValueMapping;
2324 cloneInstructions(ValueMapping, PredBB->begin(), PredBB->end(), NewBB,
2325 PredPredBB);
2326
2327 // Copy the edge probabilities from PredBB to NewBB.
2328 if (BPI)
2329 BPI->copyEdgeProbabilities(PredBB, NewBB);
2330
2331 // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
2332 // This eliminates predecessors from PredPredBB, which requires us to simplify
2333 // any PHI nodes in PredBB.
2334 Instruction *PredPredTerm = PredPredBB->getTerminator();
2335 for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
2336 if (PredPredTerm->getSuccessor(i) == PredBB) {
2337 PredBB->removePredecessor(PredPredBB, true);
2338 PredPredTerm->setSuccessor(i, NewBB);
2339 }
2340
// NewBB now branches to both successors of PredBB, so their PHIs need an
// incoming entry for it.
2341 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
2342 ValueMapping);
2343 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
2344 ValueMapping);
2345
// NOTE(review): the first two inserts name the successors of BB's terminator
// (CondBr), whereas NewBB's cloned terminator targets the successors of
// PredBBBranch. Confirm this is intended and not meant to be PredBBBranch.
2346 DTU->applyUpdatesPermissive(
2347 {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
2348 {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
2349 {DominatorTree::Insert, PredPredBB, NewBB},
2350 {DominatorTree::Delete, PredPredBB, PredBB}});
2351
2352 // Remap source location atoms because we're duplicating control flow.
2353 remapSourceAtoms(ValueMapping, NewBB->begin(), NewBB->end());
2354
2355 updateSSA(PredBB, NewBB, ValueMapping);
2356
2357 // Clean up things like PHI nodes with single operands, dead instructions,
2358 // etc.
2359 SimplifyInstructionsInBlock(NewBB, TLI);
2360 SimplifyInstructionsInBlock(PredBB, TLI);
2361
// NewBB is the only predecessor to thread across BB.
2362 SmallVector<BasicBlock *, 1> PredsToFactor;
2363 PredsToFactor.push_back(NewBB);
2364 threadEdge(BB, PredsToFactor, SuccBB);
2365}
2366
2367/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
///
/// Returns false without changing anything when SuccBB is BB itself, when BB
/// or SuccBB is a recorded loop header, or when the duplication cost of BB
/// exceeds BBDupThreshold. Otherwise calls threadEdge and returns true.
2369 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
2370 BasicBlock *SuccBB) {
2371 // If threading to the same block as we come from, we would infinite loop.
2372 if (SuccBB == BB) {
2373 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2374 << "' - would thread to self!\n");
2375 return false;
2376 }
2377
2378 // If threading this would thread across a loop header, don't thread the edge.
2379 // See the comments above findLoopHeaders for justifications and caveats.
2380 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2381 LLVM_DEBUG({
2382 bool BBIsHeader = LoopHeaders.count(BB);
2383 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2384 dbgs() << " Not threading across "
2385 << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
2386 << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
2387 << SuccBB->getName() << "' - it might create an irreducible loop!\n";
2388 });
2389 return false;
2390 }
2391
// Profitability: the cost of duplicating BB must not exceed the threshold.
2392 unsigned JumpThreadCost = getJumpThreadDuplicationCost(
2393 TTI, BB, BB->getTerminator(), BBDupThreshold);
2394 if (JumpThreadCost > BBDupThreshold) {
2395 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2396 << "' - Cost is too high: " << JumpThreadCost << "\n");
2397 return false;
2398 }
2399
2400 threadEdge(BB, PredBBs, SuccBB);
2401 return true;
2402}
2403
2404/// threadEdge - We have decided that it is safe and profitable to factor the
2405/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
2406/// across BB. Transform the IR to reflect this change.
///
/// Callers must already have ruled out SuccBB == BB and loop headers; both
/// conditions are asserted here rather than re-checked.
2408 const SmallVectorImpl<BasicBlock *> &PredBBs,
2409 BasicBlock *SuccBB) {
2410 assert(SuccBB != BB && "Don't create an infinite loop");
2411
2412 assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
2413 "Don't thread across loop headers");
2414
2415 // Build BPI/BFI before any changes are made to IR.
2416 bool HasProfile = doesBlockHaveProfileData(BB);
2417 auto *BFI = getOrCreateBFI(HasProfile);
2418 auto *BPI = getOrCreateBPI(BFI != nullptr);
2419
2420 // And finally, do it! Start by factoring the predecessors if needed.
2421 BasicBlock *PredBB;
2422 if (PredBBs.size() == 1)
2423 PredBB = PredBBs[0];
2424 else {
2425 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2426 << " common predecessors.\n");
2427 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2428 }
2429
2430 // Now thread the edge from the single predecessor PredBB.
2431 LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
2432 << "' to '" << SuccBB->getName()
2433 << ", across block:\n " << *BB << "\n");
2434
// Tell LVI about the threaded edge before the IR is changed.
2435 LVI->threadEdge(PredBB, BB, SuccBB);
2436
2438 BB->getName()+".thread",
2439 BB->getParent(), BB);
2440 NewBB->moveAfter(PredBB);
2441
2442 // Set the block frequency of NewBB.
2443 if (BFI) {
2444 assert(BPI && "It's expected BPI to exist along with BFI");
2445 auto NewBBFreq =
2446 BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
2447 BFI->setBlockFreq(NewBB, NewBBFreq);
2448 }
2449
2450 // Copy all the instructions from BB to NewBB except the terminator.
2451 ValueToValueMapTy ValueMapping;
2452 cloneInstructions(ValueMapping, BB->begin(), std::prev(BB->end()), NewBB,
2453 PredBB);
2454
2455 // We didn't copy the terminator from BB over to NewBB, because there is now
2456 // an unconditional jump to SuccBB. Insert the unconditional jump.
2457 UncondBrInst *NewBI = UncondBrInst::Create(SuccBB, NewBB);
2458 NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
2459
2460 // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
2461 // PHI nodes for NewBB now.
2462 addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
2463
2464 // Update the terminator of PredBB to jump to NewBB instead of BB. This
2465 // eliminates predecessors from BB, which requires us to simplify any PHI
2466 // nodes in BB.
2467 Instruction *PredTerm = PredBB->getTerminator();
2468 for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
2469 if (PredTerm->getSuccessor(i) == BB) {
2470 BB->removePredecessor(PredBB, true);
2471 PredTerm->setSuccessor(i, NewBB);
2472 }
2473
2474 // Enqueue required DT updates.
2475 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
2476 {DominatorTree::Insert, PredBB, NewBB},
2477 {DominatorTree::Delete, PredBB, BB}});
2478
2479 remapSourceAtoms(ValueMapping, NewBB->begin(), NewBB->end());
2480 updateSSA(BB, NewBB, ValueMapping);
2481
2482 // At this point, the IR is fully up to date and consistent. Do a quick scan
2483 // over the new instructions and zap any that are constants or dead. This
2484 // frequently happens because of phi translation.
2485 SimplifyInstructionsInBlock(NewBB, TLI);
2486
2487 // Update the edge weight from BB to SuccBB, which should be less than before.
2488 updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
2489
2490 // Threaded an edge!
2491 ++NumThreads;
2492}
2493
2494/// Create a new basic block that will be the predecessor of BB and successor of
2495/// all blocks in Preds. When profile data is available, update the frequency of
2496/// this new block.
///
/// Returns the first block created. When BB is a landing pad more than one
/// block is created, and only the first is returned.
2497BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
2499 const char *Suffix) {
2501
2502 // Collect the frequencies of all predecessors of BB, which will be used to
2503 // update the edge weight of the result of splitting predecessors.
// Frequencies are only tracked when getBFI() yields a non-null result
// (presumably meaning BFI has already been computed; verify against getBFI).
2505 auto *BFI = getBFI();
2506 if (BFI) {
2507 auto *BPI = getOrCreateBPI(true);
2508 for (auto *Pred : Preds)
2509 FreqMap.insert(std::make_pair(
2510 Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
2511 }
2512
2513 // In the case when BB is a LandingPad block we create 2 new predecessors
2514 // instead of just one.
2515 if (BB->isLandingPad()) {
2516 std::string NewName = std::string(Suffix) + ".split-lp";
2517 SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
2518 } else {
2519 NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
2520 }
2521
// For each new block: record the edge NewBB->BB, redirect every Pred->BB edge
// to Pred->NewBB in the dominator-tree updates, and sum the incoming
// frequencies to obtain NewBB's frequency.
2522 std::vector<DominatorTree::UpdateType> Updates;
2523 Updates.reserve((2 * Preds.size()) + NewBBs.size());
2524 for (auto *NewBB : NewBBs) {
2525 BlockFrequency NewBBFreq(0);
2526 Updates.push_back({DominatorTree::Insert, NewBB, BB});
2527 for (auto *Pred : predecessors(NewBB)) {
2528 Updates.push_back({DominatorTree::Delete, Pred, BB});
2529 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
2530 if (BFI) // Update frequencies between Pred -> NewBB.
2531 NewBBFreq += FreqMap.lookup(Pred);
2532 }
2533 if (BFI) // Apply the summed frequency to NewBB.
2534 BFI->setBlockFreq(NewBB, NewBBFreq);
2535 }
2536
2537 DTU->applyUpdatesPermissive(Updates);
2538 return NewBBs[0];
2539}
2540
2541bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2542 const Instruction *TI = BB->getTerminator();
2543 if (!TI || TI->getNumSuccessors() < 2)
2544 return false;
2545
2546 return hasValidBranchWeightMD(*TI);
2547}
2548
2549/// Update the block frequency of BB and branch weight and the metadata on the
2550/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2551/// Freq(PredBB->BB) / Freq(BB->SuccBB).
///
/// BFI and BPI must be both set or both null; when they are null nothing is
/// updated. HasProfile controls whether the branch-weight metadata on BB's
/// terminator is rewritten in addition to the BFI/BPI state.
2552void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2553 BasicBlock *BB,
2554 BasicBlock *NewBB,
2555 BasicBlock *SuccBB,
2556 BlockFrequencyInfo *BFI,
2558 bool HasProfile) {
// The "both unset" half of this check must test BPI as well as BFI.
2559 assert(((BFI && BPI) || (!BFI && !BPI)) &&
2560 "Both BFI & BPI should either be set or unset");
2561
2562 if (!BFI) {
2563 assert(!HasProfile &&
2564 "It's expected to have BFI/BPI when profile info exists");
2565 return;
2566 }
2567
2568 // As the edge from PredBB to BB is deleted, we have to update the block
2569 // frequency of BB.
2570 auto BBOrigFreq = BFI->getBlockFreq(BB);
2571 auto NewBBFreq = BFI->getBlockFreq(NewBB);
2572 auto BBNewFreq = BBOrigFreq - NewBBFreq;
2573 BFI->setBlockFreq(BB, BBNewFreq);
2574
2575 // Collect updated outgoing edges' frequencies from BB and use them to update
2576 // edge probabilities.
// Only the edge to SuccBB loses the frequency that now flows through NewBB.
2577 SmallVector<uint64_t, 4> BBSuccFreq;
2578 for (auto It : enumerate(successors(BB))) {
2579 auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, It.index());
2580 auto SuccFreq =
2581 (It.value() == SuccBB) ? BB2SuccBBFreq - NewBBFreq : BB2SuccBBFreq;
2582 BBSuccFreq.push_back(SuccFreq.getFrequency());
2583 }
2584
2585 uint64_t MaxBBSuccFreq = *llvm::max_element(BBSuccFreq);
2586
// If every outgoing frequency is zero, fall back to a uniform distribution.
// Otherwise scale each frequency relative to the largest one.
2588 if (MaxBBSuccFreq == 0)
2589 BBSuccProbs.assign(BBSuccFreq.size(),
2590 {1, static_cast<unsigned>(BBSuccFreq.size())});
2591 else {
2592 for (uint64_t Freq : BBSuccFreq)
2593 BBSuccProbs.push_back(
2594 BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2595 // Normalize edge probabilities so that they sum up to one.
2597 BBSuccProbs.end());
2598 }
2599
2600 // Update edge probabilities in BPI.
2601 BPI->setEdgeProbability(BB, BBSuccProbs);
2602
2603 // Update the profile metadata as well.
2604 //
2605 // Don't do this if the profile of the transformed blocks was statically
2606 // estimated. (This could occur despite the function having an entry
2607 // frequency in completely cold parts of the CFG.)
2608 //
2609 // In this case we don't want to suggest to subsequent passes that the
2610 // calculated weights are fully consistent. Consider this graph:
2611 //
2612 // check_1
2613 // 50% / |
2614 // eq_1 | 50%
2615 // \ |
2616 // check_2
2617 // 50% / |
2618 // eq_2 | 50%
2619 // \ |
2620 // check_3
2621 // 50% / |
2622 // eq_3 | 50%
2623 // \ |
2624 //
2625 // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2626 // the overall probabilities are inconsistent; the total probability that the
2627 // value is either 1, 2 or 3 is 150%.
2628 //
2629 // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2630 // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2631 // the loop exit edge. Then based solely on static estimation we would assume
2632 // the loop was extremely hot.
2633 //
2634 // FIXME this locally as well so that BPI and BFI are consistent as well. We
2635 // shouldn't make edges extremely likely or unlikely based solely on static
2636 // estimation.
2637 if (BBSuccProbs.size() >= 2 && HasProfile) {
// The probability numerators are written out as the new branch weights.
2638 SmallVector<uint32_t, 4> Weights;
2639 for (auto Prob : BBSuccProbs)
2640 Weights.push_back(Prob.getNumerator());
2641
2642 auto TI = BB->getTerminator();
2643 setBranchWeights(*TI, Weights, hasBranchWeightOrigin(*TI));
2644 }
2645}
2646
2647/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2648/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2649/// If we can duplicate the contents of BB up into PredBB do so now, this
2650/// improves the odds that the branch will be on an analyzable instruction like
2651/// a compare.
///
/// PredBBs must be non-empty (asserted). When it holds more than one block the
/// predecessors are first factored into a single block via splitBlockPreds.
///
/// Returns false, leaving the IR untouched, when BB is recorded in LoopHeaders
/// or when the duplication cost exceeds BBDupThreshold. Otherwise the non-PHI
/// instructions of BB are cloned into the predecessor, NumDupes is bumped and
/// true is returned.
// NOTE(review): the embedded numbering jumps from 2651 to 2653, so the line
// that opens this definition (return type and qualified name) is absent from
// this listing.
2653 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2654 assert(!PredBBs.empty() && "Can't handle an empty set");
2655
2656 // If BB is a loop header, then duplicating this block outside the loop would
2657 // cause us to transform this into an irreducible loop, don't do this.
2658 // See the comments above findLoopHeaders for justifications and caveats.
2659 if (LoopHeaders.count(BB)) {
2660 LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2661 << "' into predecessor block '" << PredBBs[0]->getName()
2662 << "' - it might create an irreducible loop!\n");
2663 return false;
2664 }
2665
  // Cost gate: bail out before any IR is modified if BB is too expensive to
  // duplicate.
2666 unsigned DuplicationCost = getJumpThreadDuplicationCost(
2667 TTI, BB, BB->getTerminator(), BBDupThreshold);
2668 if (DuplicationCost > BBDupThreshold) {
2669 LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2670 << "' - Cost is too high: " << DuplicationCost << "\n");
2671 return false;
2672 }
2673
2674 // And finally, do it! Start by factoring the predecessors if needed.
  // Dominator-tree edits are only collected in Updates here; they are handed
  // to DTU in one batch at the very end of the function.
2675 std::vector<DominatorTree::UpdateType> Updates;
2676 BasicBlock *PredBB;
2677 if (PredBBs.size() == 1)
2678 PredBB = PredBBs[0];
2679 else {
2680 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2681 << " common predecessors.\n");
2682 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2683 }
  // The PredBB -> BB edge is removed further down (removePredecessor plus
  // erasing the old branch), so queue its deletion now.
2684 Updates.push_back({DominatorTree::Delete, PredBB, BB});
2685
2686 // Okay, we decided to do this! Clone all the instructions in BB onto the end
2687 // of PredBB.
2688 LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2689 << "' into end of '" << PredBB->getName()
2690 << "' to eliminate branch on phi. Cost: "
2691 << DuplicationCost << " block is:" << *BB << "\n");
2692
2693 // Unless PredBB ends with an unconditional branch, split the edge so that we
2694 // can just clone the bits from BB into the end of the new PredBB.
2695 UncondBrInst *OldPredBranch = dyn_cast<UncondBrInst>(PredBB->getTerminator());
2696
2697 if (!OldPredBranch) {
2698 BasicBlock *OldPredBB = PredBB;
2699 PredBB = SplitEdge(OldPredBB, BB);
2700 Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
2701 Updates.push_back({DominatorTree::Insert, PredBB, BB});
2702 Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
  // The cast asserts that the block produced by SplitEdge ends in an
  // unconditional branch.
2703 OldPredBranch = cast<UncondBrInst>(PredBB->getTerminator());
2704 }
2705
2706 // We are going to have to map operands from the original BB block into the
2707 // PredBB block. Evaluate PHI nodes in BB.
2708 ValueToValueMapTy ValueMapping;
2709
2710 // Remember the position before the inserted instructions.
2711 auto RItBeforeInsertPt = std::next(OldPredBranch->getReverseIterator());
2712
  // Seed the map: inside the clone, each PHI of BB stands for the value it
  // receives along the edge from PredBB. The loop leaves BI at the first
  // non-PHI instruction of BB.
2713 BasicBlock::iterator BI = BB->begin();
2714 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2715 ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2716
2717 // Clone noalias scope declarations in the duplicated instructions. Otherwise
2718 // the duplicate would share the original block's scopes, and alias analysis
2719 // could conclude two accesses on different paths do not alias when they may.
2720 SmallVector<MDNode *> NoAliasScopes;
2721 DenseMap<MDNode *, MDNode *> ClonedScopes;
2722 LLVMContext &Context = PredBB->getContext();
2723 identifyNoAliasScopesToClone(BI, BB->end(), NoAliasScopes);
2724 cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
2725
2726 // Clone the non-phi instructions of BB into PredBB, keeping track of the
2727 // mapping and using it to remap operands in the cloned instructions.
2728 for (; BI != BB->end(); ++BI) {
2729 Instruction *New = BI->clone();
2730 New->insertInto(PredBB, OldPredBranch->getIterator());
2731 adaptNoAliasScopes(New, ClonedScopes, Context);
2732
2733 // Remap operands to patch up intra-block references.
2734 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2735 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2736 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
2737 if (I != ValueMapping.end())
2738 New->setOperand(i, I->second);
2739 }
2740
2741 // Remap debug variable operands.
2742 remapDebugVariable(ValueMapping, New);
2743 if (const DebugLoc &DL = New->getDebugLoc())
2744 mapAtomInstance(DL, ValueMapping);
2745
2746 // If this instruction can be simplified after the operands are updated,
2747 // just use the simplified value instead. This frequently happens due to
2748 // phi translation.
  // NOTE(review): the numbering skips 2749, so the statement below starts
  // mid-expression. Presumably the missing line opens an `if` that binds the
  // result of a simplification query to IV - confirm against upstream.
2750 New,
2751 {BB->getDataLayout(), TLI, nullptr, nullptr, New})) {
2752 ValueMapping[&*BI] = IV;
  // A clone that may have side effects is kept in PredBB even though its
  // value was simplified; only side-effect-free clones are deleted.
2753 if (!New->mayHaveSideEffects()) {
2754 New->eraseFromParent();
2755 New = nullptr;
2756 // Clone debug-info on the elided instruction to the destination
2757 // position.
2758 OldPredBranch->cloneDebugInfoFrom(&*BI, std::nullopt, true);
2759 }
2760 } else {
2761 ValueMapping[&*BI] = New;
2762 }
2763 if (New) {
2764 // Otherwise, insert the new instruction into the block.
2765 New->setName(BI->getName());
2766 // Clone across any debug-info attached to the old instruction.
2767 New->cloneDebugInfoFrom(&*BI);
2768 // Update Dominance from simplified New instruction operands.
2769 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2770 if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2771 Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
2772 }
2773 }
2774
2775 // Check to see if the targets of the branch had PHI nodes. If so, we need to
2776 // add entries to the PHI nodes for branch from PredBB now.
  // The cast asserts that BB is terminated by a conditional branch.
2777 CondBrInst *BBBranch = cast<CondBrInst>(BB->getTerminator());
2778 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2779 ValueMapping);
2780 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2781 ValueMapping);
2782
2783 // KeyInstructions: Remap the cloned instructions' atoms only.
2784 remapSourceAtoms(ValueMapping, std::prev(RItBeforeInsertPt)->getIterator(),
2785 OldPredBranch->getIterator());
2786
2787 updateSSA(BB, PredBB, ValueMapping);
2788
2789 // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2790 // that we nuked.
2791 BB->removePredecessor(PredBB, true);
2792
2793 // Remove the unconditional branch at the end of the PredBB block.
2794 OldPredBranch->eraseFromParent();
  // BPI is only touched when getBPI() yields a non-null instance.
2795 if (auto *BPI = getBPI())
2796 BPI->copyEdgeProbabilities(BB, PredBB);
2797 DTU->applyUpdatesPermissive(Updates);
2798
2799 ++NumDupes;
2800 return true;
2801}
2802
2803// Pred is a predecessor of BB with an unconditional branch to BB. SI is
2804// a Select instruction in Pred. BB has other predecessors and SI is used in
2805// a PHI node in BB. SI has no other use.
2806// A new basic block, NewBB, is created and SI is converted to compare and
2807// conditional branch. SI is erased from parent.
//
// SIUse is the PHI in BB that consumes SI and Idx is the incoming slot of
// SIUse that is rewritten: slot Idx receives SI's false value, and a new
// incoming entry from NewBB carries SI's true value. The conditional branch
// created here tests SI's own condition; no new compare instruction is built
// in this function.
// NOTE(review): the embedded numbering skips 2808, so the line that opens this
// definition (return type, name and leading parameters) is absent from this
// listing.
2809 SelectInst *SI, PHINode *SIUse,
2810 unsigned Idx) {
2811 // Expand the select.
2812 //
2813 // Pred --
2814 // | v
2815 // | NewBB
2816 // | |
2817 // |-----
2818 // v
2819 // BB
  // The cast asserts the documented precondition that Pred ends in an
  // unconditional branch.
2820 UncondBrInst *PredTerm = cast<UncondBrInst>(Pred->getTerminator());
2821 BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2822 BB->getParent(), BB);
2823 // Move the unconditional branch to NewBB.
2824 PredTerm->removeFromParent();
2825 PredTerm->insertInto(NewBB, NewBB->end());
2826 // Create a conditional branch and update PHI nodes.
2827 //
2828 // FIXME: We should `freeze` the condition before using it in a conditional
2829 // branch, unless we can prove it's not poison: select-on-poison isn't UB,
2830 // but branch-on-poison is. But doing this causes performance regressions,
2831 // and we haven't been able to find an end-to-end correctness issue it fixes.
2832 // https://github.com/llvm/llvm-project/pull/199408#issuecomment-4545013881.
2833 auto *BI = CondBrInst::Create(SI->getCondition(), NewBB, BB, Pred);
2834 BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2835 BI->copyMetadata(*SI, {LLVMContext::MD_prof});
2836 SIUse->setIncomingValue(Idx, SI->getFalseValue());
2837 SIUse->addIncoming(SI->getTrueValue(), NewBB);
2838
  // The weights default to 1:1 and are overwritten only if SI carries branch
  // weights.
2839 uint64_t TrueWeight = 1;
2840 uint64_t FalseWeight = 1;
2841 // Copy probabilities from 'SI' to created conditional branch in 'Pred'.
2842 if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
2843 (TrueWeight + FalseWeight) != 0) {
  // NOTE(review): the numbering skips 2844, 2845 and 2847; the declaration of
  // BP and the openings of the two statements below are absent from this
  // listing. Presumably they build the true/false edge probabilities -
  // confirm against upstream.
2846 TrueWeight, TrueWeight + FalseWeight));
2848 FalseWeight, TrueWeight + FalseWeight));
2849 // Update BPI if exists.
2850 if (auto *BPI = getBPI())
2851 BPI->setEdgeProbability(Pred, BP);
2852 }
2853 // Set the block frequency of NewBB.
2854 if (auto *BFI = getBFI()) {
  // Fall back to 1:1 so the ratio computed below never has a zero
  // denominator.
2855 if ((TrueWeight + FalseWeight) == 0) {
2856 TrueWeight = 1;
2857 FalseWeight = 1;
2858 }
  // NOTE(review): the numbering skips 2859; the line that declares
  // PredToNewBBProb and opens this expression is absent from this listing.
2860 TrueWeight, TrueWeight + FalseWeight);
2861 auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
2862 BFI->setBlockFreq(NewBB, NewBBFreq);
2863 }
2864
2865 // The select is now dead.
2866 SI->eraseFromParent();
2867 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
2868 {DominatorTree::Insert, Pred, NewBB}});
2869
2870 // Update any other PHI nodes in BB.
  // NewBB is a new predecessor of BB, so every other PHI gets an entry for it
  // carrying the same value it already receives from Pred.
2871 for (BasicBlock::iterator BI = BB->begin();
2872 PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2873 if (Phi != SIUse)
2874 Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2875}
2876
// NOTE(review): the embedded numbering skips 2877, so the line that opens this
// definition is absent from this listing. Judging by the comment further down,
// this is presumably the overload of tryToUnfoldSelect whose SI is an
// instruction with a condition (e.g. a switch) - confirm against upstream.
//
// Visible behaviour: if SI's condition is a PHI located in BB, look for the
// first incoming value that is a single-use select living in its incoming
// block, where that block ends in an unconditional branch. Unfold that select
// with unfoldSelectInstr and return true. Return false if nothing qualifies.
2878 PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2879
2880 if (!CondPHI || CondPHI->getParent() != BB)
2881 return false;
2882
2883 for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2884 BasicBlock *Pred = CondPHI->getIncomingBlock(I);
  // NOTE(review): the numbering skips 2885; the declaration of PredSI is
  // absent from this listing. Presumably it is the I-th incoming value of
  // CondPHI viewed as a SelectInst - confirm against upstream.
2886
2887 // The second and third condition can be potentially relaxed. Currently
2888 // the conditions help to simplify the code and allow us to reuse existing
2889 // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
2890 if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
2891 continue;
2892
  // unfoldSelectInstr casts Pred's terminator to an unconditional branch, so
  // any other kind of terminator is skipped here.
2893 UncondBrInst *PredTerm = dyn_cast<UncondBrInst>(Pred->getTerminator());
2894 if (!PredTerm)
2895 continue;
2896
  // Only one select is unfolded per call.
2897 unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
2898 return true;
2899 }
2900 return false;
2901}
2902
2903/// tryToUnfoldSelect - Look for blocks of the form
2904/// bb1:
2905/// %a = select
2906/// br bb2
2907///
2908/// bb2:
2909/// %p = phi [%a, %bb1] ...
2910/// %c = icmp %p
2911/// br i1 %c
2912///
2913/// And expand the select into a branch structure if one of its arms allows %c
2914/// to be folded. This later enables threading from bb1 over bb2.
///
/// Returns true after unfolding the first qualifying select, and false if no
/// incoming value of the compared PHI qualifies.
// NOTE(review): the embedded numbering skips 2915 and 2916, so the line that
// opens this definition and the declaration of CondBr are absent from this
// listing.
2917 PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
  // cast<> (not dyn_cast<>) asserts that the compare's second operand is a
  // Constant.
2918 Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2919
2920 if (!CondBr || !CondLHS || CondLHS->getParent() != BB)
2921 return false;
2922
2923 for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2924 BasicBlock *Pred = CondLHS->getIncomingBlock(I);
  // NOTE(review): the numbering skips 2925; the declaration of SI is absent
  // from this listing. Presumably it is the I-th incoming value of CondLHS
  // viewed as a SelectInst - confirm against upstream.
2926
2927 // Look if one of the incoming values is a select in the corresponding
2928 // predecessor.
2929 if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2930 continue;
2931
2932 UncondBrInst *PredTerm = dyn_cast<UncondBrInst>(Pred->getTerminator());
2933 if (!PredTerm)
2934 continue;
2935
2936 // Now check if one of the select values would allow us to constant fold the
2937 // terminator in BB. We don't do the transform if both sides fold, those
2938 // cases will be threaded in any case.
  // Operands 1 and 2 of the select are its true and false arms.
2939 Constant *LHSRes =
2940 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2941 CondRHS, Pred, BB, CondCmp);
2942 Constant *RHSRes =
2943 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2944 CondRHS, Pred, BB, CondCmp);
  // Proceed only if at least one arm produced a result and the two results
  // are not the same pointer.
2945 if ((LHSRes || RHSRes) && LHSRes != RHSRes) {
2946 unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
2947 return true;
2948 }
2949 }
2950 return false;
2951}
2952
2953/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2954/// same BB in the form
2955/// bb:
2956/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2957/// %s = select %p, trueval, falseval
2958///
2959/// or
2960///
2961/// bb:
2962/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2963/// %c = cmp %p, 0
2964/// %s = select %c, trueval, falseval
2965///
2966/// And expand the select into a branch structure. This later enables
2967/// jump-threading over bb in this pass.
2968///
2969/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2970/// select if the associated PHI has at least one constant. If the unfolded
2971/// select is not jump-threaded, it will be folded again in the later
2972/// optimizations.
///
/// Returns true after unfolding the first qualifying select. Returns false if
/// the function has the SanitizeMemory attribute, if BB is recorded in
/// LoopHeaders, or if no PHI of BB leads to a qualifying select.
// NOTE(review): the embedded numbering skips 2973, so the line that opens this
// definition is absent from this listing.
2974 // This transform would reduce the quality of msan diagnostics.
2975 // Disable this transform under MemorySanitizer.
2976 if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
2977 return false;
2978
2979 // If threading this would thread across a loop header, don't thread the edge.
2980 // See the comments above findLoopHeaders for justifications and caveats.
2981 if (LoopHeaders.count(BB))
2982 return false;
2983
2984 for (BasicBlock::iterator BI = BB->begin();
2985 PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2986 // Look for a Phi having at least one constant incoming value.
2987 if (llvm::all_of(PN->incoming_values(),
2988 [](Value *V) { return !isa<ConstantInt>(V); }))
2989 continue;
2990
  // A select qualifies when it lives in BB, its condition is exactly V, the
  // condition has type i1, and the select does not match the logical-and /
  // logical-or patterns.
2991 auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2992 using namespace PatternMatch;
2993
2994 // Check if SI is in BB and use V as condition.
2995 if (SI->getParent() != BB)
2996 return false;
2997 Value *Cond = SI->getCondition();
2998 bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
2999 return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
3000 };
3001
  // Scan the users of PN and stop at the first qualifying select.
3002 SelectInst *SI = nullptr;
3003 for (Use &U : PN->uses()) {
3004 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
3005 // Look for a ICmp in BB that compares PN with a constant and is the
3006 // condition of a Select.
  // `1 - U.getOperandNo()` selects the compare operand that is not PN.
3007 if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
3008 isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
3009 if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
3010 if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
3011 SI = SelectI;
3012 break;
3013 }
3014 } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
3015 // Look for a Select in BB that uses PN as condition.
3016 if (isUnfoldCandidate(SelectI, U.get())) {
3017 SI = SelectI;
3018 break;
3019 }
3020 }
3021 }
3022
3023 if (!SI)
3024 continue;
3025 // Expand the select.
3026 Value *Cond = SI->getCondition();
  // Freeze the condition unless it is known to be neither undef nor poison.
3027 if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI)) {
3028 Cond = new FreezeInst(Cond, "cond.fr", SI->getIterator());
  // NOTE(review): the numbering skips 3029; one statement inside this `if`
  // is absent from this listing.
3030 }
3031 MDNode *BranchWeights = getBranchWeightMDNode(*SI);
3032 Instruction *Term =
3033 SplitBlockAndInsertIfThen(Cond, SI, false, BranchWeights);
  // After the split SI sits in the tail block (SplitBB) and Term belongs to
  // the new "then" block (NewBB).
3034 BasicBlock *SplitBB = SI->getParent();
3035 BasicBlock *NewBB = Term->getParent();
  // The new PHI takes the select's true value on the edge from the "then"
  // block and its false value on the edge coming straight from BB.
3036 PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI->getIterator());
3037 NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
3038 NewPN->addIncoming(SI->getFalseValue(), BB);
3039 NewPN->setDebugLoc(SI->getDebugLoc());
3040 SI->replaceAllUsesWith(NewPN);
3041
3042 auto *BPI = getBPI();
3043 auto *BFI = getBFI();
  // Profile bookkeeping happens only when SI had branch weights and
  // ProfcheckDisableMetadataFixes is not set.
3044 if (!ProfcheckDisableMetadataFixes && BranchWeights) {
  // NOTE(review): the numbering skips 3045; the declaration of BW is absent
  // from this listing.
3046 [[maybe_unused]] bool Extracted = extractBranchWeights(BranchWeights, BW);
3047 assert(Extracted);
3048 uint64_t Denominator =
  // NOTE(review): the numbering skips 3049; the initializer of Denominator is
  // absent from this listing. Presumably it is the sum of the extracted
  // weights - confirm against upstream.
3050 assert(Denominator > 0 &&
3051 "At least one of the branch probabilities should be non-zero");
3052 BranchProbability TrueProb =
3053 BranchProbability::getBranchProbability(BW[0], Denominator);
3054 BranchProbability FalseProb =
3055 BranchProbability::getBranchProbability(BW[1], Denominator);
3056 SmallVector<BranchProbability, 2> BP = {TrueProb, FalseProb};
3057
3058 if (BPI)
3059 BPI->setEdgeProbability(BB, BP);
3060
3061 if (BFI) {
  // NewBB gets BB's frequency scaled by the true probability; SplitBB gets
  // BB's original frequency.
3062 auto BBOrigFreq = BFI->getBlockFreq(BB);
3063 auto NewBBFreq = BBOrigFreq * TrueProb;
3064 BFI->setBlockFreq(NewBB, NewBBFreq);
3065 BFI->setBlockFreq(SplitBB, BBOrigFreq);
3066 }
3067 }
3068 SI->eraseFromParent();
3069 // NewBB and SplitBB are newly created blocks which require insertion.
3070 std::vector<DominatorTree::UpdateType> Updates;
  // Capacity: two updates per successor of SplitBB plus the three fixed
  // insertions below.
3071 Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
3072 Updates.push_back({DominatorTree::Insert, BB, SplitBB});
3073 Updates.push_back({DominatorTree::Insert, BB, NewBB});
3074 Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
3075 // BB's successors were moved to SplitBB, update DTU accordingly.
3076 for (auto *Succ : successors(SplitBB)) {
3077 Updates.push_back({DominatorTree::Delete, BB, Succ});
3078 Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
3079 }
3080 DTU->applyUpdatesPermissive(Updates);
3081 return true;
3082 }
3083 return false;
3084}
3085
3086/// Try to propagate a guard from the current BB into one of its predecessors
3087/// in case if another branch of execution implies that the condition of this
3088/// guard is always true. Currently we only process the simplest case that
3089/// looks like:
3090///
3091/// Start:
3092/// %cond = ...
3093/// br i1 %cond, label %T1, label %F1
3094/// T1:
3095/// br label %Merge
3096/// F1:
3097/// br label %Merge
3098/// Merge:
3099/// %condGuard = ...
3100/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
3101///
3102/// And cond either implies condGuard or !condGuard. In this case all the
3103/// instructions before the guard can be duplicated in both branches, and the
3104/// guard is then threaded to one of them.
///
/// Returns true as soon as threadGuard succeeds for one guard in BB, and false
/// if BB does not have the required shape or no guard could be threaded.
// NOTE(review): the embedded numbering skips 3105, so the line that opens this
// definition (return type and name) is absent from this listing.
3106 using namespace PatternMatch;
3107
3108 // We only want to deal with two predecessors.
  // The iterator walk below accepts exactly two predecessor entries, and they
  // must be different blocks.
3109 BasicBlock *Pred1, *Pred2;
3110 auto PI = pred_begin(BB), PE = pred_end(BB);
3111 if (PI == PE)
3112 return false;
3113 Pred1 = *PI++;
3114 if (PI == PE)
3115 return false;
3116 Pred2 = *PI++;
3117 if (PI != PE)
3118 return false;
3119 if (Pred1 == Pred2)
3120 return false;
3121
3122 // Try to thread one of the guards of the block.
3123 // TODO: Look up deeper than to immediate predecessor?
  // Both predecessors must have the same single predecessor (the top of the
  // diamond).
3124 auto *Parent = Pred1->getSinglePredecessor();
3125 if (!Parent || Parent != Pred2->getSinglePredecessor())
3126 return false;
3127
  // Only a conditional branch at the top of the diamond is handled.
3128 if (auto *BI = dyn_cast<CondBrInst>(Parent->getTerminator()))
3129 for (auto &I : *BB)
3130 if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
3131 return true;
3132
3133 return false;
3134}
3135
3136/// Try to propagate the guard from BB which is the lower block of a diamond
3137/// to one of its branches, in case if diamond's condition implies guard's
3138/// condition.
///
/// BI is the conditional branch at the top of the diamond. Returns false,
/// without duplicating anything, when neither outcome of BI's condition is
/// proven to imply the guard condition or when the duplication cost exceeds
/// BBDupThreshold. Returns true after the transformation.
// NOTE(review): the embedded numbering skips 3139, so the line that opens this
// definition (return type, name and leading parameters) is absent from this
// listing.
3140 CondBrInst *BI) {
3141 Value *GuardCond = Guard->getArgOperand(0);
3142 Value *BranchCond = BI->getCondition();
3143 BasicBlock *TrueDest = BI->getSuccessor(0);
3144 BasicBlock *FalseDest = BI->getSuccessor(1);
3145
3146 auto &DL = BB->getDataLayout();
3147 bool TrueDestIsSafe = false;
3148 bool FalseDestIsSafe = false;
3149
3150 // True dest is safe if BranchCond => GuardCond.
3151 auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
3152 if (Impl && *Impl)
3153 TrueDestIsSafe = true;
3154 else {
3155 // False dest is safe if !BranchCond => GuardCond.
3156 Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
3157 if (Impl && *Impl)
3158 FalseDestIsSafe = true;
3159 }
3160
3161 if (!TrueDestIsSafe && !FalseDestIsSafe)
3162 return false;
3163
  // At most one of the two flags is set (the second query only runs when the
  // first did not prove safety), so these two selections pick opposite
  // destinations.
3164 BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3165 BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3166
3167 ValueToValueMapTy UnguardedMapping, GuardedMapping;
3168 Instruction *AfterGuard = Guard->getNextNode();
  // The cost is measured up to AfterGuard, i.e. it covers the guard itself.
3169 unsigned Cost =
3170 getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
3171 if (Cost > BBDupThreshold)
3172 return false;
3173 // Duplicate all instructions before the guard and the guard itself to the
3174 // branch where implication is not proved.
  // NOTE(review): the numbering skips 3175; the line that declares
  // GuardedBlock and names the callee is absent from this listing.
3176 BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3177 assert(GuardedBlock && "Could not create the guarded block?");
3178 // Duplicate all instructions before the guard in the unguarded branch.
3179 // Since we have successfully duplicated the guarded block and this block
3180 // has fewer instructions, we expect it to succeed.
  // NOTE(review): the numbering skips 3181; the line that declares
  // UnguardedBlock and names the callee is absent from this listing.
3182 BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3183 assert(UnguardedBlock && "Could not create the unguarded block?");
3184 LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3185 << GuardedBlock->getName() << "\n");
3186 // Some instructions before the guard may still have uses. For them, we need
3187 // to create Phi nodes merging their copies in both guarded and unguarded
3188 // branches. Those instructions that have no uses can be just removed.
  // NOTE(review): the numbering skips 3189; the declaration of ToRemove is
  // absent from this listing.
  // Collect every non-PHI instruction from the start of BB up to, but not
  // including, AfterGuard.
3190 for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3191 if (!isa<PHINode>(&*BI))
3192 ToRemove.push_back(&*BI);
3193
  // NOTE(review): the numbering skips 3194; the declaration of InsertionPoint
  // is absent from this listing.
3195 assert(InsertionPoint != BB->end() && "Empty block?");
3196 // Substitute with Phis & remove.
  // The collected instructions are processed in reverse order.
3197 for (auto *Inst : reverse(ToRemove)) {
3198 if (!Inst->use_empty()) {
3199 PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
3200 NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3201 NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
3202 NewPN->setDebugLoc(Inst->getDebugLoc());
  // NOTE(review): the numbering skips 3203; one statement is absent from this
  // listing. Presumably it inserts NewPN into BB, since the PHI is created
  // above without an insertion position - confirm against upstream.
3204 Inst->replaceAllUsesWith(NewPN);
3205 }
3206 Inst->dropDbgRecords();
3207 Inst->eraseFromParent();
3208 }
3209 return true;
3210}
3211
// Builds and returns the PreservedAnalyses set PA that this pass reports.
// BPI and BFI are not part of what this function reports (see the TODOs
// below).
3212PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {
// NOTE(review): the embedded numbering skips 3213-3215; the declaration of PA
// and whatever is marked as preserved on it are absent from this listing.
3216
3217 // TODO: We would like to preserve BPI/BFI. Enable once all paths update them.
3218 // TODO: Would be nice to verify BPI/BFI consistency as well.
3219 return PA;
3220}
3221
3222template <typename AnalysisT>
3223typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
3224 assert(FAM && "Can't run external analysis without FunctionAnalysisManager");
3225
3226 // If there were no changes since last call to 'runExternalAnalysis' then all
3227 // analysis is either up to date or explicitly invalidated. Just go ahead and
3228 // run the "external" analysis.
3229 if (!ChangedSinceLastAnalysisUpdate) {
3230 assert(!DTU->hasPendingUpdates() &&
3231 "Lost update of 'ChangedSinceLastAnalysisUpdate'?");
3232 // Run the "external" analysis.
3233 return &FAM->getResult<AnalysisT>(*F);
3234 }
3235 ChangedSinceLastAnalysisUpdate = false;
3236
3237 auto PA = getPreservedAnalysis();
3238 // TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI
3239 // as preserved.
3240 PA.preserve<BranchProbabilityAnalysis>();
3241 PA.preserve<BlockFrequencyAnalysis>();
3242 // Report everything except explicitly preserved as invalid.
3243 FAM->invalidate(*F, PA);
3244 // Update DT/PDT.
3245 DTU->flush();
3246 // Make sure DT/PDT are valid before running "external" analysis.
3247 assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
3248 assert((!DTU->hasPostDomTree() ||
3249 DTU->getPostDomTree().verify(
3250 PostDominatorTree::VerificationLevel::Fast)));
3251 // Run the "external" analysis.
3252 auto *Result = &FAM->getResult<AnalysisT>(*F);
3253 // Update analysis JumpThreading depends on and not explicitly preserved.
3254 TTI = &FAM->getResult<TargetIRAnalysis>(*F);
3255 TLI = &FAM->getResult<TargetLibraryAnalysis>(*F);
3256 AA = &FAM->getResult<AAManager>(*F);
3257
3258 return Result;
3259}
3260
3261BranchProbabilityInfo *JumpThreadingPass::getBPI() {
3262 if (!BPI) {
3263 assert(FAM && "Can't create BPI without FunctionAnalysisManager");
3264 BPI = FAM->getCachedResult<BranchProbabilityAnalysis>(*F);
3265 }
3266 return BPI;
3267}
3268
3269BlockFrequencyInfo *JumpThreadingPass::getBFI() {
3270 if (!BFI) {
3271 assert(FAM && "Can't create BFI without FunctionAnalysisManager");
3272 BFI = FAM->getCachedResult<BlockFrequencyAnalysis>(*F);
3273 }
3274 return BFI;
3275}
3276
3277// Important note on validity of BPI/BFI. JumpThreading tries to preserve
3278// BPI/BFI as it goes. Thus if cached instance exists it will be updated.
3279// Otherwise, new instance of BPI/BFI is created (up to date by definition).
3280BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) {
3281 auto *Res = getBPI();
3282 if (Res)
3283 return Res;
3284
3285 if (Force)
3286 BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
3287
3288 return BPI;
3289}
3290
3291BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) {
3292 auto *Res = getBFI();
3293 if (Res)
3294 return Res;
3295
3296 if (Force)
3297 BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
3298
3299 return BFI;
3300}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static constexpr Value * getValue(Ty &ValueOrUse)
static unsigned getBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForBranchOnUndef - If we determine that the specified block ends in an undefined jump,...
static cl::opt< unsigned > PhiDuplicateThreshold("jump-threading-phi-threshold", cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), cl::Hidden)
static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, BasicBlock *KnownAtEndOfBB)
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
static cl::opt< bool > ThreadAcrossLoopHeaders("jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), cl::init(false), cl::Hidden)
static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
static void remapSourceAtoms(ValueToValueMapTy &VM, BasicBlock::iterator Begin, BasicBlock::iterator End)
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, ValueToValueMapTy &ValueMap)
addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new predecessor to the PHIBB block.
static BasicBlock * findMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &PredToDestList)
findMostPopularDest - The specified list contains multiple possible threadable destinations.
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
static bool hasAddressTakenAndUsed(BasicBlock *BB)
See the comments on JumpThreadingPass.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T
#define P(N)
ppc ctr loops verify
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static DominatorTree getDomTree(Function &F)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
static const uint32_t IV[8]
Definition blake3_impl.h:83
A manager for alias analyses.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI DbgMarker * createMarker(Instruction *I)
Attach a DbgMarker to the given instruction.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:687
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:484
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI DbgMarker * getMarker(InstListType::iterator It)
Return the DbgMarker for the position given by It, so that DbgRecords can be inserted there.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition BasicBlock.h:704
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
void disableDominatorTree()
Disable the use of the dominator tree during alias analysis queries.
The address of a basic block.
Definition Constants.h:1082
static LLVM_ABI BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getBlockFreq - Return block frequency.
Analysis providing branch probability information.
LLVM_ABI BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
uint32_t getNumerator() const
BranchProbability getCompl() const
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
This class is the base class for the comparison instructions.
Definition InstrTypes.h:728
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * getNot(Constant *C)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Per-instruction record of debug-info.
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(DbgMarker *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere, bool InsertAtHead=false)
Clone all DbgMarkers from From into this marker.
LLVM_ABI const BasicBlock * getParent() const
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getTemporary()
Definition DebugLoc.h:150
static DebugLoc getDropped()
Definition DebugLoc.h:153
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
Analysis pass which computes a DominatorTree.
Definition Dominators.h:270
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This class represents a freeze function that returns a random concrete value if an operand is either a ...
const BasicBlock & getEntryBlock() const
Definition Function.h:809
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
This instruction compares its operands according to the predicate given to the constructor.
Indirect Branch Instruction.
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
bool isSpecialTerminator() const
A wrapper class for inspecting calls to intrinsic functions.
LLVM_ABI bool simplifyPartiallyRedundantLoad(LoadInst *LI)
simplifyPartiallyRedundantLoad - If LoadI is an obviously partially redundant load instruction,...
LLVM_ABI bool processBranchOnXOR(BinaryOperator *BO)
processBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
LLVM_ABI bool processGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case if another branch o...
LLVM_ABI void updateSSA(BasicBlock *BB, BasicBlock *NewBB, ValueToValueMapTy &ValueMapping)
Update the SSA form.
bool computeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
LLVM_ABI void findLoopHeaders(Function &F)
findLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
LLVM_ABI bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB)
Merge basic block BB into its sole predecessor if possible.
LLVM_ABI JumpThreadingPass(int T=-1)
LLVM_ABI void cloneInstructions(ValueToValueMapTy &ValueMapping, BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB)
Clone instructions in range [BI, BE) to NewBB.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
LLVM_ABI Constant * evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *cond, const DataLayout &DL)
LLVM_ABI bool processBranchOnPHI(PHINode *PN)
processBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node (or freeze PH...
LLVM_ABI bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond)
Attempt to thread through two successive basic blocks.
LLVM_ABI bool computeValueKnownInPredecessorsImpl(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, SmallPtrSet< Value *, 4 > &RecursionSet, Instruction *CxtI=nullptr)
computeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
LLVM_ABI void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx)
DomTreeUpdater * getDomTreeUpdater() const
LLVM_ABI bool runImpl(Function &F, FunctionAnalysisManager *FAM, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, LazyValueInfo *LVI, AAResults *AA, std::unique_ptr< DomTreeUpdater > DTU, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI)
LLVM_ABI bool processThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
LLVM_ABI bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, CondBrInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches,...
LLVM_ABI bool processBlock(BasicBlock *BB)
processBlock - If there are any predecessors whose control can be threaded through to a successor,...
LLVM_ABI bool processImpliedCondition(BasicBlock *BB)
LLVM_ABI bool duplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs)
duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
LLVM_ABI void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB)
LLVM_ABI bool tryThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
tryThreadEdge - Thread an edge if it's safe and profitable to do so.
LLVM_ABI bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
tryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2
LLVM_ABI bool tryToUnfoldSelectInCurrBB(BasicBlock *BB)
tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
LLVM_ABI void threadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
threadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Analysis to compute lazy value information.
This pass computes, caches, and vends lazy value constraint information.
An instruction for reading from memory.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
bool isUnordered() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
Metadata node.
Definition Metadata.h:1069
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
Representation for a specific memory location.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition SSAUpdater.h:39
LLVM_ABI void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
LLVM_ABI void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
LLVM_ABI void UpdateDebugValues(Instruction *I)
Rewrite debug value intrinsics to conform to a new SSA form.
LLVM_ABI void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class represents the LLVM 'select' instruction.
size_type size() const
Definition SmallPtrSet.h:99
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
'undef' values are things that do not have specified contents.
Definition Constants.h:1625
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
See the file comment.
Definition ValueMap.h:84
iterator find(const KeyT &Val)
Definition ValueMap.h:160
iterator end()
Definition ValueMap.h:139
ValueMapIteratorImpl< MapT, const Value *, false > iterator
Definition ValueMap.h:135
DMAtomT AtomMap
Map {(InlinedAt, old atom number) -> new atom number}.
Definition ValueMap.h:123
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition Value.cpp:1128
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:346
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
Changed
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
initializer< Ty > init(const Ty &Val)
A private "module" namespace for types and utilities used by JumpThreading.
SmallVector< std::pair< Constant *, BasicBlock * >, 8 > PredValueInfoTy
SmallVectorImpl< std::pair< Constant *, BasicBlock * > > PredValueInfo
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
LLVM_ABI cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned long > StopAt("sbvec-stop-at", cl::init(StopAtDisabled), cl::Hidden, cl::desc("Vectorize if the invocation count is < than this. 0 " "disables vectorization."))
InstructionCost Cost
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
auto pred_end(const MachineBasicBlock *BB)
LLVM_ABI unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
Definition Local.cpp:3241
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
constexpr from_range_t from_range
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
LLVM_ABI Value * findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
Definition Loads.cpp:689
LLVM_ABI void remapDebugVariable(ValueToValueMapTy &Mapping, Instruction *Inst)
Remap the operands of the debug records attached to Inst, and the operands of Inst itself if it's a d...
Definition Local.cpp:3477
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
auto pred_size(const MachineBasicBlock *BB)
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition Local.cpp:723
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition Loads.cpp:554
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
LLVM_ABI BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
constexpr detail::StaticCastFunc< To > StaticCastTo
Function objects corresponding to the Cast types defined above.
Definition Casting.h:882
LLVM_ABI bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1155
LLVM_ABI bool HasLoopOrEntryConvergenceToken(const BasicBlock *BB)
Check if the given basic block contains any loop or entry convergent intrinsic instructions.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
LLVM_ABI bool hasValidBranchWeightMD(const Instruction &I)
Checks if an instruction has valid Branch Weight Metadata.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1398
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI void cloneNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, DenseMap< MDNode *, MDNode * > &ClonedScopes, StringRef Ext, LLVMContext &Context)
Duplicate the specified list of noalias decl scopes.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3105
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
TargetTransformInfo TTI
LLVM_ABI void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!...
Definition Local.cpp:763
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2051
LLVM_ABI void adaptNoAliasScopes(llvm::Instruction *I, const DenseMap< MDNode *, MDNode * > &ClonedScopes, LLVMContext &Context)
Adapt the metadata for the specified instruction according to the provided mapping.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2087
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
Definition STLExtras.h:1408
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1716
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:107
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1595
LLVM_ABI void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock * > > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them.
Definition CFG.cpp:36
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
LLVM_ABI std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
Function object to check whether the second component of a container supported by std::get (like std:...
Definition STLExtras.h:1447