LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cstddef>
84#include <cstdint>
85#include <iterator>
86#include <map>
87#include <optional>
88#include <set>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace PatternMatch;
95
96#define DEBUG_TYPE "simplifycfg"
97
98namespace llvm {
99
// Command-line knobs controlling SimplifyCFG's individual transforms.
// NOTE(review): several declarator lines below were dropped by the doxygen
// extraction (their original line numbers are absent from the fused
// numbering). The presumed option variable names are noted per option in
// reviewer comments -- confirm each against upstream SimplifyCFG.cpp.
//
// NOTE(review): orig. line 100 missing; presumably
// `cl::opt<bool> RequireAndPreserveDomTree(` -- TODO confirm.
101 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
102
103 cl::desc(
104 "Temporary development switch used to gradually uplift SimplifyCFG "
105 "into preserving DomTree,"));
106
107// Chosen as 2 so as to be cheap, but still to have enough power to fold
108// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
109// To catch this, we need to fold a compare and a select, hence '2' being the
110// minimum reasonable default.
// NOTE(review): orig. line 111 missing; presumably
// `static cl::opt<unsigned> PHINodeFoldingThreshold(` -- confirm.
112 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
113 cl::desc(
114 "Control the amount of phi node folding to perform (default = 2)"));
115
// NOTE(review): orig. line 116 missing; presumably
// `static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(` -- confirm.
117 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
118 cl::desc("Control the maximal total instruction cost that we are willing "
119 "to speculatively execute to fold a 2-entry PHI node into a "
120 "select (default = 4)"));
121
122static cl::opt<bool>
123 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
124 cl::desc("Hoist common instructions up to the parent block"));
125
// NOTE(review): orig. line 126 missing; presumably
// `static cl::opt<bool> HoistLoadsWithCondFaulting(` -- confirm.
127 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
128 cl::desc("Hoist loads if the target supports conditional faulting"));
129
// NOTE(review): orig. line 130 missing; presumably
// `static cl::opt<bool> HoistStoresWithCondFaulting(` -- confirm.
131 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
132 cl::desc("Hoist stores if the target supports conditional faulting"));
133
// NOTE(review): orig. line 134 missing; presumably
// `static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(`.
135 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
136 cl::desc("Control the maximal conditional load/store that we are willing "
137 "to speculatively execute to eliminate conditional branch "
138 "(default = 6)"));
139
// NOTE(review): orig. line 140 missing; presumably `static cl::opt<unsigned>`.
141 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
142 cl::init(20),
143 cl::desc("Allow reordering across at most this many "
144 "instructions when hoisting"));
145
146static cl::opt<bool>
147 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
148 cl::desc("Sink common instructions down to the end block"));
149
// NOTE(review): orig. line 150 missing; presumably
// `static cl::opt<bool> HoistCondStores(` -- confirm.
151 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
152 cl::desc("Hoist conditional stores if an unconditional store precedes"));
153
// NOTE(review): orig. line 154 missing; presumably
// `static cl::opt<bool> MergeCondStores(` -- confirm.
155 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
156 cl::desc("Hoist conditional stores even if an unconditional store does not "
157 "precede - hoist multiple conditional stores into a single "
158 "predicated store"));
159
// NOTE(review): orig. line 160 missing; presumably
// `static cl::opt<bool> MergeCondStoresAggressively(` -- confirm.
161 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
162 cl::desc("When merging conditional stores, do so even if the resultant "
163 "basic blocks are unlikely to be if-converted as a result"));
164
// NOTE(review): orig. line 165 missing; presumably
// `static cl::opt<bool> SpeculateOneExpensiveInst(` -- this name is
// referenced later in dominatesMergePoint(); confirm.
166 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
167 cl::desc("Allow exactly one expensive instruction to be speculatively "
168 "executed"));
169
// NOTE(review): orig. line 170 missing; presumably
// `static cl::opt<unsigned> MaxSpeculationDepth(` -- confirm.
171 "max-speculation-depth", cl::Hidden, cl::init(10),
172 cl::desc("Limit maximum recursion depth when calculating costs of "
173 "speculatively executed instructions"));
174
175static cl::opt<int>
176 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
177 cl::init(10),
178 cl::desc("Max size of a block which is still considered "
179 "small enough to thread through"));
180
181// Two is chosen to allow one negation and a logical combine.
// NOTE(review): orig. line 182 missing; presumably `static cl::opt<unsigned>`.
183 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
184 cl::init(2),
185 cl::desc("Maximum cost of combining conditions when "
186 "folding branches"));
187
// NOTE(review): orig. line 188 missing; presumably
// `static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(`.
189 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
190 cl::init(2),
191 cl::desc("Multiplier to apply to threshold when determining whether or not "
192 "to fold branch to common destination when vector operations are "
193 "present"));
194
// NOTE(review): orig. line 195 missing; presumably
// `static cl::opt<bool> MergeCompatibleInvokes(` -- confirm.
196 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
197 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
198
// NOTE(review): orig. line 199 missing; presumably
// `static cl::opt<unsigned> MaxSwitchCasesPerResult(` -- confirm.
200 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
201 cl::desc("Limit cases to analyze when converting a switch to select"));
202
// NOTE(review): orig. line 203 missing; presumably
// `static cl::opt<unsigned> MaxJumpThreadingLiveBlocks(` -- confirm.
204 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
205 cl::desc("Limit number of blocks a define in a threaded block is allowed "
206 "to be live in"));
207
// NOTE(review): orig. line 208 is also missing (another declaration lost
// in extraction) -- confirm against upstream.
209
210} // end namespace llvm
211
// Pass statistics, reported via -stats. Each STATISTIC counts one kind of
// CFG simplification performed by this file.
// NOTE(review): two `STATISTIC(` opener lines (orig. 217 and 225) were
// dropped in extraction; they presumably open the NumLookupTablesHoles and
// NumHoistCommonCode entries below -- confirm against upstream.
212STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
213STATISTIC(NumLinearMaps,
214 "Number of switch instructions turned into linear mapping");
215STATISTIC(NumLookupTables,
216 "Number of switch instructions turned into lookup tables");
218 NumLookupTablesHoles,
219 "Number of switch instructions turned into lookup tables (holes checked)");
220STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
221STATISTIC(NumFoldValueComparisonIntoPredecessors,
222 "Number of value comparisons folded into predecessor basic blocks");
223STATISTIC(NumFoldBranchToCommonDest,
224 "Number of branches folded into predecessor basic block");
226 NumHoistCommonCode,
227 "Number of common instruction 'blocks' hoisted up to the begin block");
228STATISTIC(NumHoistCommonInstrs,
229 "Number of common instructions hoisted up to the begin block");
230STATISTIC(NumSinkCommonCode,
231 "Number of common instruction 'blocks' sunk down to the end block");
232STATISTIC(NumSinkCommonInstrs,
233 "Number of common instructions sunk down to the end block");
234STATISTIC(NumSpeculations, "Number of speculative executed instructions");
235STATISTIC(NumInvokes,
236 "Number of invokes with empty resume blocks simplified into calls");
237STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
238STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
239
240namespace {
241
242// The first field contains the value that the switch produces when a certain
243// case group is selected, and the second field is a vector containing the
244// cases composing the case group.
245using SwitchCaseResultVectorTy =
// NOTE(review): the right-hand side of this alias (orig. line 246) was lost
// in extraction; presumably
// `SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;`
// -- confirm against upstream.
247
248// The first field contains the phi node that generates a result of the switch
249// and the second field contains the value generated for a certain case in the
250// switch for that PHI.
251using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
252
253/// ValueEqualityComparisonCase - Represents a case of a switch.
254struct ValueEqualityComparisonCase {
// NOTE(review): orig. line 255 missing; presumably `ConstantInt *Value;`
// (the constructor and operator< below both reference a `Value` member).
256 BasicBlock *Dest;
257
258 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
259 : Value(Value), Dest(Dest) {}
260
261 bool operator<(ValueEqualityComparisonCase RHS) const {
262 // Comparing pointers is ok as we only rely on the order for uniquing.
263 return Value < RHS.Value;
264 }
265
// Equality against a block compares the case *destination*, which is what
// llvm::erase(Cases, BB) in eliminateBlockCases() relies on.
266 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
267};
268
/// Driver class for the per-block CFG peephole simplifications in this file.
/// An instance is constructed per invocation with the analyses it needs and
/// run() / simplifyOnce() are invoked on individual basic blocks.
269class SimplifyCFGOpt {
// Cost model used to decide whether hoisting/speculation is profitable.
270 const TargetTransformInfo &TTI;
// Optional dominator-tree updater; may be null (see ctor assert below).
271 DomTreeUpdater *DTU;
272 const DataLayout &DL;
// Loop headers we must not destroy; held as WeakVH so deletions are visible.
273 ArrayRef<WeakVH> LoopHeaders;
274 const SimplifyCFGOptions &Options;
// Set by requestResimplify() when a transform wants another iteration.
275 bool Resimplify;
276
// Helpers for recognizing and merging value-equality comparisons
// (switches and eq/ne conditional branches) with their predecessors.
277 Value *isValueEqualityComparison(Instruction *TI);
278 BasicBlock *getValueEqualityComparisonCases(
279 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
280 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
281 BasicBlock *Pred,
282 IRBuilder<> &Builder);
283 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
284 Instruction *PTI,
285 IRBuilder<> &Builder);
286 bool foldValueComparisonIntoPredecessors(Instruction *TI,
287 IRBuilder<> &Builder);
288
// Per-terminator-kind simplification entry points.
289 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
290 bool simplifySingleResume(ResumeInst *RI);
291 bool simplifyCommonResume(ResumeInst *RI);
292 bool simplifyCleanupReturn(CleanupReturnInst *RI);
293 bool simplifyUnreachable(UnreachableInst *UI);
294 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
295 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
296 bool simplifyIndirectBr(IndirectBrInst *IBI);
297 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
298 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
299 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
300 bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);
301
302 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
303 IRBuilder<> &Builder);
304
// Hoisting/sinking and speculation helpers.
305 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
306 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
307 Instruction *TI, Instruction *I1,
308 SmallVectorImpl<Instruction *> &OtherSuccTIs);
309 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
310 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
311 BasicBlock *TrueBB, BasicBlock *FalseBB,
312 uint32_t TrueWeight, uint32_t FalseWeight);
313 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
314 const DataLayout &DL);
315 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
316 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
317 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
318
319public:
320 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
321 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
322 const SimplifyCFGOptions &Opts)
323 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
// This pass can keep a DomTree valid, but not a PostDomTree.
324 assert((!DTU || !DTU->hasPostDomTree()) &&
325 "SimplifyCFG is not yet capable of maintaining validity of a "
326 "PostDomTree, so don't ask for it.");
327 }
328
329 bool simplifyOnce(BasicBlock *BB);
330 bool run(BasicBlock *BB);
331
332 // Helper to set Resimplify and return change indication.
333 bool requestResimplify() {
334 Resimplify = true;
335 return true;
336 }
337};
338
339// we synthesize a || b as select a, true, b
340// we synthesize a && b as select a, b, false
341// this function determines if SI is playing one of those roles.
342[[maybe_unused]] bool
343isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
344 return ((isa<ConstantInt>(SI->getTrueValue()) &&
345 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
346 (isa<ConstantInt>(SI->getFalseValue()) &&
347 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
348}
349
350} // end anonymous namespace
351
352/// Return true if all the PHI nodes in the basic block \p BB
353/// receive compatible (identical) incoming values when coming from
354/// all of the predecessor blocks that are specified in \p IncomingBlocks.
355///
356/// Note that if the values aren't exactly identical, but \p EquivalenceSet
357/// is provided, and *both* of the values are present in the set,
358/// then they are considered equal.
// NOTE(review): the signature line (orig. 359) was lost in extraction;
// presumably `static bool incomingValuesAreCompatible(` -- confirm.
360 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
361 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
// Only the two-predecessor case is supported (both merge-point callers
// pass exactly a pair).
362 assert(IncomingBlocks.size() == 2 &&
363 "Only for a pair of incoming blocks at the time!");
364
365 // FIXME: it is okay if one of the incoming values is an `undef` value,
366 // iff the other incoming value is guaranteed to be a non-poison value.
367 // FIXME: it is okay if one of the incoming values is a `poison` value.
368 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
369 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
370 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
// Identical values are trivially compatible; otherwise both must be
// members of the caller-provided equivalence set.
371 if (IV0 == IV1)
372 return true;
373 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
374 EquivalenceSet->contains(IV1))
375 return true;
376 return false;
377 });
378}
379
380/// Return true if it is safe to merge these two
381/// terminator instructions together.
382static bool
// NOTE(review): orig. line 383 was lost in extraction; presumably
// `safeToMergeTerminators(Instruction *SI1, Instruction *SI2,` -- confirm.
384 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
385 if (SI1 == SI2)
386 return false; // Can't merge with self!
387
388 // It is not safe to merge these two switch instructions if they have a common
389 // successor, and if that successor has a PHI node, and if *that* PHI node has
390 // conflicting incoming values from the two switch blocks.
391 BasicBlock *SI1BB = SI1->getParent();
392 BasicBlock *SI2BB = SI2->getParent();
393
// NOTE(review): orig. line 394 was lost in extraction; presumably the
// declaration of `SI1Succs` (a SmallPtrSet of SI1BB's successors, used in
// the loop below) -- confirm against upstream.
395 bool Fail = false;
396 for (BasicBlock *Succ : successors(SI2BB)) {
// Only shared successors can have conflicting PHI inputs.
397 if (!SI1Succs.count(Succ))
398 continue;
399 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
400 continue;
401 Fail = true;
// When the caller wants the full set of conflicting successors, keep
// scanning; otherwise the first conflict is enough.
402 if (FailBlocks)
403 FailBlocks->insert(Succ);
404 else
405 break;
406 }
407
408 return !Fail;
409}
410
411/// Update PHI nodes in Succ to indicate that there will now be entries in it
412/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
413/// will be the same as those coming in from ExistPred, an existing predecessor
414/// of Succ.
415static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
416 BasicBlock *ExistPred,
417 MemorySSAUpdater *MSSAU = nullptr) {
418 for (PHINode &PN : Succ->phis())
419 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
420 if (MSSAU)
421 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
422 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
423}
424
425/// Compute an abstract "cost" of speculating the given instruction,
426/// which is assumed to be safe to speculate. TCC_Free means cheap,
427/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
428/// expensive.
// NOTE(review): the signature line (orig. 429) was lost in extraction;
// presumably `static InstructionCost computeSpeculationCost(const User *I,`
// -- confirm against upstream.
430 const TargetTransformInfo &TTI) {
// Size-and-latency is the cost kind used throughout SimplifyCFG's
// speculation budget checks.
431 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
432}
433
434/// If we have a merge point of an "if condition" as accepted above,
435/// return true if the specified value dominates the block. We don't handle
436/// the true generality of domination here, just a special case which works
437/// well enough for us.
438///
439/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
440/// see if V (which must be an instruction) and its recursive operands
441/// that do not dominate BB have a combined cost lower than Budget and
442/// are non-trapping. If both are true, the instruction is inserted into the
443/// set and true is returned.
444///
445/// The cost for most non-trapping instructions is defined as 1 except for
446/// Select whose cost is 2.
447///
448/// After this function returns, Cost is increased by the cost of
449/// V plus its non-dominating operands. If that cost is greater than
450/// Budget, false is returned and Cost is undefined.
// NOTE(review): orig. line 451 (the signature start, presumably
// `static bool dominatesMergePoint(`) was lost in extraction -- confirm.
452 Value *V, BasicBlock *BB, Instruction *InsertPt,
453 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
// NOTE(review): orig. line 454 missing; presumably the remaining parameters
// `InstructionCost Budget, const TargetTransformInfo &TTI,
// AssumptionCache *AC,` (Budget/TTI/AC are all used below) -- confirm.
455 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
456 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
457 // so limit the recursion depth.
458 // TODO: While this recursion limit does prevent pathological behavior, it
459 // would be better to track visited instructions to avoid cycles.
// NOTE(review): orig. line 460 missing; presumably
// `if (Depth == MaxSpeculationDepth)` guarding this early bail -- confirm.
461 return false;
462
// NOTE(review): orig. line 463 missing; presumably
// `Instruction *I = dyn_cast<Instruction>(V);` (I is used from here on).
464 if (!I) {
465 // Non-instructions dominate all instructions and can be executed
466 // unconditionally.
467 return true;
468 }
469 BasicBlock *PBB = I->getParent();
470
471 // We don't want to allow weird loops that might have the "if condition" in
472 // the bottom of this block.
473 if (PBB == BB)
474 return false;
475
476 // If this instruction is defined in a block that contains an unconditional
477 // branch to BB, then it must be in the 'conditional' part of the "if
478 // statement". If not, it definitely dominates the region.
// NOTE(review): orig. line 479 missing; presumably
// `BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());` -- confirm.
480 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
481 return true;
482
483 // If we have seen this instruction before, don't count it again.
484 if (AggressiveInsts.count(I))
485 return true;
486
487 // Okay, it looks like the instruction IS in the "condition". Check to
488 // see if it's a cheap instruction to unconditionally compute, and if it
489 // only uses stuff defined outside of the condition. If so, hoist it out.
490 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
491 return false;
492
493 // Overflow arithmetic instruction plus extract value are usually generated
494 // when a division is being replaced. But, in this case, the zero check may
495 // still be kept in the code. In that case it would be worth to hoist these
496 // two instruction out of the basic block. Let's treat this pattern as one
497 // single cheap instruction here!
498 WithOverflowInst *OverflowInst;
499 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
// The paired overflow intrinsic is recorded as zero-cost so it is not
// double-counted when the recursion reaches it through the operand walk.
500 ZeroCostInstructions.insert(OverflowInst);
501 Cost += 1;
502 } else if (!ZeroCostInstructions.contains(I))
503 Cost += computeSpeculationCost(I, TTI);
504
505 // Allow exactly one instruction to be speculated regardless of its cost
506 // (as long as it is safe to do so).
507 // This is intended to flatten the CFG even if the instruction is a division
508 // or other expensive operation. The speculation of an expensive instruction
509 // is expected to be undone in CodeGenPrepare if the speculation has not
510 // enabled further IR optimizations.
511 if (Cost > Budget &&
512 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
513 !Cost.isValid()))
514 return false;
515
516 // Okay, we can only really hoist these out if their operands do
517 // not take us over the cost threshold.
518 for (Use &Op : I->operands())
519 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
520 TTI, AC, ZeroCostInstructions, Depth + 1))
521 return false;
522 // Okay, it's safe to do this! Remember this instruction.
523 AggressiveInsts.insert(I);
524 return true;
525}
526
527/// Extract ConstantInt from value, looking through IntToPtr
528/// and PointerNullValue. Return NULL if value is not a constant int.
// NOTE(review): the signature line (orig. 529) was lost in extraction;
// presumably `static ConstantInt *getConstantInt(Value *V, const DataLayout
// &DL) {` -- confirm against upstream.
530 // Normal constant int.
// NOTE(review): orig. line 531 missing; presumably
// `ConstantInt *CI = dyn_cast<ConstantInt>(V);` (CI is tested next).
532 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
533 return CI;
534
535 // It is not safe to look through inttoptr or ptrtoint when using unstable
536 // pointer types.
537 if (DL.hasUnstableRepresentation(V->getType()))
538 return nullptr;
539
540 // This is some kind of pointer constant. Turn it into a pointer-sized
541 // ConstantInt if possible.
542 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
543
544 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
// NOTE(review): orig. line 545 missing; presumably
// `if (isa<ConstantPointerNull>(V))` guarding this return -- confirm.
546 return ConstantInt::get(IntPtrTy, 0);
547
548 // IntToPtr const int, we can look through this if the semantics of
549 // inttoptr for this address space are a simple (truncating) bitcast.
// NOTE(review): orig. line 550 missing; presumably
// `if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))` -- CE is used below.
551 if (CE->getOpcode() == Instruction::IntToPtr)
552 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
553 // The constant is very likely to have the right type already.
554 if (CI->getType() == IntPtrTy)
555 return CI;
556 else
557 return cast<ConstantInt>(
558 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
559 }
560 return nullptr;
561}
562
563namespace {
564
565/// Given a chain of or (||) or and (&&) comparison of a value against a
566/// constant, this will try to recover the information required for a switch
567/// structure.
568/// It will depth-first traverse the chain of comparison, seeking for patterns
569/// like %a == 12 or %a < 4 and combine them to produce a set of integer
570/// representing the different cases for the switch.
571/// Note that if the chain is composed of '||' it will build the set of elements
572/// that matches the comparisons (i.e. any of this value validate the chain)
573/// while for a chain of '&&' it will build the set elements that make the test
574/// fail.
575struct ConstantComparesGatherer {
576 const DataLayout &DL;
577
578 /// Value found for the switch comparison
579 Value *CompValue = nullptr;
580
581 /// Extra clause to be checked before the switch
582 Value *Extra = nullptr;
583
584 /// Set of integers to match in switch
// NOTE(review): the member declaration itself (orig. line 585) was lost in
// extraction; presumably `SmallVector<ConstantInt *, 8> Vals;` (Vals is
// push_back'ed and clear()ed throughout this struct) -- confirm.
586
587 /// Number of comparisons matched in the and/or chain
588 unsigned UsedICmps = 0;
589
590 /// If the elements in Vals matches the comparisons
591 bool IsEq = false;
592
593 // Used to check if the first matched CompValue shall be the Extra check.
594 bool IgnoreFirstMatch = false;
595 bool MultipleMatches = false;
596
597 /// Construct and compute the result for the comparison instruction Cond
// The constructor runs gather() once; if that failed solely because the
// first matched value differed from the rest (MultipleMatches with no
// CompValue), it retries once, this time skipping the first match so it
// can become the "Extra" clause instead.
598 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
599 gather(Cond);
600 if (CompValue || !MultipleMatches)
601 return;
602 Extra = nullptr;
603 Vals.clear();
604 UsedICmps = 0;
605 IgnoreFirstMatch = true;
606 gather(Cond);
607 }
608
609 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
610 ConstantComparesGatherer &
611 operator=(const ConstantComparesGatherer &) = delete;
612
613private:
614 /// Try to set the current value used for the comparison, it succeeds only if
615 /// it wasn't set before or if the new value is the same as the old one
616 bool setValueOnce(Value *NewVal) {
617 if (IgnoreFirstMatch) {
618 IgnoreFirstMatch = false;
619 return false;
620 }
621 if (CompValue && CompValue != NewVal) {
622 MultipleMatches = true;
623 return false;
624 }
625 CompValue = NewVal;
626 return true;
627 }
628
629 /// Try to match Instruction "I" as a comparison against a constant and
630 /// populates the array Vals with the set of values that match (or do not
631 /// match depending on isEQ).
632 /// Return false on failure. On success, the Value the comparison matched
633 /// against is placed in CompValue.
634 /// If CompValue is already set, the function is expected to fail if a match
635 /// is found but the value compared to is different.
636 bool matchInstruction(Instruction *I, bool isEQ) {
// Peel a `not`: m_Instruction(I) rebinds I to the negated operand and the
// polarity of the match flips.
637 if (match(I, m_Not(m_Instruction(I))))
638 isEQ = !isEQ;
639
// An nuw trunc to i1 is an implicit "== 1" / "== 0" test of Val.
640 Value *Val;
641 if (match(I, m_NUWTrunc(m_Value(Val)))) {
642 // If we already have a value for the switch, it has to match!
643 if (!setValueOnce(Val))
644 return false;
645 UsedICmps++;
646 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
647 return true;
648 }
649 // If this is an icmp against a constant, handle this as one of the cases.
650 ICmpInst *ICI;
651 ConstantInt *C;
652 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
653 (C = getConstantInt(I->getOperand(1), DL)))) {
654 return false;
655 }
656
657 Value *RHSVal;
658 const APInt *RHSC;
659
660 // Pattern match a special case
661 // (x & ~2^z) == y --> x == y || x == y|2^z
662 // This undoes a transformation done by instcombine to fuse 2 compares.
663 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
664 // It's a little bit hard to see why the following transformations are
665 // correct. Here is a CVC3 program to verify them for 64-bit values:
666
667 /*
668 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
669 x : BITVECTOR(64);
670 y : BITVECTOR(64);
671 z : BITVECTOR(64);
672 mask : BITVECTOR(64) = BVSHL(ONE, z);
673 QUERY( (y & ~mask = y) =>
674 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
675 );
676 QUERY( (y | mask = y) =>
677 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
678 );
679 */
680
681 // Please note that each pattern must be a dual implication (<--> or
682 // iff). One directional implication can create spurious matches. If the
683 // implication is only one-way, an unsatisfiable condition on the left
684 // side can imply a satisfiable condition on the right side. Dual
685 // implication ensures that satisfiable conditions are transformed to
686 // other satisfiable conditions and unsatisfiable conditions are
687 // transformed to other unsatisfiable conditions.
688
689 // Here is a concrete example of a unsatisfiable condition on the left
690 // implying a satisfiable condition on the right:
691 //
692 // mask = (1 << z)
693 // (x & ~mask) == y --> (x == y || x == (y | mask))
694 //
695 // Substituting y = 3, z = 0 yields:
696 // (x & -2) == 3 --> (x == 3 || x == 2)
697
698 // Pattern match a special case:
699 /*
700 QUERY( (y & ~mask = y) =>
701 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
702 );
703 */
704 if (match(ICI->getOperand(0),
705 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
706 APInt Mask = ~*RHSC;
707 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
708 // If we already have a value for the switch, it has to match!
709 if (!setValueOnce(RHSVal))
710 return false;
711
712 Vals.push_back(C);
713 Vals.push_back(
714 ConstantInt::get(C->getContext(),
715 C->getValue() | Mask));
716 UsedICmps++;
717 return true;
718 }
719 }
720
721 // Pattern match a special case:
722 /*
723 QUERY( (y | mask = y) =>
724 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
725 );
726 */
727 if (match(ICI->getOperand(0),
728 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
729 APInt Mask = *RHSC;
730 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
731 // If we already have a value for the switch, it has to match!
732 if (!setValueOnce(RHSVal))
733 return false;
734
735 Vals.push_back(C);
736 Vals.push_back(ConstantInt::get(C->getContext(),
737 C->getValue() & ~Mask));
738 UsedICmps++;
739 return true;
740 }
741 }
742
743 // If we already have a value for the switch, it has to match!
744 if (!setValueOnce(ICI->getOperand(0)))
745 return false;
746
747 UsedICmps++;
748 Vals.push_back(C);
749 return true;
750 }
751
752 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
753 ConstantRange Span =
// NOTE(review): the initializer (orig. line 754) was lost in extraction;
// presumably `ConstantRange::makeExactICmpRegion(ICI->getPredicate(),
// C->getValue());` -- confirm against upstream.
755
756 // Shift the range if the compare is fed by an add. This is the range
757 // compare idiom as emitted by instcombine.
758 Value *CandidateVal = I->getOperand(0);
759 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
760 Span = Span.subtract(*RHSC);
761 CandidateVal = RHSVal;
762 }
763
764 // If this is an and/!= check, then we are looking to build the set of
765 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
766 // x != 0 && x != 1.
767 if (!isEQ)
768 Span = Span.inverse();
769
770 // If there are a ton of values, we don't want to make a ginormous switch.
771 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
772 return false;
773 }
774
775 // If we already have a value for the switch, it has to match!
776 if (!setValueOnce(CandidateVal))
777 return false;
778
779 // Add all values from the range to the set
780 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
781 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
782
783 UsedICmps++;
784 return true;
785 }
786
787 /// Given a potentially 'or'd or 'and'd together collection of icmp
788 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
789 /// the value being compared, and stick the list constants into the Vals
790 /// vector.
791 /// One "Extra" case is allowed to differ from the other.
792 void gather(Value *V) {
// The root decides the polarity: an or-chain collects matching values,
// an and-chain collects the values that make the test fail.
793 Value *Op0, *Op1;
794 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
795 IsEq = true;
796 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
797 IsEq = false;
798 else
799 return;
800 // Keep a stack (SmallVector for efficiency) for depth-first traversal
801 SmallVector<Value *, 8> DFT{Op0, Op1};
802 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
803
804 while (!DFT.empty()) {
805 V = DFT.pop_back_val();
806
807 if (Instruction *I = dyn_cast<Instruction>(V)) {
808 // If it is a || (or && depending on isEQ), process the operands.
809 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
810 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
811 if (Visited.insert(Op1).second)
812 DFT.push_back(Op1);
813 if (Visited.insert(Op0).second)
814 DFT.push_back(Op0);
815
816 continue;
817 }
818
819 // Try to match the current instruction
820 if (matchInstruction(I, IsEq))
821 // Match succeed, continue the loop
822 continue;
823 }
824
825 // One element of the sequence of || (or &&) could not be match as a
826 // comparison against the same value as the others.
827 // We allow only one "Extra" case to be checked before the switch
828 if (!Extra) {
829 Extra = V;
830 continue;
831 }
832 // Failed to parse a proper sequence, abort now
833 CompValue = nullptr;
834 break;
835 }
836 }
837};
838
839} // end anonymous namespace
840
// Erase terminator TI and then clean up its (now possibly dead) condition
// value, if any. NOTE(review): the signature line (orig. 841) was lost in
// extraction; presumably
// `static void eraseTerminatorAndDCECond(Instruction *TI,` -- confirm.
842 MemorySSAUpdater *MSSAU = nullptr) {
843 Instruction *Cond = nullptr;
// NOTE(review): orig. line 844 missing; presumably
// `if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {` (SI is used below).
845 Cond = dyn_cast<Instruction>(SI->getCondition());
846 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
// Unconditional branches have no condition to clean up.
847 if (BI->isConditional())
848 Cond = dyn_cast<Instruction>(BI->getCondition());
849 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
850 Cond = dyn_cast<Instruction>(IBI->getAddress());
851 }
852
853 TI->eraseFromParent();
854 if (Cond)
// NOTE(review): orig. line 855 missing; presumably
// `RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);`
// -- confirm against upstream.
856}
857
858/// Return true if the specified terminator checks
859/// to see if a value is equal to constant integer value.
860Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
861 Value *CV = nullptr;
862 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
863 // Do not permit merging of large switch instructions into their
864 // predecessors unless there is only one predecessor.
865 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
866 CV = SI->getCondition();
867 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
868 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
869 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
870 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
871 CV = ICI->getOperand(0);
872 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
873 if (Trunc->hasNoUnsignedWrap())
874 CV = Trunc->getOperand(0);
875 }
876 }
877
878 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
879 if (CV) {
880 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
881 Value *Ptr = PTII->getPointerOperand();
882 if (DL.hasUnstableRepresentation(Ptr->getType()))
883 return CV;
884 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
885 CV = Ptr;
886 }
887 }
888 return CV;
889}
890
891/// Given a value comparison instruction,
892/// decode all of the 'cases' that it represents and return the 'default' block.
893BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
894 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
895 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
896 Cases.reserve(SI->getNumCases());
897 for (auto Case : SI->cases())
898 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
899 Case.getCaseSuccessor()));
900 return SI->getDefaultDest();
901 }
902
903 BranchInst *BI = cast<BranchInst>(TI);
904 Value *Cond = BI->getCondition();
905 ICmpInst::Predicate Pred;
906 ConstantInt *C;
907 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
908 Pred = ICI->getPredicate();
909 C = getConstantInt(ICI->getOperand(1), DL);
910 } else {
911 Pred = ICmpInst::ICMP_NE;
912 auto *Trunc = cast<TruncInst>(Cond);
913 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
914 }
915 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
916 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
917 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
918}
919
/// Given a vector of bb/value pairs, remove any entries
/// in the list that match the specified block.
static void
// NOTE(review): the extraction dropped the line carrying this function's name
// and first parameter here (presumably
// "eliminateBlockCases(BasicBlock *BB,") — confirm against upstream.
    std::vector<ValueEqualityComparisonCase> &Cases) {
  // ValueEqualityComparisonCase compares equal to a BasicBlock* via its Dest,
  // so this erases every case whose destination is BB.
  llvm::erase(Cases, BB);
}
927
928/// Return true if there are any keys in C1 that exist in C2 as well.
929static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
930 std::vector<ValueEqualityComparisonCase> &C2) {
931 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
932
933 // Make V1 be smaller than V2.
934 if (V1->size() > V2->size())
935 std::swap(V1, V2);
936
937 if (V1->empty())
938 return false;
939 if (V1->size() == 1) {
940 // Just scan V2.
941 ConstantInt *TheVal = (*V1)[0].Value;
942 for (const ValueEqualityComparisonCase &VECC : *V2)
943 if (TheVal == VECC.Value)
944 return true;
945 }
946
947 // Otherwise, just sort both lists and compare element by element.
948 array_pod_sort(V1->begin(), V1->end());
949 array_pod_sort(V2->begin(), V2->end());
950 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
951 while (i1 != e1 && i2 != e2) {
952 if ((*V1)[i1].Value == (*V2)[i2].Value)
953 return true;
954 if ((*V1)[i1].Value < (*V2)[i2].Value)
955 ++i1;
956 else
957 ++i2;
958 }
959 return false;
960}
961
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
  eliminateBlockCases(PredDef, PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
  eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!valuesOverlap(PredCases, ThisCases))
      return false;

    if (isa<BranchInst>(TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      // NOTE(review): the extraction appears to drop a line here (likely the
      // call that erases TI and DCEs its now-dead condition) — confirm
      // against upstream.

      if (DTU)
        DTU->applyUpdates(
            {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (const ValueEqualityComparisonCase &Case : PredCases)
      DeadCases.insert(Case.Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    // Walk the cases in reverse so that removing a case during iteration
    // does not skip over the entries still to be visited.
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(i->getCaseValue())) {
        Successor->removePredecessor(PredDef);
        SI.removeCase(i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    if (DTU) {
      // Only delete a CFG edge when the last case targeting that successor
      // was removed.
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back({DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (const auto &[Value, Dest] : PredCases)
    if (Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (const auto &[Value, Dest] : ThisCases)
    if (Value == TIV) {
      TheRealDest = Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Succ);
      Succ->removePredecessor(TIBB);
    } else
      // Only the first edge to TheRealDest is kept; later duplicates are
      // treated as removable.
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  // NOTE(review): another line appears to be dropped here by the extraction
  // (likely erasing TI) — confirm against upstream.
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1115
namespace {

/// This class implements a stable ordering of constant
/// integers that does not depend on their address. This is important for
/// applications that sort ConstantInt's to ensure uniqueness.
struct ConstantIntOrdering {
  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
    // Compare by unsigned numeric value rather than by pointer so the order
    // is deterministic across runs.
    return LHS->getValue().ult(RHS->getValue());
  }
};

} // end anonymous namespace
1128
// Three-way comparator over ConstantInt values in the array_pod_sort style.
// NOTE(review): the extraction dropped the line with this function's name and
// first parameter (presumably
// "static int constantIntSortPredicate(ConstantInt *const *P1,") — confirm
// against upstream.
                                     ConstantInt *const *P2) {
  const ConstantInt *LHS = *P1;
  const ConstantInt *RHS = *P2;
  // Identical pointers compare equal; otherwise +1 when LHS is
  // unsigned-less-than RHS, -1 otherwise.
  if (LHS == RHS)
    return 0;
  return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
}
1137
/// Get Weights of a given terminator, the default weight is at the front
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
/// metadata.
// NOTE(review): the extraction dropped the signature line here (presumably
// "static void getBranchWeights(Instruction *TI,") — confirm against
// upstream.
                             SmallVectorImpl<uint64_t> &Weights) {
  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
  assert(MD && "Invalid branch-weight metadata");
  extractFromBranchWeightMD64(MD, Weights);

  // If TI is a conditional eq, the default case is the false case,
  // and the corresponding branch-weight data is at index 2. We swap the
  // default weight to be the first entry.
  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    assert(Weights.size() == 2);
    auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICI)
      return;

    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      std::swap(Weights.front(), Weights.back());
  }
}
1160
// Clone BB's non-terminator ("bonus") instructions into PredBlock just before
// its terminator, recording the mapping in VMap and rewiring PHI uses that
// come in via PredBlock to the clones.
// NOTE(review): the extraction dropped the signature line here (presumably
// "static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(") —
// confirm against upstream.
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc::getDropped());
    } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
      mapAtomInstance(DL, VMap);
    }

    RemapInstruction(NewBonusInst, VMap,
    // NOTE(review): the remap-flags argument line was dropped by the
    // extraction — confirm against upstream.

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(PredBlock, PTI->getIterator());
    auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
    RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
    // NOTE(review): the remap-flags argument line was dropped by the
    // extraction — confirm against upstream.

    NewBonusInst->takeName(&BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(BonusInst.uses())) {
      auto *UI = cast<Instruction>(U.getUser());
      auto *PN = dyn_cast<PHINode>(UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }

  // Key Instructions: We may have propagated atom info into the pred. If the
  // pred's terminator already has atom info do nothing as merging would drop
  // one atom group anyway. If it doesn't, propagate the remapped atom group
  // from BB's terminator.
  if (auto &PredDL = PTI->getDebugLoc()) {
    auto &DL = BB->getTerminator()->getDebugLoc();
    if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
        PredDL.isSameSourceLocation(DL)) {
      PTI->setDebugLoc(DL);
      RemapSourceAtom(PTI, VMap);
    }
  }
}
1239
/// Fold the value-equality comparisons TI (in BB) and PTI (in Pred, which
/// both compare the value CV) into a single switch built in Pred, merging
/// their case lists and preserving branch-weight metadata where possible.
bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  // NOTE(review): the extraction dropped a declaration line here (presumably
  // "SmallVector<DominatorTree::UpdateType> Updates;") — confirm against
  // upstream.

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  SmallMapVector<BasicBlock *, int, 8> NewSuccessors;

  // Update the branch weight metadata along the way
  SmallVector<uint64_t, 8> Weights;
  bool PredHasWeights = hasBranchWeightMD(*PTI);
  bool SuccHasWeights = hasBranchWeightMD(*TI);

  if (PredHasWeights) {
    getBranchWeights(PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(1 + PredCases.size(), 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    getBranchWeights(TI, SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(1 + BBCases.size(), 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        // Swap-with-back then pop to erase in O(1); i/e are re-adjusted so
        // the swapped-in element is revisited.
        std::swap(PredCases[i], PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        std::swap(PredCases[i], PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (const ValueEqualityComparisonCase &Case : BBCases)
      if (PTIHandled.count(Case.Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(WeightsForHandled[Case.Value]);
        PredCases.push_back(Case);
        ++NewSuccessors[Case.Dest];
        PTIHandled.erase(Case.Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(WeightsForHandled[I]);
      PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {llvm::from_range, successors(Pred)};
    Updates.reserve(Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    for (auto I : seq(NewSuccessor.second)) {
      (void)I;
      addPredecessorToBlock(NewSuccessor.first, Pred, BB);
    }
    if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
      Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CV->getType()) &&
           "Should not end up here with unstable pointers");
    CV =
        Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(V.Value, V.Dest);

  if (PredHasWeights || SuccHasWeights)
    setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);

  // NOTE(review): the extraction appears to drop a line here (likely erasing
  // PTI and DCE'ing its condition) — confirm against upstream.

  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
        BranchInst::Create(InfLoopBlock, InfLoopBlock);
        if (DTU)
          Updates.push_back(
              {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(i, InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back({DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1453
1454/// The specified terminator is a value equality comparison instruction
1455/// (either a switch or a branch on "X == c").
1456/// See if any of the predecessors of the terminator block are value comparisons
1457/// on the same value. If so, and if safe to do so, fold them together.
1458bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1459 IRBuilder<> &Builder) {
1460 BasicBlock *BB = TI->getParent();
1461 Value *CV = isValueEqualityComparison(TI); // CondVal
1462 assert(CV && "Not a comparison?");
1463
1464 bool Changed = false;
1465
1466 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1467 while (!Preds.empty()) {
1468 BasicBlock *Pred = Preds.pop_back_val();
1469 Instruction *PTI = Pred->getTerminator();
1470
1471 // Don't try to fold into itself.
1472 if (Pred == BB)
1473 continue;
1474
1475 // See if the predecessor is a comparison with the same value.
1476 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1477 if (PCV != CV)
1478 continue;
1479
1480 SmallSetVector<BasicBlock *, 4> FailBlocks;
1481 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1482 for (auto *Succ : FailBlocks) {
1483 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1484 return false;
1485 }
1486 }
1487
1488 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1489 Changed = true;
1490 }
1491 return Changed;
1492}
1493
// If we would need to insert a select that uses the value of this invoke
// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
// need to do this), we can't hoist the invoke, as there is nowhere to put the
// select in this case.
// NOTE(review): the extraction dropped the signature line here (presumably
// "static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,") —
// confirm against upstream.
                                Instruction *I1, Instruction *I2) {
  // Scan PHIs in all successors: if any PHI takes different values from the
  // two blocks and one of them is the instruction itself, a select would be
  // required, so hoisting is unsafe.
  for (BasicBlock *Succ : successors(BB1)) {
    for (const PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
      if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
        return false;
      }
    }
  }
  return true;
}
1511
// Get interesting characteristics of instructions that
// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
// instructions can be reordered across.
// NOTE(review): the extraction dropped the SkipFlags enum definition and the
// function signature here (presumably
// "static unsigned skippedInstrFlags(Instruction *I) {") — confirm against
// upstream.

  unsigned Flags = 0;
  if (I->mayReadFromMemory())
    Flags |= SkipReadMem;
  // We can't arbitrarily move around allocas, e.g. moving allocas (especially
  // inalloca) across stacksave/stackrestore boundaries.
  if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
    Flags |= SkipSideEffect;
  // NOTE(review): the guarding condition for this flag was dropped by the
  // extraction (likely a test for implicit control flow) — confirm upstream.
    Flags |= SkipImplicitControlFlow;
  return Flags;
}
1533
// Returns true if it is safe to reorder an instruction across preceding
// instructions in a basic block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
  // Don't reorder a store over a load.
  if ((Flags & SkipReadMem) && I->mayWriteToMemory())
    return false;

  // If we have seen an instruction with side effects, it's unsafe to reorder an
  // instruction which reads memory or itself has side effects.
  if ((Flags & SkipSideEffect) &&
      (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
    return false;

  // Reordering across an instruction which does not necessarily transfer
  // control to the next instruction is speculation.
  // NOTE(review): the extraction dropped the 'if' condition guarding this
  // early return (likely a SkipImplicitControlFlow / speculation-safety
  // check) — confirm against upstream.
    return false;

  // Hoisting of llvm.deoptimize is only legal together with the next return
  // instruction, which this pass is not always able to do.
  if (auto *CB = dyn_cast<CallBase>(I))
    if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
      return false;

  // It's also unsafe/illegal to hoist an instruction above its instruction
  // operands
  BasicBlock *BB = I->getParent();
  for (Value *Op : I->operands()) {
    if (auto *J = dyn_cast<Instruction>(Op))
      if (J->getParent() == BB)
        return false;
  }

  return true;
}
1569
1570static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1571
/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
/// instructions \p I1 and \p I2 can and should be hoisted.
// NOTE(review): the extraction dropped the signature line here (presumably
// "static bool shouldHoistCommonInstructions(Instruction *I1, Instruction
// *I2,") — confirm against upstream.
                                         const TargetTransformInfo &TTI) {
  // If we're going to hoist a call, make sure that the two instructions
  // we're commoning/hoisting are both marked with musttail, or neither of
  // them is marked as such. Otherwise, we might end up in a situation where
  // we hoist from a block where the terminator is a `ret` to a block where
  // the terminator is a `br`, and `musttail` calls expect to be followed by
  // a return.
  auto *C1 = dyn_cast<CallInst>(I1);
  auto *C2 = dyn_cast<CallInst>(I2);
  if (C1 && C2)
    if (C1->isMustTailCall() != C2->isMustTailCall())
      return false;

  // Let the target veto hoisting for either instruction.
  if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
    return false;

  // If any of the two call sites has nomerge or convergent attribute, stop
  // hoisting.
  if (const auto *CB1 = dyn_cast<CallBase>(I1))
    if (CB1->cannotMerge() || CB1->isConvergent())
      return false;
  if (const auto *CB2 = dyn_cast<CallBase>(I2))
    if (CB2->cannotMerge() || CB2->isConvergent())
      return false;

  return true;
}
1602
/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
/// hoistCommonCodeFromSuccessors. e.g. The input:
/// I1 DVRs: { x, z },
/// OtherInsts: { I2 DVRs: { x, y, z } }
/// would result in hoisting only DbgVariableRecord x.
// NOTE(review): the extraction dropped the signature line here (presumably
// "static void hoistLockstepIdenticalDbgVariableRecords(") — confirm against
// upstream.
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators.
  // NOTE(review): the declaration of 'Itrs' was dropped by the extraction
  // (presumably "SmallVector<CurrentAndEndIt> Itrs;") — confirm upstream.
  Itrs.reserve(OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current are identical.
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
                  // NOTE(review): the inner lambda's header line was dropped
                  // by the extraction — confirm against upstream.
                    return Itrs[0].first->isIdenticalToWhenDefined(*I);
                  });
  };

  // Collect the iterators.
  Itrs.push_back(
      {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  for (Instruction *Other : OtherInsts) {
    // If any sibling has no records, lock-step matching is impossible.
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
  // the lock-step DbgRecord are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Itrs, atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord.
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
      }
    }
  }
}
1659
// Return true if the two instructions are identical, allowing operand order
// to differ for commutative operations and swapped-predicate compares.
// NOTE(review): the extraction dropped the signature line here (presumably
// "static bool areIdenticalUpToCommutativity(const Instruction *I1,") —
// confirm against upstream.
                  const Instruction *I2) {
  if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
    return true;

  // Two compares match when one uses the swapped predicate together with
  // swapped operands.
  if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
    if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
      return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
             Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
             Cmp1->getOperand(1) == Cmp2->getOperand(0);

  // For commutative operations, allow the first two operands to be swapped;
  // any remaining operands must match exactly.
  if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
    return I1->getOperand(0) == I2->getOperand(1) &&
           I1->getOperand(1) == I2->getOperand(0) &&
           equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
  }

  return false;
}
1679
1680/// If the target supports conditional faulting,
1681/// we look for the following pattern:
1682/// \code
1683/// BB:
1684/// ...
1685/// %cond = icmp ult %x, %y
1686/// br i1 %cond, label %TrueBB, label %FalseBB
1687/// FalseBB:
1688/// store i32 1, ptr %q, align 4
1689/// ...
1690/// TrueBB:
1691/// %maskedloadstore = load i32, ptr %b, align 4
1692/// store i32 %maskedloadstore, ptr %p, align 4
1693/// ...
1694/// \endcode
1695///
1696/// and transform it into:
1697///
1698/// \code
1699/// BB:
1700/// ...
1701/// %cond = icmp ult %x, %y
1702/// %maskedloadstore = cload i32, ptr %b, %cond
1703/// cstore i32 %maskedloadstore, ptr %p, %cond
1704/// cstore i32 1, ptr %q, ~%cond
1705/// br i1 %cond, label %TrueBB, label %FalseBB
1706/// FalseBB:
1707/// ...
1708/// TrueBB:
1709/// ...
1710/// \endcode
1711///
1712/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1713/// e.g.
1714///
1715/// \code
1716/// %vcond = bitcast i1 %cond to <1 x i1>
1717/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1718/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1719/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1720/// call void @llvm.masked.store.v1i32.p0
1721/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1722/// %cond.not = xor i1 %cond, true
1723/// %vcond.not = bitcast i1 %cond.not to <1 x i1>
1724/// call void @llvm.masked.store.v1i32.p0
1725/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
1726/// \endcode
1727///
1728/// So we need to turn hoisted load/store into cload/cstore.
1729///
1730/// \param BI The branch instruction.
1731/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1732/// will be speculated.
1733/// \param Invert indicates whether FalseBB is the speculated block. Only used
1734/// in a triangle CFG; std::nullopt means both successors are hoisted from.
1735                                 BranchInst *BI,
1736 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1737 std::optional<bool> Invert, Instruction *Sel) {
1738 auto &Context = BI->getParent()->getContext();
1739 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1740 auto *Cond = BI->getOperand(0);
1741 // Construct the condition if needed.
1742 BasicBlock *BB = BI->getParent();
1743 Value *Mask = nullptr;
1744 Value *MaskFalse = nullptr;
1745 Value *MaskTrue = nullptr;
 // Triangle CFG (Invert has a value): a single <1 x i1> mask built from the
 // branch condition, inverted when the false arm is the one speculated.
1746 if (Invert.has_value()) {
1747 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1748 Mask = Builder.CreateBitCast(
1749 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1750 VCondTy);
1751 } else {
 // Both successors are hoisted from: prepare one mask per arm; the right
 // one is selected per-instruction below based on its original parent.
1752 IRBuilder<> Builder(BI);
1753 MaskFalse = Builder.CreateBitCast(
1754 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1755 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1756 }
 // Strip any chain of bitcasts to reach the underlying value.
1757 auto PeekThroughBitcasts = [](Value *V) {
1758 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1759 V = BitCast->getOperand(0);
1760 return V;
1761 };
1762 for (auto *I : SpeculatedConditionalLoadsStores) {
1763 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1764 if (!Invert.has_value())
1765 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1766 // We currently assume conditional faulting load/store is supported for
1767 // scalar types only when creating new instructions. This can be easily
1768 // extended for vector types in the future.
1769 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1770 auto *Op0 = I->getOperand(0);
1771 CallInst *MaskedLoadStore = nullptr;
1772 if (auto *LI = dyn_cast<LoadInst>(I)) {
1773 // Handle Load.
1774 auto *Ty = I->getType();
1775 PHINode *PN = nullptr;
1776 Value *PassThru = nullptr;
 // When speculating, a PHI in BB's successor that consumed the load gets
 // rewired to the masked load; its other incoming value (through any
 // bitcasts) becomes the masked load's pass-through value.
1777 if (Invert.has_value())
1778 for (User *U : I->users()) {
1779 if ((PN = dyn_cast<PHINode>(U))) {
1780 PassThru = Builder.CreateBitCast(
1781 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1782 FixedVectorType::get(Ty, 1));
1783 } else if (auto *Ins = cast<Instruction>(U);
1784 Sel && Ins->getParent() == BB) {
1785 // This happens when store or/and a speculative instruction between
1786 // load and store were hoisted to the BB. Make sure the masked load
1787 // inserted before its use.
1788 // We assume there's one of such use.
1789 Builder.SetInsertPoint(Ins);
1790 }
1791 }
1792 MaskedLoadStore = Builder.CreateMaskedLoad(
1793 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1794 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1795 if (PN)
1796 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1797 I->replaceAllUsesWith(NewLoadStore);
1798 } else {
1799 // Handle Store.
1800 auto *StoredVal = Builder.CreateBitCast(
1801 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1802 MaskedLoadStore = Builder.CreateMaskedStore(
1803 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1804 }
1805 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1806 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1807 //
1808 // !nonnull, !align : Not support pointer type, no need to keep.
1809 // !range: Load type is changed from scalar to vector, but the metadata on
1810 // vector specifies a per-element range, so the semantics stay the
1811 // same. Keep it.
1812 // !annotation: Not impact semantics. Keep it.
1813 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1814 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1815 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1816 // FIXME: DIAssignID is not supported for masked store yet.
1817 // (Verifier::visitDIAssignIDMetadata)
1819 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1820 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1821 });
1822 MaskedLoadStore->copyMetadata(*I);
1823 I->eraseFromParent();
1824 }
1825}
1826
// Returns true if I is a simple (non-volatile, non-atomic) load or store whose
// type the target can lower to a conditional-faulting load/store, and the
// corresponding HoistLoads/HoistStoresWithCondFaulting option is enabled.
1828                                  const TargetTransformInfo &TTI) {
1829  // Not handle volatile or atomic.
1830  bool IsStore = false;
1831  if (auto *L = dyn_cast<LoadInst>(I)) {
1832    if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1833      return false;
1834  } else if (auto *S = dyn_cast<StoreInst>(I)) {
1835    if (!S->isSimple() || !HoistStoresWithCondFaulting)
1836      return false;
1837    IsStore = true;
1838  } else
 // Anything that is not a load or a store is rejected outright.
1839    return false;
1840
1841  // llvm.masked.load/store use i32 for alignment while load/store use i64.
1842  // That's why we have the alignment limitation.
1843  // FIXME: Update the prototype of the intrinsics?
1844  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
1846}
1847
1848/// Hoist any common code in the successor blocks up into the block. This
1849/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1850/// given, only perform hoisting in case all successor blocks contain matching
1851/// instructions only. In that case, all instructions can be hoisted and the
1852/// original branch will be replaced and selects for PHIs are added.
1853bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1854                                                   bool AllInstsEqOnly) {
1855  // This does very trivial matching, with limited scanning, to find identical
1856  // instructions in the two blocks. In particular, we don't want to get into
1857  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1858  // such, we currently just scan for obviously identical instructions in an
1859  // identical order, possibly separated by the same number of non-identical
1860  // instructions.
1861  BasicBlock *BB = TI->getParent();
1862  unsigned int SuccSize = succ_size(BB);
1863  if (SuccSize < 2)
1864    return false;
1865
1866  // If either of the blocks has it's address taken, then we can't do this fold,
1867  // because the code we'd hoist would no longer run when we jump into the block
1868  // by it's address.
1869  for (auto *Succ : successors(BB))
1870    if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1871      return false;
1872
1873  // The second of pair is a SkipFlags bitmask.
1874  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1875  SmallVector<SuccIterPair, 8> SuccIterPairs;
 // PHI nodes at a successor's head block hoisting entirely.
1876  for (auto *Succ : successors(BB)) {
1877    BasicBlock::iterator SuccItr = Succ->begin();
1878    if (isa<PHINode>(*SuccItr))
1879      return false;
1880    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1881  }
1882
1883  if (AllInstsEqOnly) {
1884    // Check if all instructions in the successor blocks match. This allows
1885    // hoisting all instructions and removing the blocks we are hoisting from,
1886    // so does not add any new instructions.
1888    // Check if sizes and terminators of all successors match.
1889    bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
1890      Instruction *Term0 = Succs[0]->getTerminator();
1891      Instruction *Term = Succ->getTerminator();
1892      return !Term->isSameOperationAs(Term0) ||
1893             !equal(Term->operands(), Term0->operands()) ||
1894             Succs[0]->size() != Succ->size();
1895    });
1896    if (!AllSame)
1897      return false;
1898    if (AllSame) {
 // Walk all successors backwards in lockstep; every instruction must be
 // identical (up to commutativity) across all of them.
1899      LockstepReverseIterator<true> LRI(Succs);
1900      while (LRI.isValid()) {
1901        Instruction *I0 = (*LRI)[0];
1902        if (any_of(*LRI, [I0](Instruction *I) {
1903              return !areIdenticalUpToCommutativity(I0, I);
1904            })) {
1905          return false;
1906        }
1907        --LRI;
1908      }
1909    }
1910    // Now we know that all instructions in all successors can be hoisted. Let
1911    // the loop below handle the hoisting.
1912  }
1913
1914  // Count how many instructions were not hoisted so far. There's a limit on how
1915  // many instructions we skip, serving as a compilation time control as well as
1916  // preventing excessive increase of life ranges.
1917  unsigned NumSkipped = 0;
1918  // If we find an unreachable instruction at the beginning of a basic block, we
1919  // can still hoist instructions from the rest of the basic blocks.
1920  if (SuccIterPairs.size() > 2) {
1921    erase_if(SuccIterPairs,
1922             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1923    if (SuccIterPairs.size() < 2)
1924      return false;
1925  }
1926
1927  bool Changed = false;
1928
 // Walk all successors forward in lockstep, hoisting identical instructions
 // one at a time until we hit the terminators or the skip limit.
1929  for (;;) {
1930    auto *SuccIterPairBegin = SuccIterPairs.begin();
1931    auto &BB1ItrPair = *SuccIterPairBegin++;
1932    auto OtherSuccIterPairRange =
1933        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1934    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1935
1936    Instruction *I1 = &*BB1ItrPair.first;
1937
1938    bool AllInstsAreIdentical = true;
1939    bool HasTerminator = I1->isTerminator();
1940    for (auto &SuccIter : OtherSuccIterRange) {
1941      Instruction *I2 = &*SuccIter;
1942      HasTerminator |= I2->isTerminator();
1943      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1944                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1945        AllInstsAreIdentical = false;
1946    }
1947
1948    SmallVector<Instruction *, 8> OtherInsts;
1949    for (auto &SuccIter : OtherSuccIterRange)
1950      OtherInsts.push_back(&*SuccIter);
1951
1952    // If we are hoisting the terminator instruction, don't move one (making a
1953    // broken BB), instead clone it, and remove BI.
1954    if (HasTerminator) {
1955      // Even if BB, which contains only one unreachable instruction, is ignored
1956      // at the beginning of the loop, we can hoist the terminator instruction.
1957      // If any instructions remain in the block, we cannot hoist terminators.
1958      if (NumSkipped || !AllInstsAreIdentical) {
1959        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1960        return Changed;
1961      }
1962
1963      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1964             Changed;
1965    }
1966
1967    if (AllInstsAreIdentical) {
1968      unsigned SkipFlagsBB1 = BB1ItrPair.second;
1969      AllInstsAreIdentical =
1970          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1971          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1972            Instruction *I2 = &*Pair.first;
1973            unsigned SkipFlagsBB2 = Pair.second;
1974            // Even if the instructions are identical, it may not
1975            // be safe to hoist them if we have skipped over
1976            // instructions with side effects or their operands
1977            // weren't hoisted.
1978            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1980          });
1981    }
1982
1983    if (AllInstsAreIdentical) {
1984      BB1ItrPair.first++;
1985      // For a normal instruction, we just move one to right before the
1986      // branch, then replace all uses of the other with the first. Finally,
1987      // we remove the now redundant second instruction.
1988      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1989      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1990      // and leave any that were not hoisted behind (by calling moveBefore
1991      // rather than moveBeforePreserving).
1992      I1->moveBefore(TI->getIterator());
1993      for (auto &SuccIter : OtherSuccIterRange) {
1994        Instruction *I2 = &*SuccIter++;
1995        assert(I2 != I1);
1996        if (!I2->use_empty())
1997          I2->replaceAllUsesWith(I1);
 // Merge IR flags, callsite attributes, metadata and debug locations so
 // the surviving instruction is valid for every original path.
1998        I1->andIRFlags(I2);
1999        if (auto *CB = dyn_cast<CallBase>(I1)) {
2000          bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2001          assert(Success && "We should not be trying to hoist callbases "
2002                            "with non-intersectable attributes");
2003          // For NDEBUG Compile.
2004          (void)Success;
2005        }
2006
2007        combineMetadataForCSE(I1, I2, true);
2008        // I1 and I2 are being combined into a single instruction.  Its debug
2009        // location is the merged locations of the original instructions.
2010        I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2011        I2->eraseFromParent();
2012      }
2013      if (!Changed)
2014        NumHoistCommonCode += SuccIterPairs.size();
2015      Changed = true;
2016      NumHoistCommonInstrs += SuccIterPairs.size();
2017    } else {
2018      if (NumSkipped >= HoistCommonSkipLimit) {
2019        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2020        return Changed;
2021      }
2022      // We are about to skip over a pair of non-identical instructions. Record
2023      // if any have characteristics that would prevent reordering instructions
2024      // across them.
2025      for (auto &SuccIterPair : SuccIterPairs) {
2026        Instruction *I = &*SuccIterPair.first++;
2027        SuccIterPair.second |= skippedInstrFlags(I);
2028      }
2029      ++NumSkipped;
2030    }
2031  }
2032}
2033
/// Hoist the identical terminators of TI's successors (I1 and OtherSuccTIs)
/// into TI's block: the terminator is cloned before TI, uses are rewritten to
/// the clone, and PHI nodes in the new successors are updated. For a two-way
/// conditional branch, diverging PHI inputs from the two arms are merged with
/// selects on the branch condition.
2034bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2035    Instruction *TI, Instruction *I1,
2036    SmallVectorImpl<Instruction *> &OtherSuccTIs) {
2037
2038  auto *BI = dyn_cast<BranchInst>(TI);
2039
2040  bool Changed = false;
2041  BasicBlock *TIParent = TI->getParent();
2042  BasicBlock *BB1 = I1->getParent();
2043
2044  // Use only for an if statement.
2045  auto *I2 = *OtherSuccTIs.begin();
2046  auto *BB2 = I2->getParent();
2047  if (BI) {
2048    assert(OtherSuccTIs.size() == 1);
2049    assert(BI->getSuccessor(0) == I1->getParent());
2050    assert(BI->getSuccessor(1) == I2->getParent());
2051  }
2052
2053  // In the case of an if statement, we try to hoist an invoke.
2054  // FIXME: Can we define a safety predicate for CallBr?
2055  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2056  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2057  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2058    return false;
2059
2060  // TODO: callbr hoisting currently disabled pending further study.
2061  if (isa<CallBrInst>(I1))
2062    return false;
2063
 // Bail out if any PHI in a successor would need a select we are not willing
 // to create (non-branch terminator, or value known to be undefined).
2064  for (BasicBlock *Succ : successors(BB1)) {
2065    for (PHINode &PN : Succ->phis()) {
2066      Value *BB1V = PN.getIncomingValueForBlock(BB1);
2067      for (Instruction *OtherSuccTI : OtherSuccTIs) {
2068        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2069        if (BB1V == BB2V)
2070          continue;
2071
2072        // In the case of an if statement, check for
2073        // passingValueIsAlwaysUndefined here because we would rather eliminate
2074        // undefined control flow then converting it to a select.
2075        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2077          return false;
2078      }
2079    }
2080  }
2081
2082  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2083  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2084  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2085  // Clone the terminator and hoist it into the pred, without any debug info.
2086  Instruction *NT = I1->clone();
2087  NT->insertInto(TIParent, TI->getIterator());
2088  if (!NT->getType()->isVoidTy()) {
2089    I1->replaceAllUsesWith(NT);
2090    for (Instruction *OtherSuccTI : OtherSuccTIs)
2091      OtherSuccTI->replaceAllUsesWith(NT);
2092    NT->takeName(I1);
2093  }
2094  Changed = true;
2095  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2096
2097  // Ensure terminator gets a debug location, even an unknown one, in case
2098  // it involves inlinable calls.
2100  Locs.push_back(I1->getDebugLoc());
2101  for (auto *OtherSuccTI : OtherSuccTIs)
2102    Locs.push_back(OtherSuccTI->getDebugLoc());
2103  NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2104
2105  // PHIs created below will adopt NT's merged DebugLoc.
2106  IRBuilder<NoFolder> Builder(NT);
2107
2108  // In the case of an if statement, hoisting one of the terminators from our
2109  // successor is a great thing. Unfortunately, the successors of the if/else
2110  // blocks may have PHI nodes in them.  If they do, all PHI entries for BB1/BB2
2111  // must agree for all PHI nodes, so we insert select instruction to compute
2112  // the final result.
2113  if (BI) {
 // Cache selects by (true-value, false-value) so multiple PHIs that diverge
 // on the same pair of values share a single select.
2114    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2115    for (BasicBlock *Succ : successors(BB1)) {
2116      for (PHINode &PN : Succ->phis()) {
2117        Value *BB1V = PN.getIncomingValueForBlock(BB1);
2118        Value *BB2V = PN.getIncomingValueForBlock(BB2);
2119        if (BB1V == BB2V)
2120          continue;
2121
2122        // These values do not agree. Insert a select instruction before NT
2123        // that determines the right value.
2124        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2125        if (!SI) {
2126          // Propagate fast-math-flags from phi node to its replacement select.
2128              BI->getCondition(), BB1V, BB2V,
2129              isa<FPMathOperator>(PN) ? &PN : nullptr,
2130              BB1V->getName() + "." + BB2V->getName(), BI));
2131        }
2132
2133        // Make the PHI node use the select for all incoming values for BB1/BB2
2134        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2135          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2136            PN.setIncomingValue(i, SI);
2137      }
2138    }
2139  }
2140
2141
2142  // Update any PHI nodes in our new successors.
2143  for (BasicBlock *Succ : successors(BB1)) {
2144    addPredecessorToBlock(Succ, TIParent, BB1);
2145    if (DTU)
2146      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2147  }
2148
2149  if (DTU)
2150    for (BasicBlock *Succ : successors(TI))
2151      Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2152
2155  if (DTU)
2156    DTU->applyUpdates(Updates);
2157  return Changed;
2158}
2159
2160// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2161// into variables.
// Returns true if replacing operand OpIdx of I with a PHI-fed variable is
// considered cheap enough for sinking purposes.
2163                                                int OpIdx) {
2164  // Divide/Remainder by constant is typically much cheaper than by variable.
2165  if (I->isIntDivRem())
2166    return OpIdx != 1;
 // Intrinsic operands frequently must be immediates, so conservatively
 // refuse to variable-ize any intrinsic operand.
2167  return !isa<IntrinsicInst>(I);
2168}
2169
2170// All instructions in Insts belong to different blocks that all unconditionally
2171// branch to a common successor. Analyze each instruction and return true if it
2172// would be possible to sink them into their successor, creating one common
2173// instruction instead. For every value that would be required to be provided by
2174// PHI node (because an operand varies in each input block), add to PHIOperands.
2177    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2178  // Prune out obviously bad instructions to move. Each instruction must have
2179  // the same number of uses, and we check later that the uses are consistent.
2180  std::optional<unsigned> NumUses;
2181  for (auto *I : Insts) {
2182    // These instructions may change or break semantics if moved.
2183    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2184        I->getType()->isTokenTy())
2185      return false;
2186
2187    // Do not try to sink an instruction in an infinite loop - it can cause
2188    // this algorithm to infinite loop.
2189    if (I->getParent()->getSingleSuccessor() == I->getParent())
2190      return false;
2191
2192    // Conservatively return false if I is an inline-asm instruction. Sinking
2193    // and merging inline-asm instructions can potentially create arguments
2194    // that cannot satisfy the inline-asm constraints.
2195    // If the instruction has nomerge or convergent attribute, return false.
2196    if (const auto *C = dyn_cast<CallBase>(I))
2197      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2198        return false;
2199
2200    if (!NumUses)
2201      NumUses = I->getNumUses();
2202    else if (NumUses != I->getNumUses())
2203      return false;
2204  }
2205
 // All candidates must perform the same operation as the first one, with
 // intersectable call attributes and identical MMRA metadata.
2206  const Instruction *I0 = Insts.front();
2207  const auto I0MMRA = MMRAMetadata(*I0);
2208  for (auto *I : Insts) {
2209    if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2210      return false;
2211
2212    // Treat MMRAs conservatively. This pass can be quite aggressive and
2213    // could drop a lot of MMRAs otherwise.
2214    if (MMRAMetadata(*I) != I0MMRA)
2215      return false;
2216  }
2217
2218  // Uses must be consistent: If I0 is used in a phi node in the sink target,
2219  // then the other phi operands must match the instructions from Insts. This
2220  // also has to hold true for any phi nodes that would be created as a result
2221  // of sinking. Both of these cases are represented by PhiOperands.
2222  for (const Use &U : I0->uses()) {
2223    auto It = PHIOperands.find(&U);
2224    if (It == PHIOperands.end())
2225      // There may be uses in other blocks when sinking into a loop header.
2226      return false;
2227    if (!equal(Insts, It->second))
2228      return false;
2229  }
2230
2231  // For calls to be sinkable, they must all be indirect, or have same callee.
2232  // I.e. if we have two direct calls to different callees, we don't want to
2233  // turn that into an indirect call. Likewise, if we have an indirect call,
2234  // and a direct call, we don't actually want to have a single indirect call.
2235  if (isa<CallBase>(I0)) {
2236    auto IsIndirectCall = [](const Instruction *I) {
2237      return cast<CallBase>(I)->isIndirectCall();
2238    };
2239    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2240    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2241    if (HaveIndirectCalls) {
2242      if (!AllCallsAreIndirect)
2243        return false;
2244    } else {
2245      // All callees must be identical.
2246      Value *Callee = nullptr;
2247      for (const Instruction *I : Insts) {
2248        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2249        if (!Callee)
2250          Callee = CurrCallee;
2251        else if (Callee != CurrCallee)
2252          return false;
2253      }
2254    }
2255  }
2256
 // Record, per differing operand position, the values that a new PHI would
 // have to merge.
2257  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2258    Value *Op = I0->getOperand(OI);
2259    auto SameAsI0 = [&I0, OI](const Instruction *I) {
2260      assert(I->getNumOperands() == I0->getNumOperands());
2261      return I->getOperand(OI) == I0->getOperand(OI);
2262    };
2263    if (!all_of(Insts, SameAsI0)) {
2266        // We can't create a PHI from this GEP.
2267        return false;
2268      auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2269      for (auto *I : Insts)
2270        Ops.push_back(I->getOperand(OI));
2271    }
2272  }
2273  return true;
2274}
2275
2276// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2277// instruction of every block in Blocks to their common successor, commoning
2278// into one instruction.
2280  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2281
2282  // canSinkInstructions returning true guarantees that every block has at
2283  // least one non-terminator instruction.
2285  for (auto *BB : Blocks) {
2286    Instruction *I = BB->getTerminator();
2287    I = I->getPrevNode();
2288    Insts.push_back(I);
2289  }
2290
2291  // We don't need to do any more checking here; canSinkInstructions should
2292  // have done it all for us.
2293  SmallVector<Value*, 4> NewOperands;
2294  Instruction *I0 = Insts.front();
2295  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2296    // This check is different to that in canSinkInstructions. There, we
2297    // cared about the global view once simplifycfg (and instcombine) have
2298    // completed - it takes into account PHIs that become trivially
2299    // simplifiable.  However here we need a more local view; if an operand
2300    // differs we create a PHI and rely on instcombine to clean up the very
2301    // small mess we may make.
2302    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2303      return I->getOperand(O) != I0->getOperand(O);
2304    });
2305    if (!NeedPHI) {
2306      NewOperands.push_back(I0->getOperand(O));
2307      continue;
2308    }
2309
2310    // Create a new PHI in the successor block and populate it.
2311    auto *Op = I0->getOperand(O);
2312    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2313    auto *PN =
2314        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2315    PN->insertBefore(BBEnd->begin());
2316    for (auto *I : Insts)
2317      PN->addIncoming(I->getOperand(O), I->getParent());
2318    NewOperands.push_back(PN);
2319  }
2320
2321  // Arbitrarily use I0 as the new "common" instruction; remap its operands
2322  // and move it to the start of the successor block.
2323  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2324    I0->getOperandUse(O).set(NewOperands[O]);
2325
2326  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2327
2328  // Update metadata and IR flags, and merge debug locations.
2329  for (auto *I : Insts)
2330    if (I != I0) {
2331      // The debug location for the "common" instruction is the merged locations
2332      // of all the commoned instructions.  We start with the original location
2333      // of the "common" instruction and iteratively merge each location in the
2334      // loop below.
2335      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2336      // However, as N-way merge for CallInst is rare, so we use simplified API
2337      // instead of using complex API for N-way merge.
2338      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2339      combineMetadataForCSE(I0, I, true);
2340      I0->andIRFlags(I);
2341      if (auto *CB = dyn_cast<CallBase>(I0)) {
2342        bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2343        assert(Success && "We should not be trying to sink callbases "
2344                          "with non-intersectable attributes");
2345        // For NDEBUG Compile.
2346        (void)Success;
2347      }
2348    }
2349
 // Any remaining user of I0 is a PHI in the sink target (guaranteed by
 // canSinkInstructions); it is now redundant, so fold it away.
2350  for (User *U : make_early_inc_range(I0->users())) {
2351    // canSinkLastInstruction checked that all instructions are only used by
2352    // phi nodes in a way that allows replacing the phi node with the common
2353    // instruction.
2354    auto *PN = cast<PHINode>(U);
2355    PN->replaceAllUsesWith(I0);
2356    PN->eraseFromParent();
2357  }
2358
2359  // Finally nuke all instructions apart from the common instruction.
2360  for (auto *I : Insts) {
2361    if (I == I0)
2362      continue;
2363    // The remaining uses are debug users, replace those with the common inst.
2364    // In most (all?) cases this just introduces a use-before-def.
2365    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2366    I->replaceAllUsesWith(I0);
2367    I->eraseFromParent();
2368  }
2369}
2370
2371/// Check whether BB's predecessors end with unconditional branches. If it is
2372/// true, sink any common code from the predecessors to BB.
2374                                           DomTreeUpdater *DTU) {
2375  // We support two situations:
2376  //   (1) all incoming arcs are unconditional
2377  //   (2) there are non-unconditional incoming arcs
2378  //
2379  // (2) is very common in switch defaults and
2380  // else-if patterns;
2381  //
2382  //   if (a) f(1);
2383  //   else if (b) f(2);
2384  //
2385  // produces:
2386  //
2387  //       [if]
2388  //      /    \
2389  //    [f(1)] [if]
2390  //      |     | \
2391  //      |     |  |
2392  //      |  [f(2)]|
2393  //       \    | /
2394  //        [ end ]
2395  //
2396  // [end] has two unconditional predecessor arcs and one conditional. The
2397  // conditional refers to the implicit empty 'else' arc. This conditional
2398  // arc can also be caused by an empty default block in a switch.
2399  //
2400  // In this case, we attempt to sink code from all *unconditional* arcs.
2401  // If we can sink instructions from these arcs (determined during the scan
2402  // phase below) we insert a common successor for all unconditional arcs and
2403  // connect that to [end], to enable sinking:
2404  //
2405  //       [if]
2406  //      /    \
2407  //    [x(1)] [if]
2408  //      |     | \
2409  //      |     |  \
2410  //      |  [x(2)] |
2411  //       \   /    |
2412  //   [sink.split] |
2413  //         \     /
2414  //         [ end ]
2415  //
2416  SmallVector<BasicBlock*,4> UnconditionalPreds;
2417  bool HaveNonUnconditionalPredecessors = false;
2418  for (auto *PredBB : predecessors(BB)) {
2419    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2420    if (PredBr && PredBr->isUnconditional())
2421      UnconditionalPreds.push_back(PredBB);
2422    else
2423      HaveNonUnconditionalPredecessors = true;
2424  }
2425  if (UnconditionalPreds.size() < 2)
2426    return false;
2427
2428  // We take a two-step approach to tail sinking. First we scan from the end of
2429  // each block upwards in lockstep. If the n'th instruction from the end of each
2430  // block can be sunk, those instructions are added to ValuesToSink and we
2431  // carry on. If we can sink an instruction but need to PHI-merge some operands
2432  // (because they're not identical in each instruction) we add these to
2433  // PHIOperands.
2434  // We prepopulate PHIOperands with the phis that already exist in BB.
2436  for (PHINode &PN : BB->phis()) {
2438    for (const Use &U : PN.incoming_values())
2439      IncomingVals.insert({PN.getIncomingBlock(U), &U});
2440    auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2441    for (BasicBlock *Pred : UnconditionalPreds)
2442      Ops.push_back(*IncomingVals[Pred]);
2443  }
2444
 // Lockstep backward scan: count how many trailing instructions of every
 // unconditional predecessor are sinkable.
2445  int ScanIdx = 0;
2446  SmallPtrSet<Value*,4> InstructionsToSink;
2447  LockstepReverseIterator<true> LRI(UnconditionalPreds);
2448  while (LRI.isValid() &&
2449         canSinkInstructions(*LRI, PHIOperands)) {
2450    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2451                      << "\n");
2452    InstructionsToSink.insert_range(*LRI);
2453    ++ScanIdx;
2454    --LRI;
2455  }
2456
2457  // If no instructions can be sunk, early-return.
2458  if (ScanIdx == 0)
2459    return false;
2460
2461  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2462
 // Unless the block is followed by deopt/unreachable (where code size is all
 // that matters), trim the candidate list down to the profitable prefix.
2463  if (!followedByDeoptOrUnreachable) {
2464    // Check whether this is the pointer operand of a load/store.
2465    auto IsMemOperand = [](Use &U) {
2466      auto *I = cast<Instruction>(U.getUser());
2467      if (isa<LoadInst>(I))
2468        return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2469      if (isa<StoreInst>(I))
2470        return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2471      return false;
2472    };
2473
2474    // Okay, we *could* sink last ScanIdx instructions. But how many can we
2475    // actually sink before encountering instruction that is unprofitable to
2476    // sink?
2477    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2478      unsigned NumPHIInsts = 0;
2479      for (Use &U : (*LRI)[0]->operands()) {
2480        auto It = PHIOperands.find(&U);
2481        if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2482              return InstructionsToSink.contains(V);
2483            })) {
2484          ++NumPHIInsts;
2485          // Do not separate a load/store from the gep producing the address.
2486          // The gep can likely be folded into the load/store as an addressing
2487          // mode. Additionally, a load of a gep is easier to analyze than a
2488          // load of a phi.
2489          if (IsMemOperand(U) &&
2490              any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2491            return false;
2492          // FIXME: this check is overly optimistic. We may end up not sinking
2493          // said instruction, due to the very same profitability check.
2494          // See @creating_too_many_phis in sink-common-code.ll.
2495        }
2496      }
2497      LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2498      return NumPHIInsts <= 1;
2499    };
2500
2501    // We've determined that we are going to sink last ScanIdx instructions,
2502    // and recorded them in InstructionsToSink. Now, some instructions may be
2503    // unprofitable to sink. But that determination depends on the instructions
2504    // that we are going to sink.
2505
2506    // First, forward scan: find the first instruction unprofitable to sink,
2507    // recording all the ones that are profitable to sink.
2508    // FIXME: would it be better, after we detect that not all are profitable.
2509    // to either record the profitable ones, or erase the unprofitable ones?
2510    // Maybe we need to choose (at runtime) the one that will touch least
2511    // instrs?
2512    LRI.reset();
2513    int Idx = 0;
2514    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2515    while (Idx < ScanIdx) {
2516      if (!ProfitableToSinkInstruction(LRI)) {
2517        // Too many PHIs would be created.
2518        LLVM_DEBUG(
2519            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2520        break;
2521      }
2522      InstructionsProfitableToSink.insert_range(*LRI);
2523      --LRI;
2524      ++Idx;
2525    }
2526
2527    // If no instructions can be sunk, early-return.
2528    if (Idx == 0)
2529      return false;
2530
2531    // Did we determine that (only) some instructions are unprofitable to sink?
2532    if (Idx < ScanIdx) {
2533      // Okay, some instructions are unprofitable.
2534      ScanIdx = Idx;
2535      InstructionsToSink = InstructionsProfitableToSink;
2536
2537      // But, that may make other instructions unprofitable, too.
2538      // So, do a backward scan, do any earlier instructions become
2539      // unprofitable?
2540      assert(
2541          !ProfitableToSinkInstruction(LRI) &&
2542          "We already know that the last instruction is unprofitable to sink");
2543      ++LRI;
2544      --Idx;
2545      while (Idx >= 0) {
2546        // If we detect that an instruction becomes unprofitable to sink,
2547        // all earlier instructions won't be sunk either,
2548        // so preemptively keep InstructionsProfitableToSink in sync.
2549        // FIXME: is this the most performant approach?
2550        for (auto *I : *LRI)
2551          InstructionsProfitableToSink.erase(I);
2552        if (!ProfitableToSinkInstruction(LRI)) {
2553          // Everything starting with this instruction won't be sunk.
2554          ScanIdx = Idx;
2555          InstructionsToSink = InstructionsProfitableToSink;
2556        }
2557        ++LRI;
2558        --Idx;
2559      }
2560    }
2561
2562    // If no instructions can be sunk, early-return.
2563    if (ScanIdx == 0)
2564      return false;
2565  }
2566
2567  bool Changed = false;
2568
2569  if (HaveNonUnconditionalPredecessors) {
2570    if (!followedByDeoptOrUnreachable) {
2571      // It is always legal to sink common instructions from unconditional
2572      // predecessors. However, if not all predecessors are unconditional,
2573      // this transformation might be pessimizing. So as a rule of thumb,
2574      // don't do it unless we'd sink at least one non-speculatable instruction.
2575      // See https://bugs.llvm.org/show_bug.cgi?id=30244
2576      LRI.reset();
2577      int Idx = 0;
2578      bool Profitable = false;
2579      while (Idx < ScanIdx) {
2580        if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2581          Profitable = true;
2582          break;
2583        }
2584        --LRI;
2585        ++Idx;
2586      }
2587      if (!Profitable)
2588        return false;
2589    }
2590
2591    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2592    // We have a conditional edge and we're going to sink some instructions.
2593    // Insert a new block postdominating all blocks we're going to sink from.
2594    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2595      // Edges couldn't be split.
2596      return false;
2597    Changed = true;
2598  }
2599
2600  // Now that we've analyzed all potential sinking candidates, perform the
2601  // actual sink. We iteratively sink the last non-terminator of the source
2602  // blocks into their common successor unless doing so would require too
2603  // many PHI instructions to be generated (currently only one PHI is allowed
2604  // per sunk instruction).
2605  //
2606  // We can use InstructionsToSink to discount values needing PHI-merging that will
2607  // actually be sunk in a later iteration. This allows us to be more
2608  // aggressive in what we sink. This does allow a false positive where we
2609  // sink presuming a later value will also be sunk, but stop half way through
2610  // and never actually sink it which means we produce more PHIs than intended.
2611  // This is unlikely in practice though.
2612  int SinkIdx = 0;
2613  for (; SinkIdx != ScanIdx; ++SinkIdx) {
2614    LLVM_DEBUG(dbgs() << "SINK: Sink: "
2615                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2616                      << "\n");
2617
2618    // Because we've sunk every instruction in turn, the current instruction to
2619    // sink is always at index 0.
2620    LRI.reset();
2621
2622    sinkLastInstruction(UnconditionalPreds);
2623    NumSinkCommonInstrs++;
2624    Changed = true;
2625  }
2626  if (SinkIdx != 0)
2627    ++NumSinkCommonCode;
2628  return Changed;
2629}
2630
2631namespace {
2632
2633struct CompatibleSets {
2634 using SetTy = SmallVector<InvokeInst *, 2>;
2635
2637
2638 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2639
2640 SetTy &getCompatibleSet(InvokeInst *II);
2641
2642 void insert(InvokeInst *II);
2643};
2644
2645CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2646 // Perform a linear scan over all the existing sets, see if the new `invoke`
2647 // is compatible with any particular set. Since we know that all the `invokes`
2648 // within a set are compatible, only check the first `invoke` in each set.
2649 // WARNING: at worst, this has quadratic complexity.
2650 for (CompatibleSets::SetTy &Set : Sets) {
2651 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2652 return Set;
2653 }
2654
2655 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2656 return Sets.emplace_back();
2657}
2658
2659void CompatibleSets::insert(InvokeInst *II) {
2660 getCompatibleSet(II).emplace_back(II);
2661}
2662
// Decide whether two `invoke`s (always exactly two candidates) that unwind to
// the same `landingpad` may legally and profitably be merged into one.
2663bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2664 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2665
2666 // Can we theoretically merge these `invoke`s?
2667 auto IsIllegalToMerge = [](InvokeInst *II) {
2668 return II->cannotMerge() || II->isInlineAsm();
2669 };
2670 if (any_of(Invokes, IsIllegalToMerge))
2671 return false;
2672
2673 // Either both `invoke`s must be direct,
2674 // or both `invoke`s must be indirect.
2675 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2676 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2677 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2678 if (HaveIndirectCalls) {
2679 if (!AllCallsAreIndirect)
2680 return false;
2681 } else {
2682 // All callees must be identical.
2683 Value *Callee = nullptr;
2684 for (InvokeInst *II : Invokes) {
2685 Value *CurrCallee = II->getCalledOperand();
2686 assert(CurrCallee && "There is always a called operand.");
2687 if (!Callee)
2688 Callee = CurrCallee;
2689 else if (Callee != CurrCallee)
2690 return false;
2691 }
2692 }
2693
2694 // Either both `invoke`s must not have a normal destination,
2695 // or both `invoke`s must have a normal destination,
2696 auto HasNormalDest = [](InvokeInst *II) {
2697 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2698 };
2699 if (any_of(Invokes, HasNormalDest)) {
2700 // Do not merge `invoke` that does not have a normal destination with one
2701 // that does have a normal destination, even though doing so would be legal.
2702 if (!all_of(Invokes, HasNormalDest))
2703 return false;
2704
2705 // All normal destinations must be identical.
2706 BasicBlock *NormalBB = nullptr;
2707 for (InvokeInst *II : Invokes) {
2708 BasicBlock *CurrNormalBB = II->getNormalDest();
2709 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2710 if (!NormalBB)
2711 NormalBB = CurrNormalBB;
2712 else if (NormalBB != CurrNormalBB)
2713 return false;
2714 }
2715
2716 // In the normal destination, the incoming values for these two `invoke`s
2717 // must be compatible.
2718 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2720 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2721 &EquivalenceSet))
2722 return false;
2723 }
2724
2725#ifndef NDEBUG
2726 // All unwind destinations must be identical.
2727 // We know that because we have started from said unwind destination.
2728 BasicBlock *UnwindBB = nullptr;
2729 for (InvokeInst *II : Invokes) {
2730 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2731 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2732 if (!UnwindBB)
2733 UnwindBB = CurrUnwindBB;
2734 else
2735 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2736 }
2737#endif
2738
2739 // In the unwind destination, the incoming values for these two `invoke`s
2740 // must be compatible.
2742 Invokes.front()->getUnwindDest(),
2743 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2744 return false;
2745
2746 // Ignoring arguments, these `invoke`s must be identical,
2747 // including operand bundles.
2748 const InvokeInst *II0 = Invokes.front();
2749 for (auto *II : Invokes.drop_front())
2750 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2751 return false;
2752
2753 // Can we theoretically form the data operands for the merged `invoke`?
2754 auto IsIllegalToMergeArguments = [](auto Ops) {
2755 Use &U0 = std::get<0>(Ops);
2756 Use &U1 = std::get<1>(Ops);
2757 if (U0 == U1)
2758 return false;
2760 U0.getOperandNo());
2761 };
2762 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2763 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2764 IsIllegalToMergeArguments))
2765 return false;
2766
2767 return true;
2768}
2769
2770} // namespace
2771
2772// Merge all invokes in the provided set, all of which are compatible
2773// as per the `CompatibleSets::shouldBelongToSameSet()`.
// Clones one representative invoke into a fresh block, PHIs together any
// differing callees/data operands, and replaces each original invoke with an
// unconditional branch to the merged one.
 2775 DomTreeUpdater *DTU) {
 2776 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
 2777
 2779 if (DTU)
 2780 Updates.reserve(2 + 3 * Invokes.size());
 2781
 2782 bool HasNormalDest =
 2783 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
 2784
 2785 // Clone one of the invokes into a new basic block.
 2786 // Since they are all compatible, it doesn't matter which invoke is cloned.
 2787 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
 2788 InvokeInst *II0 = Invokes.front();
 2789 BasicBlock *II0BB = II0->getParent();
 2790 BasicBlock *InsertBeforeBlock =
 2791 II0->getParent()->getIterator()->getNextNode();
 2792 Function *Func = II0BB->getParent();
 2793 LLVMContext &Ctx = II0->getContext();
 2794
 2795 BasicBlock *MergedInvokeBB = BasicBlock::Create(
 2796 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
 2797
 2798 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
 2799 // NOTE: all invokes have the same attributes, so no handling needed.
 2800 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
 2801
 2802 if (!HasNormalDest) {
 2803 // This set does not have a normal destination,
 2804 // so just form a new block with unreachable terminator.
 2805 BasicBlock *MergedNormalDest = BasicBlock::Create(
 2806 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
 2807 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
 2808 UI->setDebugLoc(DebugLoc::getTemporary());
 2809 MergedInvoke->setNormalDest(MergedNormalDest);
 2810 }
 2811
 2812 // The unwind destination, however, remains identical for all invokes here.
 2813
 2814 return MergedInvoke;
 2815 }();
 2816
 2817 if (DTU) {
 2818 // Predecessor blocks that contained these invokes will now branch to
 2819 // the new block that contains the merged invoke, ...
 2820 for (InvokeInst *II : Invokes)
 2821 Updates.push_back(
 2822 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
 2823
 2824 // ... which has the new `unreachable` block as normal destination,
 2825 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
 2826 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
 2827 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
 2828 SuccBBOfMergedInvoke});
 2829
 2830 // Since predecessor blocks now unconditionally branch to a new block,
 2831 // they no longer branch to their original successors.
 2832 for (InvokeInst *II : Invokes)
 2833 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
 2834 Updates.push_back(
 2835 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
 2836 }
 2837
 2838 bool IsIndirectCall = Invokes[0]->isIndirectCall();
 2839
 2840 // Form the merged operands for the merged invoke.
 2841 for (Use &U : MergedInvoke->operands()) {
 2842 // Only PHI together the indirect callees and data operands.
 2843 if (MergedInvoke->isCallee(&U)) {
 2844 if (!IsIndirectCall)
 2845 continue;
 2846 } else if (!MergedInvoke->isDataOperand(&U))
 2847 continue;
 2848
 2849 // Don't create trivial PHI's with all-identical incoming values.
 2850 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
 2851 return II->getOperand(U.getOperandNo()) != U.get();
 2852 });
 2853 if (!NeedPHI)
 2854 continue;
 2855
 2856 // Form a PHI out of all the data ops under this index.
 2858 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
 2859 for (InvokeInst *II : Invokes)
 2860 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
 2861
 2862 U.set(PN);
 2863 }
 2864
 2865 // We've ensured that each PHI node has compatible (identical) incoming values
 2866 // when coming from each of the `invoke`s in the current merge set,
 2867 // so update the PHI nodes accordingly.
 2868 for (BasicBlock *Succ : successors(MergedInvoke))
 2869 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
 2870 /*ExistPred=*/Invokes.front()->getParent());
 2871
 2872 // And finally, replace the original `invoke`s with an unconditional branch
 2873 // to the block with the merged `invoke`. Also, give that merged `invoke`
 2874 // the merged debugloc of all the original `invoke`s.
 2875 DILocation *MergedDebugLoc = nullptr;
 2876 for (InvokeInst *II : Invokes) {
 2877 // Compute the debug location common to all the original `invoke`s.
 2878 if (!MergedDebugLoc)
 2879 MergedDebugLoc = II->getDebugLoc();
 2880 else
 2881 MergedDebugLoc =
 2882 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
 2883
 2884 // And replace the old `invoke` with an unconditional branch
 2885 // to the block with the merged `invoke`.
 2886 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
 2887 OrigSuccBB->removePredecessor(II->getParent());
 2888 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
 2889 // The unconditional branch is part of the replacement for the original
 2890 // invoke, so should use its DebugLoc.
 2891 BI->setDebugLoc(II->getDebugLoc());
 2892 bool Success = MergedInvoke->tryIntersectAttributes(II);
 2893 assert(Success && "Merged invokes with incompatible attributes");
 2894 // For NDEBUG Compile
 2895 (void)Success;
 2896 II->replaceAllUsesWith(MergedInvoke);
 2897 II->eraseFromParent();
 2898 ++NumInvokesMerged;
 2899 }
 2900 MergedInvoke->setDebugLoc(MergedDebugLoc);
 2901 ++NumInvokeSetsFormed;
 2902
 2903 if (DTU)
 2904 DTU->applyUpdates(Updates);
 2905}
2906
2907/// If this block is a `landingpad` exception handling block, categorize all
2908/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2909/// being "mergeable" together, and then merge invokes in each set together.
2910///
2911/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2912/// [...] [...]
2913/// | |
2914/// [invoke0] [invoke1]
2915/// / \ / \
2916/// [cont0] [landingpad] [cont1]
2917/// to:
2918/// [...] [...]
2919/// \ /
2920/// [invoke]
2921/// / \
2922/// [cont] [landingpad]
2923///
2924/// But of course we can only do that if the invokes share the `landingpad`,
2925/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2926/// and the invoked functions are "compatible".
2929 return false;
2930
2931 bool Changed = false;
2932
2933 // FIXME: generalize to all exception handling blocks?
2934 if (!BB->isLandingPad())
2935 return Changed;
2936
2937 CompatibleSets Grouper;
2938
2939 // Record all the predecessors of this `landingpad`. As per verifier,
2940 // the only allowed predecessor is the unwind edge of an `invoke`.
2941 // We want to group "compatible" `invokes` into the same set to be merged.
2942 for (BasicBlock *PredBB : predecessors(BB))
2943 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2944
2945 // And now, merge `invoke`s that were grouped together.
2946 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2947 if (Invokes.size() < 2)
2948 continue;
2949 Changed = true;
2950 mergeCompatibleInvokesImpl(Invokes, DTU);
2951 }
2952
2953 return Changed;
2954}
2955
2956namespace {
2957/// Track ephemeral values, which should be ignored for cost-modelling
2958/// purposes. Requires walking instructions in reverse order.
2959class EphemeralValueTracker {
2960 SmallPtrSet<const Instruction *, 32> EphValues;
2961
2962 bool isEphemeral(const Instruction *I) {
2963 if (isa<AssumeInst>(I))
2964 return true;
2965 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2966 all_of(I->users(), [&](const User *U) {
2967 return EphValues.count(cast<Instruction>(U));
2968 });
2969 }
2970
2971public:
2972 bool track(const Instruction *I) {
2973 if (isEphemeral(I)) {
2974 EphValues.insert(I);
2975 return true;
2976 }
2977 return false;
2978 }
2979
2980 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2981};
2982} // namespace
2983
2984/// Determine if we can hoist or sink a sole store instruction out of a
2985/// conditional block.
2986///
2987/// We are looking for code like the following:
2988/// BrBB:
2989/// store i32 %add, i32* %arrayidx2
2990/// ... // No other stores or function calls (we could be calling a memory
2991/// ... // function).
2992/// %cmp = icmp ult %x, %y
2993/// br i1 %cmp, label %EndBB, label %ThenBB
2994/// ThenBB:
2995/// store i32 %add5, i32* %arrayidx2
2996/// br label EndBB
2997/// EndBB:
2998/// ...
2999/// We are going to transform this into:
3000/// BrBB:
3001/// store i32 %add, i32* %arrayidx2
3002/// ... //
3003/// %cmp = icmp ult %x, %y
3004/// %add.add5 = select i1 %cmp, i32 %add, %add5
3005/// store i32 %add.add5, i32* %arrayidx2
3006/// ...
3007///
3008/// \return The pointer to the value of the previous store if the store can be
3009/// hoisted into the predecessor block. nullptr otherwise.
 3011 BasicBlock *StoreBB, BasicBlock *EndBB) {
 3012 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
 3013 if (!StoreToHoist)
 3014 return nullptr;
 3015
 3016 // Volatile or atomic.
 3017 if (!StoreToHoist->isSimple())
 3018 return nullptr;
 3019
 3020 Value *StorePtr = StoreToHoist->getPointerOperand();
 3021 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
 3022
 3023 // Look for a store to the same pointer in BrBB.
 3024 unsigned MaxNumInstToLookAt = 9;
 3025 // Skip pseudo probe intrinsic calls which are not really killing any memory
 3026 // accesses.
 3027 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
 3028 if (!MaxNumInstToLookAt)
 3029 break;
 3030 --MaxNumInstToLookAt;
 3031
 3032 // Could be calling an instruction that affects memory like free().
 3033 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
 3034 return nullptr;
 3035
 3036 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
 3037 // Found the previous store to same location and type. Make sure it is
 3038 // simple, to avoid introducing a spurious non-atomic write after an
 3039 // atomic write.
 3040 if (SI->getPointerOperand() == StorePtr &&
 3041 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
 3042 SI->getAlign() >= StoreToHoist->getAlign())
 3043 // Found the previous store, return its value operand.
 3044 return SI->getValueOperand();
 3045 return nullptr; // Unknown store.
 3046 }
 3047
 3048 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
 3049 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
 3050 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
 3051 Value *Obj = getUnderlyingObject(StorePtr);
 3052 bool ExplicitlyDereferenceableOnly;
 3053 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
 3055 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
 3057 (!ExplicitlyDereferenceableOnly ||
 3058 isDereferenceablePointer(StorePtr, StoreTy,
 3059 LI->getDataLayout()))) {
 3060 // Found a previous load, return it.
 3061 return LI;
 3062 }
 3063 }
 3064 // The load didn't work out, but we may still find a store.
 3065 }
 3066 }
 3067
 3068 return nullptr;
}
3070
3071/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3072/// converted to selects.
/// Accumulates the select cost into \p Cost, bumps \p SpeculatedInstructions
/// for unfolded ConstantExprs, and returns true only if at least one
/// non-trivial PHI is rewritable (and nothing disqualifies the transform).
 3074 BasicBlock *EndBB,
 3075 unsigned &SpeculatedInstructions,
 3076 InstructionCost &Cost,
 3077 const TargetTransformInfo &TTI) {
 3079 BB->getParent()->hasMinSize()
 3082
 3083 bool HaveRewritablePHIs = false;
 3084 for (PHINode &PN : EndBB->phis()) {
 3085 Value *OrigV = PN.getIncomingValueForBlock(BB);
 3086 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
 3087
 3088 // FIXME: Try to remove some of the duplication with
 3089 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
 3090 if (ThenV == OrigV)
 3091 continue;
 3092
 3093 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
 3094 CmpInst::makeCmpResultType(PN.getType()),
 3096
 3097 // Don't convert to selects if we could remove undefined behavior instead.
 3098 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
 3100 return false;
 3101
 3102 HaveRewritablePHIs = true;
 3103 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
 3104 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
 3105 if (!OrigCE && !ThenCE)
 3106 continue; // Known cheap (FIXME: Maybe not true for aggregates).
 3107
 3108 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
 3109 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
 3110 InstructionCost MaxCost =
 3112 if (OrigCost + ThenCost > MaxCost)
 3113 return false;
 3114
 3115 // Account for the cost of an unfolded ConstantExpr which could end up
 3116 // getting expanded into Instructions.
 3117 // FIXME: This doesn't account for how many operations are combined in the
 3118 // constant expression.
 3119 ++SpeculatedInstructions;
 3120 if (SpeculatedInstructions > 1)
 3121 return false;
 3122 }
 3123
 3124 return HaveRewritablePHIs;
}
3126
// Heuristic: decide whether speculating the conditional block is likely
// profitable. Branches marked !unpredictable, or those without usable branch
// weights, are always considered worth speculating; otherwise the probability
// of branching straight to the end block must fall below the target's
// predictable-branch threshold.
 3128 std::optional<bool> Invert,
 3129 const TargetTransformInfo &TTI) {
 3130 // If the branch is non-unpredictable, and is predicted to *not* branch to
 3131 // the `then` block, then avoid speculating it.
 3132 if (BI->getMetadata(LLVMContext::MD_unpredictable))
 3133 return true;
 3134
 3135 uint64_t TWeight, FWeight;
 3136 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
 3137 return true;
 3138
 3139 if (!Invert.has_value())
 3140 return false;
 3141
 3142 uint64_t EndWeight = *Invert ? TWeight : FWeight;
 3143 BranchProbability BIEndProb =
 3144 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
 3145 BranchProbability Likely = TTI.getPredictableBranchThreshold();
 3146 return BIEndProb < Likely;
}
3148
3149/// Speculate a conditional basic block flattening the CFG.
3150///
3151/// Note that this is a very risky transform currently. Speculating
3152/// instructions like this is most often not desirable. Instead, there is an MI
3153/// pass which can do it with full awareness of the resource constraints.
3154/// However, some cases are "obvious" and we should do them directly. An example of
3155/// this is speculating a single, reasonably cheap instruction.
3156///
3157/// There is only one distinct advantage to flattening the CFG at the IR level:
3158/// it makes very common but simplistic optimizations such as are common in
3159/// instcombine and the DAG combiner more powerful by removing CFG edges and
3160/// modeling their effects with easier to reason about SSA value graphs.
3161///
3162///
3163/// An illustration of this transform is turning this IR:
3164/// \code
3165/// BB:
3166/// %cmp = icmp ult %x, %y
3167/// br i1 %cmp, label %EndBB, label %ThenBB
3168/// ThenBB:
3169/// %sub = sub %x, %y
3170/// br label BB2
3171/// EndBB:
3172/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3173/// ...
3174/// \endcode
3175///
3176/// Into this IR:
3177/// \code
3178/// BB:
3179/// %cmp = icmp ult %x, %y
3180/// %sub = sub %x, %y
3181/// %cond = select i1 %cmp, 0, %sub
3182/// ...
3183/// \endcode
3184///
3185/// \returns true if the conditional block is removed.
3186bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3187 BasicBlock *ThenBB) {
3188 if (!Options.SpeculateBlocks)
3189 return false;
3190
3191 // Be conservative for now. FP select instruction can often be expensive.
3192 Value *BrCond = BI->getCondition();
3193 if (isa<FCmpInst>(BrCond))
3194 return false;
3195
3196 BasicBlock *BB = BI->getParent();
3197 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3198 InstructionCost Budget =
3200
3201 // If ThenBB is actually on the false edge of the conditional branch, remember
3202 // to swap the select operands later.
3203 bool Invert = false;
3204 if (ThenBB != BI->getSuccessor(0)) {
3205 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3206 Invert = true;
3207 }
3208 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3209
3210 if (!isProfitableToSpeculate(BI, Invert, TTI))
3211 return false;
3212
3213 // Keep a count of how many times instructions are used within ThenBB when
3214 // they are candidates for sinking into ThenBB. Specifically:
3215 // - They are defined in BB, and
3216 // - They have no side effects, and
3217 // - All of their uses are in ThenBB.
3218 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3219
3220 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3221
3222 unsigned SpeculatedInstructions = 0;
3223 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3224 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3225 Value *SpeculatedStoreValue = nullptr;
3226 StoreInst *SpeculatedStore = nullptr;
3227 EphemeralValueTracker EphTracker;
3228 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3229 // Skip pseudo probes. The consequence is we lose track of the branch
3230 // probability for ThenBB, which is fine since the optimization here takes
3231 // place regardless of the branch probability.
3232 if (isa<PseudoProbeInst>(I)) {
3233 // The probe should be deleted so that it will not be over-counted when
3234 // the samples collected on the non-conditional path are counted towards
3235 // the conditional path. We leave it for the counts inference algorithm to
3236 // figure out a proper count for an unknown probe.
3237 SpeculatedPseudoProbes.push_back(&I);
3238 continue;
3239 }
3240
3241 // Ignore ephemeral values, they will be dropped by the transform.
3242 if (EphTracker.track(&I))
3243 continue;
3244
3245 // Only speculatively execute a single instruction (not counting the
3246 // terminator) for now.
3247 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3249 SpeculatedConditionalLoadsStores.size() <
3251 // Not count load/store into cost if target supports conditional faulting
3252 // b/c it's cheap to speculate it.
3253 if (IsSafeCheapLoadStore)
3254 SpeculatedConditionalLoadsStores.push_back(&I);
3255 else
3256 ++SpeculatedInstructions;
3257
3258 if (SpeculatedInstructions > 1)
3259 return false;
3260
3261 // Don't hoist the instruction if it's unsafe or expensive.
3262 if (!IsSafeCheapLoadStore &&
3264 !(HoistCondStores && !SpeculatedStoreValue &&
3265 (SpeculatedStoreValue =
3266 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3267 return false;
3268 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3271 return false;
3272
3273 // Store the store speculation candidate.
3274 if (!SpeculatedStore && SpeculatedStoreValue)
3275 SpeculatedStore = cast<StoreInst>(&I);
3276
3277 // Do not hoist the instruction if any of its operands are defined but not
3278 // used in BB. The transformation will prevent the operand from
3279 // being sunk into the use block.
3280 for (Use &Op : I.operands()) {
3282 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3283 continue; // Not a candidate for sinking.
3284
3285 ++SinkCandidateUseCounts[OpI];
3286 }
3287 }
3288
3289 // Consider any sink candidates which are only used in ThenBB as costs for
3290 // speculation. Note, while we iterate over a DenseMap here, we are summing
3291 // and so iteration order isn't significant.
3292 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3293 if (Inst->hasNUses(Count)) {
3294 ++SpeculatedInstructions;
3295 if (SpeculatedInstructions > 1)
3296 return false;
3297 }
3298
3299 // Check that we can insert the selects and that it's not too expensive to do
3300 // so.
3301 bool Convert =
3302 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3304 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3305 SpeculatedInstructions, Cost, TTI);
3306 if (!Convert || Cost > Budget)
3307 return false;
3308
3309 // If we get here, we can hoist the instruction and if-convert.
3310 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3311
3312 Instruction *Sel = nullptr;
3313 // Insert a select of the value of the speculated store.
3314 if (SpeculatedStoreValue) {
3315 IRBuilder<NoFolder> Builder(BI);
3316 Value *OrigV = SpeculatedStore->getValueOperand();
3317 Value *TrueV = SpeculatedStore->getValueOperand();
3318 Value *FalseV = SpeculatedStoreValue;
3319 if (Invert)
3320 std::swap(TrueV, FalseV);
3321 Value *S = Builder.CreateSelect(
3322 BrCond, TrueV, FalseV, "spec.store.select", BI);
3323 Sel = cast<Instruction>(S);
3324 SpeculatedStore->setOperand(0, S);
3325 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3326 SpeculatedStore->getDebugLoc());
3327 // The value stored is still conditional, but the store itself is now
3328 // unconditionally executed, so we must be sure that any linked dbg.assign
3329 // intrinsics are tracking the new stored value (the result of the
3330 // select). If we don't, and the store were to be removed by another pass
3331 // (e.g. DSE), then we'd eventually end up emitting a location describing
3332 // the conditional value, unconditionally.
3333 //
3334 // === Before this transformation ===
3335 // pred:
3336 // store %one, %x.dest, !DIAssignID !1
3337 // dbg.assign %one, "x", ..., !1, ...
3338 // br %cond if.then
3339 //
3340 // if.then:
3341 // store %two, %x.dest, !DIAssignID !2
3342 // dbg.assign %two, "x", ..., !2, ...
3343 //
3344 // === After this transformation ===
3345 // pred:
3346 // store %one, %x.dest, !DIAssignID !1
3347 // dbg.assign %one, "x", ..., !1
3348 // ...
3349 // %merge = select %cond, %two, %one
3350 // store %merge, %x.dest, !DIAssignID !2
3351 // dbg.assign %merge, "x", ..., !2
3352 for (DbgVariableRecord *DbgAssign :
3353 at::getDVRAssignmentMarkers(SpeculatedStore))
3354 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3355 DbgAssign->replaceVariableLocationOp(OrigV, S);
3356 }
3357
3358 // Metadata can be dependent on the condition we are hoisting above.
3359 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3360 // to avoid making it appear as if the condition is a constant, which would
3361 // be misleading while debugging.
3362 // Similarly strip attributes that maybe dependent on condition we are
3363 // hoisting above.
3364 for (auto &I : make_early_inc_range(*ThenBB)) {
3365 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3366 I.dropLocation();
3367 }
3368 I.dropUBImplyingAttrsAndMetadata();
3369
3370 // Drop ephemeral values.
3371 if (EphTracker.contains(&I)) {
3372 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3373 I.eraseFromParent();
3374 }
3375 }
3376
3377 // Hoist the instructions.
3378 // Drop DbgVariableRecords attached to these instructions.
3379 for (auto &It : *ThenBB)
3380 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3381 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3382 // equivalent).
3383 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3384 !DVR || !DVR->isDbgAssign())
3385 It.dropOneDbgRecord(&DR);
3386 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3387 std::prev(ThenBB->end()));
3388
3389 if (!SpeculatedConditionalLoadsStores.empty())
3390 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3391 Sel);
3392
3393 // Insert selects and rewrite the PHI operands.
3394 IRBuilder<NoFolder> Builder(BI);
3395 for (PHINode &PN : EndBB->phis()) {
3396 unsigned OrigI = PN.getBasicBlockIndex(BB);
3397 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3398 Value *OrigV = PN.getIncomingValue(OrigI);
3399 Value *ThenV = PN.getIncomingValue(ThenI);
3400
3401 // Skip PHIs which are trivial.
3402 if (OrigV == ThenV)
3403 continue;
3404
3405 // Create a select whose true value is the speculatively executed value and
3406 // false value is the pre-existing value. Swap them if the branch
3407 // destinations were inverted.
3408 Value *TrueV = ThenV, *FalseV = OrigV;
3409 if (Invert)
3410 std::swap(TrueV, FalseV);
3411 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3412 PN.setIncomingValue(OrigI, V);
3413 PN.setIncomingValue(ThenI, V);
3414 }
3415
3416 // Remove speculated pseudo probes.
3417 for (Instruction *I : SpeculatedPseudoProbes)
3418 I->eraseFromParent();
3419
3420 ++NumSpeculations;
3421 return true;
}
3423
3425
3426// Return false if number of blocks searched is too much.
3427static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3428 BlocksSet &ReachesNonLocalUses) {
3429 if (BB == DefBB)
3430 return true;
3431 if (!ReachesNonLocalUses.insert(BB).second)
3432 return true;
3433
3434 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3435 return false;
3436 for (BasicBlock *Pred : predecessors(BB))
3437 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3438 return false;
3439 return true;
3440}
3441
3442/// Return true if we can thread a branch across this block.
/// Also fills \p NonLocalUseBlocks with every block outside \p BB that uses a
/// value defined inside \p BB (callers use this to limit jump-threading).
 3444 BlocksSet &NonLocalUseBlocks) {
 3445 int Size = 0;
 3446 EphemeralValueTracker EphTracker;
 3447
 3448 // Walk the loop in reverse so that we can identify ephemeral values properly
 3449 // (values only feeding assumes).
 3450 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
 3451 // Can't fold blocks that contain noduplicate or convergent calls.
 3452 if (CallInst *CI = dyn_cast<CallInst>(&I))
 3453 if (CI->cannotDuplicate() || CI->isConvergent())
 3454 return false;
 3455
 3456 // Ignore ephemeral values which are deleted during codegen.
 3457 // We will delete Phis while threading, so Phis should not be accounted in
 3458 // block's size.
 3459 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
 3460 if (Size++ > MaxSmallBlockSize)
 3461 return false; // Don't clone large BB's.
 3462 }
 3463
 3464 // Record blocks with non-local uses of values defined in the current basic
 3465 // block.
 3466 for (User *U : I.users()) {
 3468 BasicBlock *UsedInBB = UI->getParent();
 3469 if (UsedInBB == BB) {
 3470 if (isa<PHINode>(UI))
 3471 return false;
 3472 } else
 3473 NonLocalUseBlocks.insert(UsedInBB);
 3474 }
 3475
 3476 // Looks ok, continue checking.
 3477 }
 3478
 3479 return true;
}
3481
// If \p From ends in a conditional branch on \p V with two distinct
// successors, \p V's value is known along the From->To edge; return that
// constant. Returns null when the value cannot be determined, or when \p V is
// defined in \p To itself (it might come from a previous loop iteration).
 3483 BasicBlock *To) {
 3484 // Don't look past the block defining the value, we might get the value from
 3485 // a previous loop iteration.
 3486 auto *I = dyn_cast<Instruction>(V);
 3487 if (I && I->getParent() == To)
 3488 return nullptr;
 3489
 3490 // We know the value if the From block branches on it.
 3491 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
 3492 if (BI && BI->isConditional() && BI->getCondition() == V &&
 3493 BI->getSuccessor(0) != BI->getSuccessor(1))
 3494 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
 3496
 3497 return nullptr;
}
3499
3500/// If we have a conditional branch on something for which we know the constant
3501/// value in predecessors (e.g. a phi node in the current block), thread edges
3502/// from the predecessor to their ultimate destination.
3503static std::optional<bool>
3505 const DataLayout &DL,
3506 AssumptionCache *AC) {
3508 BasicBlock *BB = BI->getParent();
3509 Value *Cond = BI->getCondition();
3511 if (PN && PN->getParent() == BB) {
3512 // Degenerate case of a single entry PHI.
3513 if (PN->getNumIncomingValues() == 1) {
3515 return true;
3516 }
3517
3518 for (Use &U : PN->incoming_values())
3519 if (auto *CB = dyn_cast<ConstantInt>(U))
3520 KnownValues[CB].insert(PN->getIncomingBlock(U));
3521 } else {
3522 for (BasicBlock *Pred : predecessors(BB)) {
3523 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3524 KnownValues[CB].insert(Pred);
3525 }
3526 }
3527
3528 if (KnownValues.empty())
3529 return false;
3530
3531 // Now we know that this block has multiple preds and two succs.
3532 // Check that the block is small enough and record which non-local blocks use
3533 // values defined in the block.
3534
3535 BlocksSet NonLocalUseBlocks;
3536 BlocksSet ReachesNonLocalUseBlocks;
3537 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3538 return false;
3539
3540 // Jump-threading can only be done to destinations where no values defined
3541 // in BB are live.
3542
3543 // Quickly check if both destinations have uses. If so, jump-threading cannot
3544 // be done.
3545 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3546 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3547 return false;
3548
3549 // Search backward from NonLocalUseBlocks to find which blocks
3550 // reach non-local uses.
3551 for (BasicBlock *UseBB : NonLocalUseBlocks)
3552 // Give up if too many blocks are searched.
3553 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3554 return false;
3555
3556 for (const auto &Pair : KnownValues) {
3557 ConstantInt *CB = Pair.first;
3558 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3559 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3560
3561 // Okay, we now know that all edges from PredBB should be revectored to
3562 // branch to RealDest.
3563 if (RealDest == BB)
3564 continue; // Skip self loops.
3565
3566 // Skip if the predecessor's terminator is an indirect branch.
3567 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3568 return isa<IndirectBrInst>(PredBB->getTerminator());
3569 }))
3570 continue;
3571
3572 // Only revector to RealDest if no values defined in BB are live.
3573 if (ReachesNonLocalUseBlocks.contains(RealDest))
3574 continue;
3575
3576 LLVM_DEBUG({
3577 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3578 << " has value " << *Pair.first << " in predecessors:\n";
3579 for (const BasicBlock *PredBB : Pair.second)
3580 dbgs() << " " << PredBB->getName() << "\n";
3581 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3582 });
3583
3584 // Split the predecessors we are threading into a new edge block. We'll
3585 // clone the instructions into this block, and then redirect it to RealDest.
3586 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3587
3588 // TODO: These just exist to reduce test diff, we can drop them if we like.
3589 EdgeBB->setName(RealDest->getName() + ".critedge");
3590 EdgeBB->moveBefore(RealDest);
3591
3592 // Update PHI nodes.
3593 addPredecessorToBlock(RealDest, EdgeBB, BB);
3594
3595 // BB may have instructions that are being threaded over. Clone these
3596 // instructions into EdgeBB. We know that there will be no uses of the
3597 // cloned instructions outside of EdgeBB.
3598 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3599 ValueToValueMapTy TranslateMap; // Track translated values.
3600 TranslateMap[Cond] = CB;
3601
3602 // RemoveDIs: track instructions that we optimise away while folding, so
3603 // that we can copy DbgVariableRecords from them later.
3604 BasicBlock::iterator SrcDbgCursor = BB->begin();
3605 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3606 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3607 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3608 continue;
3609 }
3610 // Clone the instruction.
3611 Instruction *N = BBI->clone();
3612 // Insert the new instruction into its new home.
3613 N->insertInto(EdgeBB, InsertPt);
3614
3615 if (BBI->hasName())
3616 N->setName(BBI->getName() + ".c");
3617
3618 // Update operands due to translation.
3619 // Key Instructions: Remap all the atom groups.
3620 if (const DebugLoc &DL = BBI->getDebugLoc())
3621 mapAtomInstance(DL, TranslateMap);
3622 RemapInstruction(N, TranslateMap,
3624
3625 // Check for trivial simplification.
3626 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3627 if (!BBI->use_empty())
3628 TranslateMap[&*BBI] = V;
3629 if (!N->mayHaveSideEffects()) {
3630 N->eraseFromParent(); // Instruction folded away, don't need actual
3631 // inst
3632 N = nullptr;
3633 }
3634 } else {
3635 if (!BBI->use_empty())
3636 TranslateMap[&*BBI] = N;
3637 }
3638 if (N) {
3639 // Copy all debug-info attached to instructions from the last we
3640 // successfully clone, up to this instruction (they might have been
3641 // folded away).
3642 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3643 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3644 SrcDbgCursor = std::next(BBI);
3645 // Clone debug-info on this instruction too.
3646 N->cloneDebugInfoFrom(&*BBI);
3647
3648 // Register the new instruction with the assumption cache if necessary.
3649 if (auto *Assume = dyn_cast<AssumeInst>(N))
3650 if (AC)
3651 AC->registerAssumption(Assume);
3652 }
3653 }
3654
3655 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3656 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3657 InsertPt->cloneDebugInfoFrom(BI);
3658
3659 BB->removePredecessor(EdgeBB);
3660 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3661 EdgeBI->setSuccessor(0, RealDest);
3662 EdgeBI->setDebugLoc(BI->getDebugLoc());
3663
3664 if (DTU) {
3666 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3667 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3668 DTU->applyUpdates(Updates);
3669 }
3670
3671 // For simplicity, we created a separate basic block for the edge. Merge
3672 // it back into the predecessor if possible. This not only avoids
3673 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3674 // bypass the check for trivial cycles above.
3675 MergeBlockIntoPredecessor(EdgeBB, DTU);
3676
3677 // Signal repeat, simplifying any other constants.
3678 return std::nullopt;
3679 }
3680
3681 return false;
3682}
3683
3684bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3685 // Note: If BB is a loop header then there is a risk that threading introduces
3686 // a non-canonical loop by moving a back edge. So we avoid this optimization
3687 // for loop headers if NeedCanonicalLoop is set.
3688 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3689 return false;
3690
3691 std::optional<bool> Result;
3692 bool EverChanged = false;
3693 do {
3694 // Note that None means "we changed things, but recurse further."
3695 Result =
3697 EverChanged |= Result == std::nullopt || *Result;
3698 } while (Result == std::nullopt);
3699 return EverChanged;
3700}
3701
3702/// Given a BB that starts with the specified two-entry PHI node,
3703/// see if we can eliminate it.
3706 const DataLayout &DL,
3707 bool SpeculateUnpredictables) {
3708 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3709 // statement", which has a very simple dominance structure. Basically, we
3710 // are trying to find the condition that is being branched on, which
3711 // subsequently causes this merge to happen. We really want control
3712 // dependence information for this check, but simplifycfg can't keep it up
3713 // to date, and this catches most of the cases we care about anyway.
3714 BasicBlock *BB = PN->getParent();
3715
3716 BasicBlock *IfTrue, *IfFalse;
3717 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3718 if (!DomBI)
3719 return false;
3720 Value *IfCond = DomBI->getCondition();
3721 // Don't bother if the branch will be constant folded trivially.
3722 if (isa<ConstantInt>(IfCond))
3723 return false;
3724
3725 BasicBlock *DomBlock = DomBI->getParent();
3728 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3729 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3730 });
3731 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3732 "Will have either one or two blocks to speculate.");
3733
3734 // If the branch is non-unpredictable, see if we either predictably jump to
3735 // the merge bb (if we have only a single 'then' block), or if we predictably
3736 // jump to one specific 'then' block (if we have two of them).
3737 // It isn't beneficial to speculatively execute the code
3738 // from the block that we know is predictably not entered.
3739 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3740 if (!IsUnpredictable) {
3741 uint64_t TWeight, FWeight;
3742 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3743 (TWeight + FWeight) != 0) {
3744 BranchProbability BITrueProb =
3745 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3746 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3747 BranchProbability BIFalseProb = BITrueProb.getCompl();
3748 if (IfBlocks.size() == 1) {
3749 BranchProbability BIBBProb =
3750 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3751 if (BIBBProb >= Likely)
3752 return false;
3753 } else {
3754 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3755 return false;
3756 }
3757 }
3758 }
3759
3760 // Don't try to fold an unreachable block. For example, the phi node itself
3761 // can't be the candidate if-condition for a select that we want to form.
3762 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3763 if (IfCondPhiInst->getParent() == BB)
3764 return false;
3765
3766 // Okay, we found that we can merge this two-entry phi node into a select.
3767 // Doing so would require us to fold *all* two entry phi nodes in this block.
3768 // At some point this becomes non-profitable (particularly if the target
3769 // doesn't support cmov's). Only do this transformation if there are two or
3770 // fewer PHI nodes in this block.
3771 unsigned NumPhis = 0;
3772 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3773 if (NumPhis > 2)
3774 return false;
3775
3776 // Loop over the PHI's seeing if we can promote them all to select
3777 // instructions. While we are at it, keep track of the instructions
3778 // that need to be moved to the dominating block.
3779 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3780 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3781 InstructionCost Cost = 0;
3782 InstructionCost Budget =
3784 if (SpeculateUnpredictables && IsUnpredictable)
3785 Budget += TTI.getBranchMispredictPenalty();
3786
3787 bool Changed = false;
3788 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3789 PHINode *PN = cast<PHINode>(II++);
3790 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3791 PN->replaceAllUsesWith(V);
3792 PN->eraseFromParent();
3793 Changed = true;
3794 continue;
3795 }
3796
3797 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3798 AggressiveInsts, Cost, Budget, TTI, AC,
3799 ZeroCostInstructions) ||
3800 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3801 AggressiveInsts, Cost, Budget, TTI, AC,
3802 ZeroCostInstructions))
3803 return Changed;
3804 }
3805
3806 // If we folded the first phi, PN dangles at this point. Refresh it. If
3807 // we ran out of PHIs then we simplified them all.
3808 PN = dyn_cast<PHINode>(BB->begin());
3809 if (!PN)
3810 return true;
3811
3812 // Return true if at least one of these is a 'not', and another is either
3813 // a 'not' too, or a constant.
3814 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3815 if (!match(V0, m_Not(m_Value())))
3816 std::swap(V0, V1);
3817 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3818 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3819 };
3820
3821 // Don't fold i1 branches on PHIs which contain binary operators or
3822 // (possibly inverted) select form of or/ands, unless one of
3823 // the incoming values is an 'not' and another one is freely invertible.
3824 // These can often be turned into switches and other things.
3825 auto IsBinOpOrAnd = [](Value *V) {
3826 return match(
3828 };
3829 if (PN->getType()->isIntegerTy(1) &&
3830 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3831 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3832 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3833 PN->getIncomingValue(1)))
3834 return Changed;
3835
3836 // If all PHI nodes are promotable, check to make sure that all instructions
3837 // in the predecessor blocks can be promoted as well. If not, we won't be able
3838 // to get rid of the control flow, so it's not worth promoting to select
3839 // instructions.
3840 for (BasicBlock *IfBlock : IfBlocks)
3841 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3842 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3843 // This is not an aggressive instruction that we can promote.
3844 // Because of this, we won't be able to get rid of the control flow, so
3845 // the xform is not worth it.
3846 return Changed;
3847 }
3848
3849 // If either of the blocks has it's address taken, we can't do this fold.
3850 if (any_of(IfBlocks,
3851 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3852 return Changed;
3853
3854 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3855 if (IsUnpredictable) dbgs() << " (unpredictable)";
3856 dbgs() << " T: " << IfTrue->getName()
3857 << " F: " << IfFalse->getName() << "\n");
3858
3859 // If we can still promote the PHI nodes after this gauntlet of tests,
3860 // do all of the PHI's now.
3861
3862 // Move all 'aggressive' instructions, which are defined in the
3863 // conditional parts of the if's up to the dominating block.
3864 for (BasicBlock *IfBlock : IfBlocks)
3865 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3866
3867 IRBuilder<NoFolder> Builder(DomBI);
3868 // Propagate fast-math-flags from phi nodes to replacement selects.
3869 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3870 // Change the PHI node into a select instruction.
3871 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3872 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3873
3874 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3875 isa<FPMathOperator>(PN) ? PN : nullptr,
3876 "", DomBI);
3877 PN->replaceAllUsesWith(Sel);
3878 Sel->takeName(PN);
3879 PN->eraseFromParent();
3880 }
3881
3882 // At this point, all IfBlocks are empty, so our if statement
3883 // has been flattened. Change DomBlock to jump directly to our new block to
3884 // avoid other simplifycfg's kicking in on the diamond.
3885 Builder.CreateBr(BB);
3886
3888 if (DTU) {
3889 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3890 for (auto *Successor : successors(DomBlock))
3891 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3892 }
3893
3894 DomBI->eraseFromParent();
3895 if (DTU)
3896 DTU->applyUpdates(Updates);
3897
3898 return true;
3899}
3900
3903 Value *RHS, const Twine &Name = "") {
3904 // Try to relax logical op to binary op.
3905 if (impliesPoison(RHS, LHS))
3906 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3907 if (Opc == Instruction::And)
3908 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3909 if (Opc == Instruction::Or)
3910 return Builder.CreateLogicalOr(LHS, RHS, Name);
3911 llvm_unreachable("Invalid logical opcode");
3912}
3913
3914/// Return true if either PBI or BI has branch weight available, and store
3915/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3916/// not have branch weight, use 1:1 as its weight.
3918 uint64_t &PredTrueWeight,
3919 uint64_t &PredFalseWeight,
3920 uint64_t &SuccTrueWeight,
3921 uint64_t &SuccFalseWeight) {
3922 bool PredHasWeights =
3923 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3924 bool SuccHasWeights =
3925 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3926 if (PredHasWeights || SuccHasWeights) {
3927 if (!PredHasWeights)
3928 PredTrueWeight = PredFalseWeight = 1;
3929 if (!SuccHasWeights)
3930 SuccTrueWeight = SuccFalseWeight = 1;
3931 return true;
3932 } else {
3933 return false;
3934 }
3935}
3936
3937/// Determine if the two branches share a common destination and deduce a glue
3938/// that joins the branches' conditions to arrive at the common destination if
3939/// that would be profitable.
3940static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3942 const TargetTransformInfo *TTI) {
3943 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3944 "Both blocks must end with a conditional branches.");
3946 "PredBB must be a predecessor of BB.");
3947
3948 // We have the potential to fold the conditions together, but if the
3949 // predecessor branch is predictable, we may not want to merge them.
3950 uint64_t PTWeight, PFWeight;
3951 BranchProbability PBITrueProb, Likely;
3952 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3953 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3954 (PTWeight + PFWeight) != 0) {
3955 PBITrueProb =
3956 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3957 Likely = TTI->getPredictableBranchThreshold();
3958 }
3959
3960 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3961 // Speculate the 2nd condition unless the 1st is probably true.
3962 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3963 return {{BI->getSuccessor(0), Instruction::Or, false}};
3964 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3965 // Speculate the 2nd condition unless the 1st is probably false.
3966 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3967 return {{BI->getSuccessor(1), Instruction::And, false}};
3968 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3969 // Speculate the 2nd condition unless the 1st is probably true.
3970 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3971 return {{BI->getSuccessor(1), Instruction::And, true}};
3972 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3973 // Speculate the 2nd condition unless the 1st is probably false.
3974 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3975 return {{BI->getSuccessor(0), Instruction::Or, true}};
3976 }
3977 return std::nullopt;
3978}
3979
3981 DomTreeUpdater *DTU,
3982 MemorySSAUpdater *MSSAU,
3983 const TargetTransformInfo *TTI) {
3984 BasicBlock *BB = BI->getParent();
3985 BasicBlock *PredBlock = PBI->getParent();
3986
3987 // Determine if the two branches share a common destination.
3988 BasicBlock *CommonSucc;
3990 bool InvertPredCond;
3991 std::tie(CommonSucc, Opc, InvertPredCond) =
3993
3994 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3995
3996 IRBuilder<> Builder(PBI);
3997 // The builder is used to create instructions to eliminate the branch in BB.
3998 // If BB's terminator has !annotation metadata, add it to the new
3999 // instructions.
4000 Builder.CollectMetadataToCopy(BB->getTerminator(),
4001 {LLVMContext::MD_annotation});
4002
4003 // If we need to invert the condition in the pred block to match, do so now.
4004 if (InvertPredCond) {
4005 InvertBranch(PBI, Builder);
4006 }
4007
4008 BasicBlock *UniqueSucc =
4009 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4010
4011 // Before cloning instructions, notify the successor basic block that it
4012 // is about to have a new predecessor. This will update PHI nodes,
4013 // which will allow us to update live-out uses of bonus instructions.
4014 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4015
4016 // Try to update branch weights.
4017 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4018 SmallVector<uint64_t, 2> MDWeights;
4019 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4020 SuccTrueWeight, SuccFalseWeight)) {
4021
4022 if (PBI->getSuccessor(0) == BB) {
4023 // PBI: br i1 %x, BB, FalseDest
4024 // BI: br i1 %y, UniqueSucc, FalseDest
4025 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4026 MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
4027 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4028 // TrueWeight for PBI * FalseWeight for BI.
4029 // We assume that total weights of a BranchInst can fit into 32 bits.
4030 // Therefore, we will not have overflow using 64-bit arithmetic.
4031 MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4032 PredTrueWeight * SuccFalseWeight);
4033 } else {
4034 // PBI: br i1 %x, TrueDest, BB
4035 // BI: br i1 %y, TrueDest, UniqueSucc
4036 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4037 // FalseWeight for PBI * TrueWeight for BI.
4038 MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4039 PredFalseWeight * SuccTrueWeight);
4040 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4041 MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
4042 }
4043
4044 setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
4045 /*ElideAllZero=*/true);
4046
4047 // TODO: If BB is reachable from all paths through PredBlock, then we
4048 // could replace PBI's branch probabilities with BI's.
4049 } else
4050 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4051
4052 // Now, update the CFG.
4053 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4054
4055 if (DTU)
4056 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4057 {DominatorTree::Delete, PredBlock, BB}});
4058
4059 // If BI was a loop latch, it may have had associated loop metadata.
4060 // We need to copy it to the new latch, that is, PBI.
4061 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4062 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4063
4064 ValueToValueMapTy VMap; // maps original values to cloned values
4066
4067 Module *M = BB->getModule();
4068
4069 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4070 for (DbgVariableRecord &DVR :
4072 RemapDbgRecord(M, &DVR, VMap,
4074 }
4075
4076 // Now that the Cond was cloned into the predecessor basic block,
4077 // or/and the two conditions together.
4078 Value *BICond = VMap[BI->getCondition()];
4079 PBI->setCondition(
4080 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4082 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4083 if (!MDWeights.empty()) {
4084 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4085 setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
4086 /*IsExpected=*/false, /*ElideAllZero=*/true);
4087 }
4088
4089 ++NumFoldBranchToCommonDest;
4090 return true;
4091}
4092
4093/// Return if an instruction's type or any of its operands' types are a vector
4094/// type.
4095static bool isVectorOp(Instruction &I) {
4096 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4097 return U->getType()->isVectorTy();
4098 });
4099}
4100
4101/// If this basic block is simple enough, and if a predecessor branches to us
4102/// and one of our successors, fold the block into the predecessor and use
4103/// logical operations to pick the right destination.
4105 MemorySSAUpdater *MSSAU,
4106 const TargetTransformInfo *TTI,
4107 unsigned BonusInstThreshold) {
4108 // If this block ends with an unconditional branch,
4109 // let speculativelyExecuteBB() deal with it.
4110 if (!BI->isConditional())
4111 return false;
4112
4113 BasicBlock *BB = BI->getParent();
4117
4119
4121 Cond->getParent() != BB || !Cond->hasOneUse())
4122 return false;
4123
4124 // Finally, don't infinitely unroll conditional loops.
4125 if (is_contained(successors(BB), BB))
4126 return false;
4127
4128 // With which predecessors will we want to deal with?
4130 for (BasicBlock *PredBlock : predecessors(BB)) {
4131 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4132
4133 // Check that we have two conditional branches. If there is a PHI node in
4134 // the common successor, verify that the same value flows in from both
4135 // blocks.
4136 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4137 continue;
4138
4139 // Determine if the two branches share a common destination.
4140 BasicBlock *CommonSucc;
4142 bool InvertPredCond;
4143 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4144 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4145 else
4146 continue;
4147
4148 // Check the cost of inserting the necessary logic before performing the
4149 // transformation.
4150 if (TTI) {
4151 Type *Ty = BI->getCondition()->getType();
4152 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
4153 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4154 !isa<CmpInst>(PBI->getCondition())))
4155 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4156
4158 continue;
4159 }
4160
4161 // Ok, we do want to deal with this predecessor. Record it.
4162 Preds.emplace_back(PredBlock);
4163 }
4164
4165 // If there aren't any predecessors into which we can fold,
4166 // don't bother checking the cost.
4167 if (Preds.empty())
4168 return false;
4169
4170 // Only allow this transformation if computing the condition doesn't involve
4171 // too many instructions and these involved instructions can be executed
4172 // unconditionally. We denote all involved instructions except the condition
4173 // as "bonus instructions", and only allow this transformation when the
4174 // number of the bonus instructions we'll need to create when cloning into
4175 // each predecessor does not exceed a certain threshold.
4176 unsigned NumBonusInsts = 0;
4177 bool SawVectorOp = false;
4178 const unsigned PredCount = Preds.size();
4179 for (Instruction &I : *BB) {
4180 // Don't check the branch condition comparison itself.
4181 if (&I == Cond)
4182 continue;
4183 // Ignore the terminator.
4184 if (isa<BranchInst>(I))
4185 continue;
4186 // I must be safe to execute unconditionally.
4188 return false;
4189 SawVectorOp |= isVectorOp(I);
4190
4191 // Account for the cost of duplicating this instruction into each
4192 // predecessor. Ignore free instructions.
4193 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4195 NumBonusInsts += PredCount;
4196
4197 // Early exits once we reach the limit.
4198 if (NumBonusInsts >
4199 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4200 return false;
4201 }
4202
4203 auto IsBCSSAUse = [BB, &I](Use &U) {
4204 auto *UI = cast<Instruction>(U.getUser());
4205 if (auto *PN = dyn_cast<PHINode>(UI))
4206 return PN->getIncomingBlock(U) == BB;
4207 return UI->getParent() == BB && I.comesBefore(UI);
4208 };
4209
4210 // Does this instruction require rewriting of uses?
4211 if (!all_of(I.uses(), IsBCSSAUse))
4212 return false;
4213 }
4214 if (NumBonusInsts >
4215 BonusInstThreshold *
4216 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4217 return false;
4218
4219 // Ok, we have the budget. Perform the transformation.
4220 for (BasicBlock *PredBlock : Preds) {
4221 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4222 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4223 }
4224 return false;
4225}
4226
4227// If there is only one store in BB1 and BB2, return it, otherwise return
4228// nullptr.
4230 StoreInst *S = nullptr;
4231 for (auto *BB : {BB1, BB2}) {
4232 if (!BB)
4233 continue;
4234 for (auto &I : *BB)
4235 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4236 if (S)
4237 // Multiple stores seen.
4238 return nullptr;
4239 else
4240 S = SI;
4241 }
4242 }
4243 return S;
4244}
4245
4247 Value *AlternativeV = nullptr) {
4248 // PHI is going to be a PHI node that allows the value V that is defined in
4249 // BB to be referenced in BB's only successor.
4250 //
4251 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4252 // doesn't matter to us what the other operand is (it'll never get used). We
4253 // could just create a new PHI with an undef incoming value, but that could
4254 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4255 // other PHI. So here we directly look for some PHI in BB's successor with V
4256 // as an incoming operand. If we find one, we use it, else we create a new
4257 // one.
4258 //
4259 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4260 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4261 // where OtherBB is the single other predecessor of BB's only successor.
4262 PHINode *PHI = nullptr;
4263 BasicBlock *Succ = BB->getSingleSuccessor();
4264
4265 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4266 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4267 PHI = cast<PHINode>(I);
4268 if (!AlternativeV)
4269 break;
4270
4271 assert(Succ->hasNPredecessors(2));
4272 auto PredI = pred_begin(Succ);
4273 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4274 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4275 break;
4276 PHI = nullptr;
4277 }
4278 if (PHI)
4279 return PHI;
4280
4281 // If V is not an instruction defined in BB, just return it.
4282 if (!AlternativeV &&
4283 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4284 return V;
4285
4286 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4287 PHI->insertBefore(Succ->begin());
4288 PHI->addIncoming(V, BB);
4289 for (BasicBlock *PredBB : predecessors(Succ))
4290 if (PredBB != BB)
4291 PHI->addIncoming(
4292 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4293 return PHI;
4294}
4295
4297 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4298 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4299 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
 // Sink the two conditional stores to Address (one from PTB/PFB, one from
 // QTB/QFB) into a single store in PostBB that is predicated on the
 // disjunction of the two branch conditions. Returns true on success.
4300 // For every pointer, there must be exactly two stores, one coming from
4301 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4302 // store (to any address) in PTB,PFB or QTB,QFB.
4303 // FIXME: We could relax this restriction with a bit more work and performance
4304 // testing.
4305 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4306 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4307 if (!PStore || !QStore)
4308 return false;
4309
4310 // Now check the stores are compatible.
4311 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4312 PStore->getValueOperand()->getType() !=
4313 QStore->getValueOperand()->getType())
4314 return false;
4315
4316 // Check that sinking the store won't cause program behavior changes. Sinking
4317 // the store out of the Q blocks won't change any behavior as we're sinking
4318 // from a block to its unconditional successor. But we're moving a store from
4319 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4320 // So we need to check that there are no aliasing loads or stores in
4321 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4322 // operations between PStore and the end of its parent block.
4323 //
4324 // The ideal way to do this is to query AliasAnalysis, but we don't
4325 // preserve AA currently so that is dangerous. Be super safe and just
4326 // check there are no other memory operations at all.
4327 for (auto &I : *QFB->getSinglePredecessor())
4328 if (I.mayReadOrWriteMemory())
4329 return false;
4330 for (auto &I : *QFB)
4331 if (&I != QStore && I.mayReadOrWriteMemory())
4332 return false;
4333 if (QTB)
4334 for (auto &I : *QTB)
4335 if (&I != QStore && I.mayReadOrWriteMemory())
4336 return false;
4337 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4338 I != E; ++I)
4339 if (&*I != PStore && I->mayReadOrWriteMemory())
4340 return false;
4341
4342 // If we're not in aggressive mode, we only optimize if we have some
4343 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4344 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4345 if (!BB)
4346 return true;
4347 // Heuristic: if the block can be if-converted/phi-folded and the
4348 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4349 // thread this store.
4350 InstructionCost Cost = 0;
4351 InstructionCost Budget =
4353 for (auto &I : BB->instructionsWithoutDebug(false)) {
4354 // Consider terminator instruction to be free.
4355 if (I.isTerminator())
4356 continue;
4357 // If this is one the stores that we want to speculate out of this BB,
4358 // then don't count its cost, consider it to be free.
4359 if (auto *S = dyn_cast<StoreInst>(&I))
4360 if (llvm::find(FreeStores, S))
4361 continue;
4362 // Else, we have a white-list of instructions that we are okay speculating.
4364 return false; // Not in white-list - not worthwhile folding.
4365 // And finally, if this is a non-free instruction that we are okay
4366 // speculating, ensure that we consider the speculation budget.
4367 Cost +=
4368 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4369 if (Cost > Budget)
4370 return false; // Eagerly refuse to fold as soon as we're out of budget.
4371 }
4372 assert(Cost <= Budget &&
4373 "When we run out of budget we will eagerly return from within the "
4374 "per-instruction loop.");
4375 return true;
4376 };
4377
4378 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4380 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4381 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))
4382 return false;
4383
4384 // If PostBB has more than two predecessors, we need to split it so we can
4385 // sink the store.
4386 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4387 // We know that QFB's only successor is PostBB. And QFB has a single
4388 // predecessor. If QTB exists, then its only successor is also PostBB.
4389 // If QTB does not exist, then QFB's only predecessor has a conditional
4390 // branch to QFB and PostBB.
4391 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4392 BasicBlock *NewBB =
4393 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4394 if (!NewBB)
4395 return false;
4396 PostBB = NewBB;
4397 }
4398
4399 // OK, we're going to sink the stores to PostBB. The store has to be
4400 // conditional though, so first create the predicate.
4401 BranchInst *PBranch =
4403 BranchInst *QBranch =
4405 Value *PCond = PBranch->getCondition();
4406 Value *QCond = QBranch->getCondition();
4407
4409 PStore->getParent());
4411 QStore->getParent(), PPHI);
4412
4413 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4414 IRBuilder<> QB(PostBB, PostBBFirst);
4415 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4416
 // If a store actually lives in the *false* block, flip the corresponding
 // condition so the combined predicate is true exactly when the original
 // store would have executed.
4417 InvertPCond ^= (PStore->getParent() != PTB);
4418 InvertQCond ^= (QStore->getParent() != QTB);
4419 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4420 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4421
4422 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4423
4424 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4425 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4426 /*Unreachable=*/false,
4427 /*BranchWeights=*/nullptr, DTU);
 // Propagate branch weights onto the combined predicate when both original
 // branches carried profile data (inverting to match any flipped condition).
4428 if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4430 SmallVector<uint32_t, 2> PWeights, QWeights;
4431 extractBranchWeights(*PBranch, PWeights);
4432 extractBranchWeights(*QBranch, QWeights);
4433 if (InvertPCond)
4434 std::swap(PWeights[0], PWeights[1]);
4435 if (InvertQCond)
4436 std::swap(QWeights[0], QWeights[1]);
4437 auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4439 {CombinedWeights[0], CombinedWeights[1]},
4440 /*IsExpected=*/false, /*ElideAllZero=*/true);
4441 }
4442
4443 QB.SetInsertPoint(T);
4444 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4445 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4446 // Choose the minimum alignment. If we could prove both stores execute, we
4447 // could use biggest one. In this case, though, we only know that one of the
4448 // stores executes. And we don't know it's safe to take the alignment from a
4449 // store that doesn't execute.
4450 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4451
4452 QStore->eraseFromParent();
4453 PStore->eraseFromParent();
4454
4455 return true;
4456}
4457
4459 DomTreeUpdater *DTU, const DataLayout &DL,
4460 const TargetTransformInfo &TTI) {
4461 // The intention here is to find diamonds or triangles (see below) where each
4462 // conditional block contains a store to the same address. Both of these
4463 // stores are conditional, so they can't be unconditionally sunk. But it may
4464 // be profitable to speculatively sink the stores into one merged store at the
4465 // end, and predicate the merged store on the union of the two conditions of
4466 // PBI and QBI.
4467 //
4468 // This can reduce the number of stores executed if both of the conditions are
4469 // true, and can allow the blocks to become small enough to be if-converted.
4470 // This optimization will also chain, so that ladders of test-and-set
4471 // sequences can be if-converted away.
4472 //
4473 // We only deal with simple diamonds or triangles:
4474 //
4475 // PBI or PBI or a combination of the two
4476 // / \ | \
4477 // PTB PFB | PFB
4478 // \ / | /
4479 // QBI QBI
4480 // / \ | \
4481 // QTB QFB | QFB
4482 // \ / | /
4483 // PostBB PostBB
4484 //
4485 // We model triangles as a type of diamond with a nullptr "true" block.
4486 // Triangles are canonicalized so that the fallthrough edge is represented by
4487 // a true condition, as in the diagram above.
4488 BasicBlock *PTB = PBI->getSuccessor(0);
4489 BasicBlock *PFB = PBI->getSuccessor(1);
4490 BasicBlock *QTB = QBI->getSuccessor(0);
4491 BasicBlock *QFB = QBI->getSuccessor(1);
4492 BasicBlock *PostBB = QFB->getSingleSuccessor();
4493
4494 // Make sure we have a good guess for PostBB. If QTB's only successor is
4495 // QFB, then QFB is a better PostBB.
4496 if (QTB->getSingleSuccessor() == QFB)
4497 PostBB = QFB;
4498
4499 // If we couldn't find a good PostBB, stop.
4500 if (!PostBB)
4501 return false;
4502
4503 bool InvertPCond = false, InvertQCond = false;
4504 // Canonicalize fallthroughs to the true branches.
4505 if (PFB == QBI->getParent()) {
4506 std::swap(PFB, PTB);
4507 InvertPCond = true;
4508 }
4509 if (QFB == PostBB) {
4510 std::swap(QFB, QTB);
4511 InvertQCond = true;
4512 }
4513
4514 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4515 // and QFB may not. Model fallthroughs as a nullptr block.
4516 if (PTB == QBI->getParent())
4517 PTB = nullptr;
4518 if (QTB == PostBB)
4519 QTB = nullptr;
4520
4521 // Legality bailouts. We must have at least the non-fallthrough blocks and
4522 // the post-dominating block, and the non-fallthroughs must only have one
4523 // predecessor.
4524 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4525 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4526 };
4527 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4528 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4529 return false;
4530 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4531 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4532 return false;
 // The middle block must be referenced exactly twice (NOTE(review):
 // presumably the two edges leaving PBI) — any other use blocks the fold.
4533 if (!QBI->getParent()->hasNUses(2))
4534 return false;
4535
4536 // OK, this is a sequence of two diamonds or triangles.
4537 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4538 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4539 for (auto *BB : {PTB, PFB}) {
4540 if (!BB)
4541 continue;
4542 for (auto &I : *BB)
4544 PStoreAddresses.insert(SI->getPointerOperand());
4545 }
4546 for (auto *BB : {QTB, QFB}) {
4547 if (!BB)
4548 continue;
4549 for (auto &I : *BB)
4551 QStoreAddresses.insert(SI->getPointerOperand());
4552 }
4553
4554 set_intersect(PStoreAddresses, QStoreAddresses);
4555 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4556 // clear what it contains.
4557 auto &CommonAddresses = PStoreAddresses;
4558
 // Attempt the merge once per address stored to by both sides.
4559 bool Changed = false;
4560 for (auto *Address : CommonAddresses)
4561 Changed |=
4562 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4563 InvertPCond, InvertQCond, DTU, DL, TTI);
4564 return Changed;
4565}
4566
4567/// If the previous block ended with a widenable branch, determine if reusing
4568/// the target block is profitable and legal. This will have the effect of
4569/// "widening" PBI, but doesn't require us to reason about hoisting safety.
4571 DomTreeUpdater *DTU) {
4572 // TODO: This can be generalized in two important ways:
4573 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4574 // values from the PBI edge.
4575 // 2) We can sink side effecting instructions into BI's fallthrough
4576 // successor provided they doesn't contribute to computation of
4577 // BI's condition.
4578 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4579 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4580 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4581 !BI->getParent()->getSinglePredecessor())
4582 return false;
4583 if (!IfFalseBB->phis().empty())
4584 return false; // TODO
4585 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4586 // may undo the transform done here.
4587 // TODO: There might be a more fine-grained solution to this.
4588 if (!llvm::succ_empty(IfFalseBB))
4589 return false;
4590 // Use lambda to lazily compute expensive condition after cheap ones.
4591 auto NoSideEffects = [](BasicBlock &BB) {
4592 return llvm::none_of(BB, [](const Instruction &I) {
4593 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4594 });
4595 };
4596 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4597 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4598 NoSideEffects(*BI->getParent())) {
4599 auto *OldSuccessor = BI->getSuccessor(1);
4600 OldSuccessor->removePredecessor(BI->getParent());
4601 BI->setSuccessor(1, IfFalseBB);
4602 if (DTU)
4603 DTU->applyUpdates(
4604 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4605 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4606 return true;
4607 }
4608 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4609 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4610 NoSideEffects(*BI->getParent())) {
4611 auto *OldSuccessor = BI->getSuccessor(0);
4612 OldSuccessor->removePredecessor(BI->getParent());
4613 BI->setSuccessor(0, IfFalseBB);
4614 if (DTU)
4615 DTU->applyUpdates(
4616 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4617 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4618 return true;
4619 }
4620 return false;
4621}
4622
4623/// If we have a conditional branch as a predecessor of another block,
4624/// this function tries to simplify it. We know
4625/// that PBI and BI are both conditional branches, and BI is in one of the
4626/// successor blocks of PBI - PBI branches to BI.
4628 DomTreeUpdater *DTU,
4629 const DataLayout &DL,
4630 const TargetTransformInfo &TTI) {
4631 assert(PBI->isConditional() && BI->isConditional());
4632 BasicBlock *BB = BI->getParent();
4633
4634 // If this block ends with a branch instruction, and if there is a
4635 // predecessor that ends on a branch of the same condition, make
4636 // this conditional branch redundant.
4637 if (PBI->getCondition() == BI->getCondition() &&
4638 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4639 // Okay, the outcome of this conditional branch is statically
4640 // knowable. If this block had a single pred, handle specially, otherwise
4641 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4642 if (BB->getSinglePredecessor()) {
4643 // Turn this into a branch on constant.
4644 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4645 BI->setCondition(
4646 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4647 return true; // Nuke the branch on constant.
4648 }
4649 }
4650
4651 // If the previous block ended with a widenable branch, determine if reusing
4652 // the target block is profitable and legal. This will have the effect of
4653 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4654 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4655 return true;
4656
4657 // If both branches are conditional and both contain stores to the same
4658 // address, remove the stores from the conditionals and create a conditional
4659 // merged store at the end.
4660 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4661 return true;
4662
4663 // If this is a conditional branch in an empty block, and if any
4664 // predecessors are a conditional branch to one of our destinations,
4665 // fold the conditions into logical ops and one cond br.
4666
4667 // Ignore dbg intrinsics.
4668 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4669 return false;
4670
 // Determine which successor of PBI (PBIOp) and of BI (BIOp) is the
 // destination the two branches share; bail out if they share none.
4671 int PBIOp, BIOp;
4672 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4673 PBIOp = 0;
4674 BIOp = 0;
4675 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4676 PBIOp = 0;
4677 BIOp = 1;
4678 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4679 PBIOp = 1;
4680 BIOp = 0;
4681 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4682 PBIOp = 1;
4683 BIOp = 1;
4684 } else {
4685 return false;
4686 }
4687
4688 // Check to make sure that the other destination of this branch
4689 // isn't BB itself. If so, this is an infinite loop that will
4690 // keep getting unwound.
4691 if (PBI->getSuccessor(PBIOp) == BB)
4692 return false;
4693
4694 // If predecessor's branch probability to BB is too low don't merge branches.
4695 SmallVector<uint32_t, 2> PredWeights;
4696 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4697 extractBranchWeights(*PBI, PredWeights) &&
4698 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4699
4701 PredWeights[PBIOp],
4702 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4703
4704 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4705 if (CommonDestProb >= Likely)
4706 return false;
4707 }
4708
4709 // Do not perform this transformation if it would require
4710 // insertion of a large number of select instructions. For targets
4711 // without predication/cmovs, this is a big pessimization.
4712
4713 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4714 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4715 unsigned NumPhis = 0;
4716 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4717 ++II, ++NumPhis) {
4718 if (NumPhis > 2) // Disable this xform.
4719 return false;
4720 }
4721
4722 // Finally, if everything is ok, fold the branches to logical ops.
4723 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4724
4725 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4726 << "AND: " << *BI->getParent());
4727
4729
4730 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4731 // branch in it, where one edge (OtherDest) goes back to itself but the other
4732 // exits. We don't *know* that the program avoids the infinite loop
4733 // (even though that seems likely). If we do this xform naively, we'll end up
4734 // recursively unpeeling the loop. Since we know that (after the xform is
4735 // done) that the block *is* infinite if reached, we just make it an obviously
4736 // infinite loop with no cond branch.
4737 if (OtherDest == BB) {
4738 // Insert it at the end of the function, because it's either code,
4739 // or it won't matter if it's hot. :)
4740 BasicBlock *InfLoopBlock =
4741 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4742 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4743 if (DTU)
4744 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4745 OtherDest = InfLoopBlock;
4746 }
4747
4748 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4749
4750 // BI may have other predecessors. Because of this, we leave
4751 // it alone, but modify PBI.
4752
4753 // Make sure we get to CommonDest on True&True directions.
4754 Value *PBICond = PBI->getCondition();
4755 IRBuilder<NoFolder> Builder(PBI);
4756 if (PBIOp)
4757 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4758
4759 Value *BICond = BI->getCondition();
4760 if (BIOp)
4761 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4762
4763 // Merge the conditions.
4764 Value *Cond =
4765 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4766
4767 // Modify PBI to branch on the new condition to the new dests.
4768 PBI->setCondition(Cond);
4769 PBI->setSuccessor(0, CommonDest);
4770 PBI->setSuccessor(1, OtherDest);
4771
4772 if (DTU) {
4773 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4774 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4775
4776 DTU->applyUpdates(Updates);
4777 }
4778
4779 // Update branch weight for PBI.
4780 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4781 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4782 bool HasWeights =
4783 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4784 SuccTrueWeight, SuccFalseWeight);
4785 if (HasWeights) {
4786 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4787 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4788 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4789 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4790 // The weight to CommonDest should be PredCommon * SuccTotal +
4791 // PredOther * SuccCommon.
4792 // The weight to OtherDest should be PredOther * SuccOther.
4793 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4794 PredOther * SuccCommon,
4795 PredOther * SuccOther};
4796
4797 setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
4798 /*ElideAllZero=*/true);
4799 // Cond may be a select instruction with the first operand set to "true", or
4800 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4802 if (auto *SI = dyn_cast<SelectInst>(Cond)) {
4803 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4804 // The select is predicated on PBICond
4806 // The corresponding probabilities are what was referred to above as
4807 // PredCommon and PredOther.
4808 setFittedBranchWeights(*SI, {PredCommon, PredOther},
4809 /*IsExpected=*/false, /*ElideAllZero=*/true);
4810 }
4811 }
4812
4813 // OtherDest may have phi nodes. If so, add an entry from PBI's
4814 // block that are identical to the entries for BI's block.
4815 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4816
4817 // We know that the CommonDest already had an edge from PBI to
4818 // it. If it has PHIs though, the PHIs may have different
4819 // entries for BB and PBI's BB. If so, insert a select to make
4820 // them agree.
4821 for (PHINode &PN : CommonDest->phis()) {
4822 Value *BIV = PN.getIncomingValueForBlock(BB);
4823 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4824 Value *PBIV = PN.getIncomingValue(PBBIdx);
4825 if (BIV != PBIV) {
4826 // Insert a select in PBI to pick the right value.
4828 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4829 PN.setIncomingValue(PBBIdx, NV);
4830 // The select has the same condition as PBI, in the same BB. The
4831 // probabilities don't change.
4832 if (HasWeights) {
4833 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4834 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4835 setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
4836 /*IsExpected=*/false, /*ElideAllZero=*/true);
4837 }
4838 }
4839 }
4840
4841 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4842 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4843
4844 // This basic block is probably dead. We know it has at least
4845 // one fewer predecessor.
4846 return true;
4847}
4848
4849// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4850// true or to FalseBB if Cond is false.
4851// Takes care of updating the successors and removing the old terminator.
4852// Also makes sure not to introduce new successors by assuming that edges to
4853// non-successor TrueBBs and FalseBBs aren't reachable.
4854bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4855 Value *Cond, BasicBlock *TrueBB,
4856 BasicBlock *FalseBB,
4857 uint32_t TrueWeight,
4858 uint32_t FalseWeight) {
4859 auto *BB = OldTerm->getParent();
4860 // Remove any superfluous successor edges from the CFG.
4861 // First, figure out which successors to preserve.
4862 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4863 // successor.
4864 BasicBlock *KeepEdge1 = TrueBB;
4865 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4866
 // Successors that lose their edge from BB entirely; DTU is notified below.
4867 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4868
4869 // Then remove the rest.
4870 for (BasicBlock *Succ : successors(OldTerm)) {
4871 // Make sure only to keep exactly one copy of each edge.
4872 if (Succ == KeepEdge1)
4873 KeepEdge1 = nullptr;
4874 else if (Succ == KeepEdge2)
4875 KeepEdge2 = nullptr;
4876 else {
4877 Succ->removePredecessor(BB,
4878 /*KeepOneInputPHIs=*/true);
4879
4880 if (Succ != TrueBB && Succ != FalseBB)
4881 RemovedSuccessors.insert(Succ);
4882 }
4883 }
4884
4885 IRBuilder<> Builder(OldTerm);
4886 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4887
4888 // Insert an appropriate new terminator.
 // A KeepEdge left non-null here means that requested destination was NOT
 // already a successor of OldTerm.
4889 if (!KeepEdge1 && !KeepEdge2) {
4890 if (TrueBB == FalseBB) {
4891 // We were only looking for one successor, and it was present.
4892 // Create an unconditional branch to it.
4893 Builder.CreateBr(TrueBB);
4894 } else {
4895 // We found both of the successors we were looking for.
4896 // Create a conditional branch sharing the condition of the select.
4897 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4898 setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
4899 /*IsExpected=*/false, /*ElideAllZero=*/true);
4900 }
4901 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4902 // Neither of the selected blocks were successors, so this
4903 // terminator must be unreachable.
4904 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4905 } else {
4906 // One of the selected values was a successor, but the other wasn't.
4907 // Insert an unconditional branch to the one that was found;
4908 // the edge to the one that wasn't must be unreachable.
4909 if (!KeepEdge1) {
4910 // Only TrueBB was found.
4911 Builder.CreateBr(TrueBB);
4912 } else {
4913 // Only FalseBB was found.
4914 Builder.CreateBr(FalseBB);
4915 }
4916 }
4917
4919
4920 if (DTU) {
4921 SmallVector<DominatorTree::UpdateType, 2> Updates;
4922 Updates.reserve(RemovedSuccessors.size());
4923 for (auto *RemovedSuccessor : RemovedSuccessors)
4924 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4925 DTU->applyUpdates(Updates);
4926 }
4927
4928 return true;
4929}
4930
4931// Replaces
4932// (switch (select cond, X, Y)) on constant X, Y
4933// with a branch - conditional if X and Y lead to distinct BBs,
4934// unconditional otherwise.
4935bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4936 SelectInst *Select) {
4937 // Check for constant integer values in the select.
4938 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4939 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4940 if (!TrueVal || !FalseVal)
4941 return false;
4942
4943 // Find the relevant condition and destinations.
4944 Value *Condition = Select->getCondition();
4945 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4946 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4947
4948 // Get weight for TrueBB and FalseBB.
4949 uint32_t TrueWeight = 0, FalseWeight = 0;
4950 SmallVector<uint64_t, 8> Weights;
4951 bool HasWeights = hasBranchWeightMD(*SI);
4952 if (HasWeights) {
4953 getBranchWeights(SI, Weights);
4954 if (Weights.size() == 1 + SI->getNumCases()) {
4955 TrueWeight =
4956 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4957 FalseWeight =
4958 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4959 }
4960 }
4961
4962 // Perform the actual simplification.
4963 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4964 FalseWeight);
4965}
4966
4967// Replaces
4968// (indirectbr (select cond, blockaddress(@fn, BlockA),
4969// blockaddress(@fn, BlockB)))
4970// with
4971// (br cond, BlockA, BlockB).
4972bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4973 SelectInst *SI) {
4974 // Check that both operands of the select are block addresses.
4975 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4976 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4977 if (!TBA || !FBA)
4978 return false;
4979
4980 // Extract the actual blocks.
4981 BasicBlock *TrueBB = TBA->getBasicBlock();
4982 BasicBlock *FalseBB = FBA->getBasicBlock();
4983
4984 // The select's profile becomes the profile of the conditional branch that
4985 // replaces the indirect branch.
 // Value-initialized to {0, 0}; stays zero unless weights are extracted below.
4986 SmallVector<uint32_t> SelectBranchWeights(2);
4988 extractBranchWeights(*SI, SelectBranchWeights);
4989 // Perform the actual simplification.
4990 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
4991 SelectBranchWeights[0],
4992 SelectBranchWeights[1]);
4993}
4994
4995/// This is called when we find an icmp instruction
4996/// (a seteq/setne with a constant) as the only instruction in a
4997/// block that ends with an uncond branch. We are looking for a very specific
4998/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4999/// this case, we merge the first two "or's of icmp" into a switch, but then the
5000/// default value goes to an uncond block with a seteq in it, we get something
5001/// like:
5002///
5003/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5004/// DEFAULT:
5005/// %tmp = icmp eq i8 %A, 92
5006/// br label %end
5007/// end:
5008/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5009///
5010/// We prefer to split the edge to 'end' so that there is a true/false entry to
5011/// the PHI, merging the third icmp into the switch.
5012bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5013 ICmpInst *ICI, IRBuilder<> &Builder) {
5014 BasicBlock *BB = ICI->getParent();
5015
5016 // If the block has any PHIs in it or the icmp has multiple uses, it is too
5017 // complex.
5018 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5019 return false;
5020
5021 Value *V = ICI->getOperand(0);
5022 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5023
5024 // The pattern we're looking for is where our only predecessor is a switch on
5025 // 'V' and this block is the default case for the switch. In this case we can
5026 // fold the compared value into the switch to simplify things.
5027 BasicBlock *Pred = BB->getSinglePredecessor();
5028 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5029 return false;
5030
5031 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5032 if (SI->getCondition() != V)
5033 return false;
5034
5035 // If BB is reachable on a non-default case, then we simply know the value of
5036 // V in this block. Substitute it and constant fold the icmp instruction
5037 // away.
5038 if (SI->getDefaultDest() != BB) {
5039 ConstantInt *VVal = SI->findCaseDest(BB);
5040 assert(VVal && "Should have a unique destination value");
5041 ICI->setOperand(0, VVal);
5042
5043 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5044 ICI->replaceAllUsesWith(V);
5045 ICI->eraseFromParent();
5046 }
5047 // BB is now empty, so it is likely to simplify away.
5048 return requestResimplify();
5049 }
5050
5051 // Ok, the block is reachable from the default dest. If the constant we're
5052 // comparing exists in one of the other edges, then we can constant fold ICI
5053 // and zap it.
5054 if (SI->findCaseValue(Cst) != SI->case_default()) {
5055 Value *V;
5056 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5058 else
5060
5061 ICI->replaceAllUsesWith(V);
5062 ICI->eraseFromParent();
5063 // BB is now empty, so it is likely to simplify away.
5064 return requestResimplify();
5065 }
5066
5067 // The use of the icmp has to be in the 'end' block, by the only PHI node in
5068 // the block.
5069 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5070 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5071 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5073 return false;
5074
5075 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5076 // true in the PHI.
5077 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5078 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5079
5080 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5081 std::swap(DefaultCst, NewCst);
5082
5083 // Replace ICI (which is used by the PHI for the default value) with true or
5084 // false depending on if it is EQ or NE.
5085 ICI->replaceAllUsesWith(DefaultCst);
5086 ICI->eraseFromParent();
5087
5088 SmallVector<DominatorTree::UpdateType, 2> Updates;
5089
5090 // Okay, the switch goes to this block on a default value. Add an edge from
5091 // the switch to the merge point on the compared value.
5092 BasicBlock *NewBB =
5093 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5094 {
5095 SwitchInstProfUpdateWrapper SIW(*SI);
 // Split the default edge's former weight roughly in half between the
 // remaining default destination and the newly added case.
5096 auto W0 = SIW.getSuccessorWeight(0);
5098 if (W0) {
5099 NewW = ((uint64_t(*W0) + 1) >> 1);
5100 SIW.setSuccessorWeight(0, *NewW);
5101 }
5102 SIW.addCase(Cst, NewBB, NewW);
5103 if (DTU)
5104 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5105 }
5106
5107 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5108 Builder.SetInsertPoint(NewBB);
5109 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5110 Builder.CreateBr(SuccBlock);
5111 PHIUse->addIncoming(NewCst, NewBB);
5112 if (DTU) {
5113 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5114 DTU->applyUpdates(Updates);
5115 }
5116 return true;
5117}
5118
5119/// The specified branch is a conditional branch.
5120/// Check to see if it is branching on an or/and chain of icmp instructions, and
5121/// fold it into a switch instruction if so.
5122 bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5123 IRBuilder<> &Builder,
5124 const DataLayout &DL) {
5126 if (!Cond)
5127 return false;
5128
5129 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5130 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5131 // 'setne's and'ed together, collect them.
5132
5133 // Try to gather values from a chain of and/or to be turned into a switch
5134 ConstantComparesGatherer ConstantCompare(Cond, DL);
5135 // Unpack the result
5136 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5137 Value *CompVal = ConstantCompare.CompValue;
5138 unsigned UsedICmps = ConstantCompare.UsedICmps;
5139 Value *ExtraCase = ConstantCompare.Extra;
5140 bool TrueWhenEqual = ConstantCompare.IsEq;
5141
5142 // If we didn't have a multiply compared value, fail.
5143 if (!CompVal)
5144 return false;
5145
5146 // Avoid turning single icmps into a switch.
5147 if (UsedICmps <= 1)
5148 return false;
5149
5150 // There might be duplicate constants in the list, which the switch
5151 // instruction can't handle, remove them now.
5152 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5153 Values.erase(llvm::unique(Values), Values.end());
5154
5155 // If Extra was used, we require at least two switch values to do the
5156 // transformation. A switch with one value is just a conditional branch.
5157 if (ExtraCase && Values.size() < 2)
5158 return false;
5159
// Snapshot any profile metadata on the branch up front so it can be
// transferred to the replacement switch / range check created below.
5160 SmallVector<uint32_t> BranchWeights;
5161 const bool HasProfile = !ProfcheckDisableMetadataFixes &&
5162 extractBranchWeights(*BI, BranchWeights);
5163
5164 // Figure out which block is which destination.
5165 BasicBlock *DefaultBB = BI->getSuccessor(1);
5166 BasicBlock *EdgeBB = BI->getSuccessor(0);
5167 if (!TrueWhenEqual) {
// For a chain of 'setne's the matched-values edge is the false successor,
// so swap the destinations (and the recorded weights, to keep them in sync).
5168 std::swap(DefaultBB, EdgeBB);
5169 if (HasProfile)
5170 std::swap(BranchWeights[0], BranchWeights[1]);
5171 }
5172
5173 BasicBlock *BB = BI->getParent();
5174
5175 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5176 << " cases into SWITCH. BB is:\n"
5177 << *BB);
5178
5179 SmallVector<DominatorTree::UpdateType, 2> Updates;
5180
5181 // If there are any extra values that couldn't be folded into the switch
5182 // then we evaluate them with an explicit branch first. Split the block
5183 // right before the condbr to handle it.
5184 if (ExtraCase) {
5185 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5186 /*MSSAU=*/nullptr, "switch.early.test");
5187
5188 // Remove the uncond branch added to the old block.
5189 Instruction *OldTI = BB->getTerminator();
5190 Builder.SetInsertPoint(OldTI);
5191
5192 // There can be an unintended UB if extra values are Poison. Before the
5193 // transformation, extra values may not be evaluated according to the
5194 // condition, and it will not raise UB. But after transformation, we are
5195 // evaluating extra values before checking the condition, and it will raise
5196 // UB. It can be solved by adding freeze instruction to extra values.
5197 AssumptionCache *AC = Options.AC;
5198
5199 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5200 ExtraCase = Builder.CreateFreeze(ExtraCase);
5201
5202 // We don't have any info about this condition.
5203 auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
5204 : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5206 DEBUG_TYPE);
5207
5208 OldTI->eraseFromParent();
5209
5210 if (DTU)
5211 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5212
5213 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5214 // for the edge we just added.
5215 addPredecessorToBlock(EdgeBB, BB, NewBB);
5216
5217 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5218 << "\nEXTRABB = " << *BB);
5219 BB = NewBB;
5220 }
5221
5222 Builder.SetInsertPoint(BI);
5223 // Convert pointer to int before we switch.
5224 if (CompVal->getType()->isPointerTy()) {
5225 assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
5226 "Should not end up here with unstable pointers");
5227 CompVal = Builder.CreatePtrToInt(
5228 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5229 }
5230
5231 // Check if we can represent the values as a contiguous range. If so, we use a
5232 // range check + conditional branch instead of a switch.
// After the sort above, front() holds the maximum and back() the minimum;
// the values form a contiguous range exactly when max - min == size - 1.
5233 if (Values.front()->getValue() - Values.back()->getValue() ==
5234 Values.size() - 1) {
5235 ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
5236 Values.back()->getValue(), Values.front()->getValue() + 1);
5237 APInt Offset, RHS;
5238 ICmpInst::Predicate Pred;
5239 RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
5240 Value *X = CompVal;
5241 if (!Offset.isZero())
5242 X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
5243 Value *Cond =
5244 Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
5245 BranchInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
5246 if (HasProfile)
5247 setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
5248 // We don't need to update PHI nodes since we don't add any new edges.
5249 } else {
5250 // Create the new switch instruction now.
5251 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5252 if (HasProfile) {
5253 // We know the weight of the default case. We don't know the weight of the
5254 // other cases, but rather than completely lose profiling info, we split
5255 // the remaining probability equally over them.
5256 SmallVector<uint32_t> NewWeights(Values.size() + 1);
5257 NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
5258 // if TrueWhenEqual.
5259 for (auto &V : drop_begin(NewWeights))
5260 V = BranchWeights[0] / Values.size();
5261 setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
5262 }
5263
5264 // Add all of the 'cases' to the switch instruction.
5265 for (ConstantInt *Val : Values)
5266 New->addCase(Val, EdgeBB);
5267
5268 // We added edges from PI to the EdgeBB. As such, if there were any
5269 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5270 // the number of edges added.
5271 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5272 PHINode *PN = cast<PHINode>(BBI);
5273 Value *InVal = PN->getIncomingValueForBlock(BB);
// One incoming entry for BB already exists, so add size-1 more so that
// every new case edge from BB has a matching PHI entry.
5274 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5275 PN->addIncoming(InVal, BB);
5276 }
5277 }
5278
5279 // Erase the old branch instruction.
5281 if (DTU)
5282 DTU->applyUpdates(Updates);
5283
5284 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5285 return true;
5286}
5287
5288bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5289 if (isa<PHINode>(RI->getValue()))
5290 return simplifyCommonResume(RI);
5291 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5292 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5293 // The resume must unwind the exception that caused control to branch here.
5294 return simplifySingleResume(RI);
5295
5296 return false;
5297}
5298
5299 // Check if cleanup block is empty
// "Empty" here means the instruction range contains only intrinsics that
// carry no runtime semantics: debug-info intrinsics and lifetime.end
// markers. Anything else disqualifies the range.
5301 for (Instruction &I : R) {
5302 auto *II = dyn_cast<IntrinsicInst>(&I);
5303 if (!II)
5304 return false;
5305
5306 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5307 switch (IntrinsicID) {
5308 case Intrinsic::dbg_declare:
5309 case Intrinsic::dbg_value:
5310 case Intrinsic::dbg_label:
5311 case Intrinsic::lifetime_end:
5312 break;
5313 default:
// Any other intrinsic (including lifetime.start) is treated as
// meaningful and blocks the simplification.
5314 return false;
5315 }
5316 }
5317 return true;
5318}
5319
5320 // Simplify resume that is shared by several landing pads (phi of landing pad).
// Finds "trivial" predecessor blocks (a landingpad followed only by benign
// intrinsics, feeding this resume through the PHI), turns the invokes that
// unwind into them into calls, and detaches them from the resume block.
5321 bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5322 BasicBlock *BB = RI->getParent();
5323
5324 // Check that there are no other instructions except for debug and lifetime
5325 // intrinsics between the phi's and resume instruction.
5326 if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
5327 BB->getTerminator()->getIterator())))
5328 return false;
5329
5330 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
// The caller (simplifyResume) only dispatches here when the resume operand
// is a PHI, so this cast cannot fail.
5331 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5332
5333 // Check incoming blocks to see if any of them are trivial.
5334 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5335 Idx++) {
5336 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5337 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5338
5339 // If the block has other successors, we can not delete it because
5340 // it has other dependents.
5341 if (IncomingBB->getUniqueSuccessor() != BB)
5342 continue;
5343
5344 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5345 // Not the landing pad that caused the control to branch here.
5346 if (IncomingValue != LandingPad)
5347 continue;
5348
5350 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5351 TrivialUnwindBlocks.insert(IncomingBB);
5352 }
5353
5354 // If no trivial unwind blocks, don't do any simplifications.
5355 if (TrivialUnwindBlocks.empty())
5356 return false;
5357
5358 // Turn all invokes that unwind here into calls.
5359 for (auto *TrivialBB : TrivialUnwindBlocks) {
5360 // Blocks that will be simplified should be removed from the phi node.
5361 // Note there could be multiple edges to the resume block, and we need
5362 // to remove them all.
5363 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5364 BB->removePredecessor(TrivialBB, true);
5365
5366 for (BasicBlock *Pred :
5368 removeUnwindEdge(Pred, DTU);
5369 ++NumInvokes;
5370 }
5371
5372 // In each SimplifyCFG run, only the current processed block can be erased.
5373 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5374 // of erasing TrivialBB, we only remove the branch to the common resume
5375 // block so that we can later erase the resume block since it has no
5376 // predecessors.
5377 TrivialBB->getTerminator()->eraseFromParent();
5378 new UnreachableInst(RI->getContext(), TrivialBB);
5379 if (DTU)
5380 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5381 }
5382
5383 // Delete the resume block if all its predecessors have been removed.
5384 if (pred_empty(BB))
5385 DeleteDeadBlock(BB, DTU);
5386
// Always true at this point: we bailed out above when the set was empty.
5387 return !TrivialUnwindBlocks.empty();
5388}
5389
5390 // Simplify resume that is only used by a single (non-phi) landing pad.
// If the landingpad+resume block contains nothing else of consequence, every
// invoke that unwinds here can be demoted to a call and the block deleted.
5391 bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5392 BasicBlock *BB = RI->getParent();
5393 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
5394 assert(RI->getValue() == LPInst &&
5395 "Resume must unwind the exception that caused control to here");
5396
5397 // Check that there are no other instructions except for debug intrinsics.
5399 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5400 return false;
5401
5402 // Turn all invokes that unwind here into calls and delete the basic block.
// early-inc iteration is required: removeUnwindEdge mutates BB's
// predecessor list while we walk it.
5403 for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
5404 removeUnwindEdge(Pred, DTU);
5405 ++NumInvokes;
5406 }
5407
5408 // The landingpad is now unreachable. Zap it.
5409 DeleteDeadBlock(BB, DTU);
5410 return true;
5411}
5412
// Remove a cleanup pad that contains nothing but benign intrinsics,
// rewiring each predecessor to the pad's unwind destination (or to the
// caller when the cleanupret unwinds to caller).
5414 // If this is a trivial cleanup pad that executes no instructions, it can be
5415 // eliminated. If the cleanup pad continues to the caller, any predecessor
5416 // that is an EH pad will be updated to continue to the caller and any
5417 // predecessor that terminates with an invoke instruction will have its invoke
5418 // instruction converted to a call instruction. If the cleanup pad being
5419 // simplified does not continue to the caller, each predecessor will be
5420 // updated to continue to the unwind destination of the cleanup pad being
5421 // simplified.
5422 BasicBlock *BB = RI->getParent();
5423 CleanupPadInst *CPInst = RI->getCleanupPad();
5424 if (CPInst->getParent() != BB)
5425 // This isn't an empty cleanup.
5426 return false;
5427
5428 // We cannot kill the pad if it has multiple uses. This typically arises
5429 // from unreachable basic blocks.
5430 if (!CPInst->hasOneUse())
5431 return false;
5432
5433 // Check that there are no other instructions except for benign intrinsics.
5435 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5436 return false;
5437
5438 // If the cleanup return we are simplifying unwinds to the caller, this will
5439 // set UnwindDest to nullptr.
5440 BasicBlock *UnwindDest = RI->getUnwindDest();
5441
5442 // We're about to remove BB from the control flow. Before we do, sink any
5443 // PHINodes into the unwind destination. Doing this before changing the
5444 // control flow avoids some potentially slow checks, since we can currently
5445 // be certain that UnwindDest and BB have no common predecessors (since they
5446 // are both EH pads).
5447 if (UnwindDest) {
5448 // First, go through the PHI nodes in UnwindDest and update any nodes that
5449 // reference the block we are removing
5450 for (PHINode &DestPN : UnwindDest->phis()) {
5451 int Idx = DestPN.getBasicBlockIndex(BB);
5452 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5453 assert(Idx != -1);
5454 // This PHI node has an incoming value that corresponds to a control
5455 // path through the cleanup pad we are removing. If the incoming
5456 // value is in the cleanup pad, it must be a PHINode (because we
5457 // verified above that the block is otherwise empty). Otherwise, the
5458 // value is either a constant or a value that dominates the cleanup
5459 // pad being removed.
5460 //
5461 // Because BB and UnwindDest are both EH pads, all of their
5462 // predecessors must unwind to these blocks, and since no instruction
5463 // can have multiple unwind destinations, there will be no overlap in
5464 // incoming blocks between SrcPN and DestPN.
5465 Value *SrcVal = DestPN.getIncomingValue(Idx);
5466 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5467
5468 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5469 for (auto *Pred : predecessors(BB)) {
5470 Value *Incoming =
5471 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5472 DestPN.addIncoming(Incoming, Pred);
5473 }
5474 }
5475
5476 // Sink any remaining PHI nodes directly into UnwindDest.
5477 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5478 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5479 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5480 // If the PHI node has no uses or all of its uses are in this basic
5481 // block (meaning they are debug or lifetime intrinsics), just leave
5482 // it. It will be erased when we erase BB below.
5483 continue;
5484
5485 // Otherwise, sink this PHI node into UnwindDest.
5486 // Any predecessors to UnwindDest which are not already represented
5487 // must be back edges which inherit the value from the path through
5488 // BB. In this case, the PHI value must reference itself.
5489 for (auto *pred : predecessors(UnwindDest))
5490 if (pred != BB)
5491 PN.addIncoming(&PN, pred);
5492 PN.moveBefore(InsertPt);
5493 // Also, add a dummy incoming value for the original BB itself,
5494 // so that the PHI is well-formed until we drop said predecessor.
5495 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5496 }
5497 }
5498
5499 std::vector<DominatorTree::UpdateType> Updates;
5500
5501 // We use make_early_inc_range here because we will remove all predecessors.
5503 if (UnwindDest == nullptr) {
5504 if (DTU) {
// removeUnwindEdge may delete blocks, so flush pending DT updates
// before it runs to keep the updater's view consistent.
5505 DTU->applyUpdates(Updates);
5506 Updates.clear();
5507 }
5508 removeUnwindEdge(PredBB, DTU);
5509 ++NumInvokes;
5510 } else {
5511 BB->removePredecessor(PredBB);
5512 Instruction *TI = PredBB->getTerminator();
5513 TI->replaceUsesOfWith(BB, UnwindDest);
5514 if (DTU) {
5515 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5516 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5517 }
5518 }
5519 }
5520
5521 if (DTU)
5522 DTU->applyUpdates(Updates);
5523
5524 DeleteDeadBlock(BB, DTU);
5525
5526 return true;
5527}
5528
5529 // Try to merge two cleanuppads together.
// When this cleanupret's unwind destination is another cleanuppad whose only
// predecessor is this cleanupret, the two pads can be fused: the successor
// pad is replaced by the predecessor pad and the cleanupret becomes a plain
// branch into the (former) successor block.
5531 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5532 // with.
5533 BasicBlock *UnwindDest = RI->getUnwindDest();
5534 if (!UnwindDest)
5535 return false;
5536
5537 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5538 // be safe to merge without code duplication.
5539 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5540 return false;
5541
5542 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5543 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5544 if (!SuccessorCleanupPad)
5545 return false;
5546
5547 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5548 // Replace any uses of the successor cleanupad with the predecessor pad
5549 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5550 // funclet bundle operands.
5551 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5552 // Remove the old cleanuppad.
5553 SuccessorCleanupPad->eraseFromParent();
5554 // Now, we simply replace the cleanupret with a branch to the unwind
5555 // destination.
5556 BranchInst::Create(UnwindDest, RI->getParent());
5557 RI->eraseFromParent();
5558
5559 return true;
5560}
5561
5562bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5563 // It is possible to transiantly have an undef cleanuppad operand because we
5564 // have deleted some, but not all, dead blocks.
5565 // Eventually, this block will be deleted.
5566 if (isa<UndefValue>(RI->getOperand(0)))
5567 return false;
5568
5569 if (mergeCleanupPad(RI))
5570 return true;
5571
5572 if (removeEmptyCleanup(RI, DTU))
5573 return true;
5574
5575 return false;
5576}
5577
5578 // WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Deletes dead instructions preceding an 'unreachable', and when the
// unreachable is the whole block, rewrites every predecessor terminator
// (branch/switch/invoke/catchswitch/cleanupret) to stop targeting it.
5579 bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5580 BasicBlock *BB = UI->getParent();
5581
5582 bool Changed = false;
5583
5584 // Ensure that any debug-info records that used to occur after the Unreachable
5585 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5586 // the block.
5588
5589 // Debug-info records on the unreachable inst itself should be deleted, as
5590 // below we delete everything past the final executable instruction.
5591 UI->dropDbgRecords();
5592
5593 // If there are any instructions immediately before the unreachable that can
5594 // be removed, do so.
5595 while (UI->getIterator() != BB->begin()) {
5597 --BBI;
5598
5600 break; // Can not drop any more instructions. We're done here.
5601 // Otherwise, this instruction can be freely erased,
5602 // even if it is not side-effect free.
5603
5604 // Note that deleting EH's here is in fact okay, although it involves a bit
5605 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5606 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5607 // and we can therefore guarantee this block will be erased.
5608
5609 // If we're deleting this, we're deleting any subsequent debug info, so
5610 // delete DbgRecords.
5611 BBI->dropDbgRecords();
5612
5613 // Delete this instruction (any uses are guaranteed to be dead)
5614 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5615 BBI->eraseFromParent();
5616 Changed = true;
5617 }
5618
5619 // If the unreachable instruction is the first in the block, take a gander
5620 // at all of the predecessors of this instruction, and simplify them.
5621 if (&BB->front() != UI)
5622 return Changed;
5623
5624 std::vector<DominatorTree::UpdateType> Updates;
5625
5626 SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
5627 for (BasicBlock *Predecessor : Preds) {
5628 Instruction *TI = Predecessor->getTerminator();
5629 IRBuilder<> Builder(TI);
5630 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5631 // We could either have a proper unconditional branch,
5632 // or a degenerate conditional branch with matching destinations.
5633 if (all_of(BI->successors(),
5634 [BB](auto *Successor) { return Successor == BB; })) {
5635 new UnreachableInst(TI->getContext(), TI->getIterator());
5636 TI->eraseFromParent();
5637 Changed = true;
5638 } else {
5639 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5640 Value* Cond = BI->getCondition();
5641 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5642 "The destinations are guaranteed to be different here.");
5643 CallInst *Assumption;
// The edge into BB can never be taken, so the branch condition is
// known on the surviving path; record that fact as an llvm.assume.
5644 if (BI->getSuccessor(0) == BB) {
5645 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5646 Builder.CreateBr(BI->getSuccessor(1));
5647 } else {
5648 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5649 Assumption = Builder.CreateAssumption(Cond);
5650 Builder.CreateBr(BI->getSuccessor(0));
5651 }
5652 if (Options.AC)
5653 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5654
5656 Changed = true;
5657 }
5658 if (DTU)
5659 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5660 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
5661 SwitchInstProfUpdateWrapper SU(*SI);
5662 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5663 if (i->getCaseSuccessor() != BB) {
5664 ++i;
5665 continue;
5666 }
5667 BB->removePredecessor(SU->getParent());
// removeCase invalidates iterators past the removed case, so refresh
// both the iterator and the end sentinel.
5668 i = SU.removeCase(i);
5669 e = SU->case_end();
5670 Changed = true;
5671 }
5672 // Note that the default destination can't be removed!
5673 if (DTU && SI->getDefaultDest() != BB)
5674 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5675 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5676 if (II->getUnwindDest() == BB) {
5677 if (DTU) {
5678 DTU->applyUpdates(Updates);
5679 Updates.clear();
5680 }
5681 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5682 if (!CI->doesNotThrow())
5683 CI->setDoesNotThrow();
5684 Changed = true;
5685 }
5686 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5687 if (CSI->getUnwindDest() == BB) {
5688 if (DTU) {
5689 DTU->applyUpdates(Updates);
5690 Updates.clear();
5691 }
5692 removeUnwindEdge(TI->getParent(), DTU);
5693 Changed = true;
5694 continue;
5695 }
5696
5697 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5698 E = CSI->handler_end();
5699 I != E; ++I) {
5700 if (*I == BB) {
5701 CSI->removeHandler(I);
5702 --I;
5703 --E;
5704 Changed = true;
5705 }
5706 }
5707 if (DTU)
5708 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5709 if (CSI->getNumHandlers() == 0) {
5710 if (CSI->hasUnwindDest()) {
5711 // Redirect all predecessors of the block containing CatchSwitchInst
5712 // to instead branch to the CatchSwitchInst's unwind destination.
5713 if (DTU) {
5714 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5715 Updates.push_back({DominatorTree::Insert,
5716 PredecessorOfPredecessor,
5717 CSI->getUnwindDest()});
5718 Updates.push_back({DominatorTree::Delete,
5719 PredecessorOfPredecessor, Predecessor});
5720 }
5721 }
5722 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5723 } else {
5724 // Rewrite all preds to unwind to caller (or from invoke to call).
5725 if (DTU) {
5726 DTU->applyUpdates(Updates);
5727 Updates.clear();
5728 }
5729 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5730 for (BasicBlock *EHPred : EHPreds)
5731 removeUnwindEdge(EHPred, DTU);
5732 }
5733 // The catchswitch is no longer reachable.
5734 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5735 CSI->eraseFromParent();
5736 Changed = true;
5737 }
5738 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
5739 (void)CRI;
5740 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5741 "Expected to always have an unwind to BB.");
5742 if (DTU)
5743 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5744 new UnreachableInst(TI->getContext(), TI->getIterator());
5745 TI->eraseFromParent();
5746 Changed = true;
5747 }
5748 }
5749
5750 if (DTU)
5751 DTU->applyUpdates(Updates);
5752
5753 // If this block is now dead, remove it.
5754 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5755 DeleteDeadBlock(BB, DTU);
5756 return true;
5757 }
5758
5759 return Changed;
5760}
5761
5770
// Determine whether Cases forms a contiguous (possibly wrapping) range of
// values. On success returns a ContiguousCasesResult describing the range
// and which destination it selects; returns std::nullopt otherwise.
// NOTE(review): Cases appears to be sorted in descending order here —
// back() is treated as the minimum and front() as the maximum; confirm
// against the caller's sort.
5771 static std::optional<ContiguousCasesResult>
5774 BasicBlock *Dest, BasicBlock *OtherDest) {
5775 assert(Cases.size() >= 1);
5776
5778 const APInt &Min = Cases.back()->getValue();
5779 const APInt &Max = Cases.front()->getValue();
5780 APInt Offset = Max - Min;
5781 size_t ContiguousOffset = Cases.size() - 1;
// Dense, non-wrapping range: max - min == number of cases - 1.
5782 if (Offset == ContiguousOffset) {
5783 return ContiguousCasesResult{
5784 /*Min=*/Cases.back(),
5785 /*Max=*/Cases.front(),
5786 /*Dest=*/Dest,
5787 /*OtherDest=*/OtherDest,
5788 /*Cases=*/&Cases,
5789 /*OtherCases=*/&OtherCases,
5790 };
5791 }
5792 ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false);
5793 // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
5794 // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
5795 // contiguous range for the other destination. N.B. If CR is not a full range,
5796 // Max+1 is not equal to Min. It's not continuous in arithmetic.
5797 if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
5798 assert(Cases.size() >= 2);
// Find the single gap in the descending sequence; its two neighbours
// bound the complementary range for the other destination.
5799 auto *It =
5800 std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
5801 return L->getValue() != R->getValue() + 1;
5802 });
5803 if (It == Cases.end())
5804 return std::nullopt;
5805 auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
// Both halves must be dense for the wrapped range to be contiguous.
5806 if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
5807 Cases.size() - 2) {
5808 return ContiguousCasesResult{
5809 /*Min=*/cast<ConstantInt>(
5810 ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
5811 /*Max=*/
5813 ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
5814 /*Dest=*/OtherDest,
5815 /*OtherDest=*/Dest,
5816 /*Cases=*/&OtherCases,
5817 /*OtherCases=*/&Cases,
5818 };
5819 }
5820 }
5821 return std::nullopt;
5822}
5823
// Replace the switch's dead default destination with a freshly created block
// containing only 'unreachable', recording the corresponding dominator-tree
// edge changes when a DomTreeUpdater is supplied.
5825 DomTreeUpdater *DTU,
5826 bool RemoveOrigDefaultBlock = true) {
5827 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5828 auto *BB = Switch->getParent();
5829 auto *OrigDefaultBlock = Switch->getDefaultDest();
5830 if (RemoveOrigDefaultBlock)
5831 OrigDefaultBlock->removePredecessor(BB);
5832 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5833 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5834 OrigDefaultBlock);
5835 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5837 Switch->setDefaultDest(&*NewDefaultBlock);
5838 if (DTU) {
5840 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
// Only record the Delete edge if no remaining case of the switch (or
// other terminator edge) still targets the old default block.
5841 if (RemoveOrigDefaultBlock &&
5842 !is_contained(successors(BB), OrigDefaultBlock))
5843 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5844 DTU->applyUpdates(Updates);
5845 }
5846}
5847
5848 /// Turn a switch into an integer range comparison and branch.
5849 /// Switches with more than 2 destinations are ignored.
5850 /// Switches with 1 destination are also ignored.
5851 bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5852 IRBuilder<> &Builder) {
5853 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5854
5855 bool HasDefault = !SI->defaultDestUnreachable();
5856
5857 auto *BB = SI->getParent();
5858 // Partition the cases into two sets with different destinations.
5859 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5860 BasicBlock *DestB = nullptr;
5863
5864 for (auto Case : SI->cases()) {
5865 BasicBlock *Dest = Case.getCaseSuccessor();
5866 if (!DestA)
5867 DestA = Dest;
5868 if (Dest == DestA) {
5869 CasesA.push_back(Case.getCaseValue());
5870 continue;
5871 }
5872 if (!DestB)
5873 DestB = Dest;
5874 if (Dest == DestB) {
5875 CasesB.push_back(Case.getCaseValue());
5876 continue;
5877 }
5878 return false; // More than two destinations.
5879 }
5880 if (!DestB)
5881 return false; // All destinations are the same and the default is unreachable
5882
5883 assert(DestA && DestB &&
5884 "Single-destination switch should have been folded.");
5885 assert(DestA != DestB);
5886 assert(DestB != SI->getDefaultDest());
5887 assert(!CasesB.empty() && "There must be non-default cases.");
5888 assert(!CasesA.empty() || HasDefault);
5889
5890 // Figure out if one of the sets of cases form a contiguous range.
5891 std::optional<ContiguousCasesResult> ContiguousCases;
5892
5893 // Only one icmp is needed when there is only one case.
5894 if (!HasDefault && CasesA.size() == 1)
5895 ContiguousCases = ContiguousCasesResult{
5896 /*Min=*/CasesA[0],
5897 /*Max=*/CasesA[0],
5898 /*Dest=*/DestA,
5899 /*OtherDest=*/DestB,
5900 /*Cases=*/&CasesA,
5901 /*OtherCases=*/&CasesB,
5902 };
5903 else if (CasesB.size() == 1)
5904 ContiguousCases = ContiguousCasesResult{
5905 /*Min=*/CasesB[0],
5906 /*Max=*/CasesB[0],
5907 /*Dest=*/DestB,
5908 /*OtherDest=*/DestA,
5909 /*Cases=*/&CasesB,
5910 /*OtherCases=*/&CasesA,
5911 };
5912 // Correctness: Cases to the default destination cannot be contiguous cases.
5913 else if (!HasDefault)
5914 ContiguousCases =
5915 findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);
5916
5917 if (!ContiguousCases)
5918 ContiguousCases =
5919 findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);
5920
5921 if (!ContiguousCases)
5922 return false;
5923
5924 auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;
5925
5926 // Start building the compare and branch.
5927
5929 Constant *NumCases = ConstantInt::get(Offset->getType(),
5930 Max->getValue() - Min->getValue() + 1);
5931 BranchInst *NewBI;
// Single-value range: a plain equality test is cheaper than the
// subtract-and-compare sequence built below.
5932 if (NumCases->isOneValue()) {
5933 assert(Max->getValue() == Min->getValue());
5934 Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
5935 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
5936 }
5937 // If NumCases overflowed, then all possible values jump to the successor.
5938 else if (NumCases->isNullValue() && !Cases->empty()) {
5939 NewBI = Builder.CreateBr(Dest);
5940 } else {
// General form: bias the condition so the range starts at zero, then a
// single unsigned less-than covers the whole contiguous range.
5941 Value *Sub = SI->getCondition();
5942 if (!Offset->isNullValue())
5943 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5944 Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5945 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
5946 }
5947
5948 // Update weight for the newly-created conditional branch.
5949 if (hasBranchWeightMD(*SI)) {
5950 SmallVector<uint64_t, 8> Weights;
5951 getBranchWeights(SI, Weights);
5952 if (Weights.size() == 1 + SI->getNumCases()) {
5953 uint64_t TrueWeight = 0;
5954 uint64_t FalseWeight = 0;
5955 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5956 if (SI->getSuccessor(I) == Dest)
5957 TrueWeight += Weights[I];
5958 else
5959 FalseWeight += Weights[I];
5960 }
// Branch weight metadata is 32-bit; scale both sides down together to
// preserve the ratio.
5961 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5962 TrueWeight /= 2;
5963 FalseWeight /= 2;
5964 }
5965 setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
5966 /*IsExpected=*/false, /*ElideAllZero=*/true);
5967 }
5968 }
5969
5970 // Prune obsolete incoming values off the successors' PHI nodes.
5971 for (auto BBI = Dest->begin(); isa<PHINode>(BBI); ++BBI) {
5972 unsigned PreviousEdges = Cases->size();
5973 if (Dest == SI->getDefaultDest())
5974 ++PreviousEdges;
// Keep exactly one incoming entry for BB; the new branch has a single
// edge where the switch had PreviousEdges.
5975 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5976 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5977 }
5978 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5979 unsigned PreviousEdges = OtherCases->size();
5980 if (OtherDest == SI->getDefaultDest())
5981 ++PreviousEdges;
5982 unsigned E = PreviousEdges - 1;
5983 // Remove all incoming values from OtherDest if OtherDest is unreachable.
5984 if (NewBI->isUnconditional())
5985 ++E;
5986 for (unsigned I = 0; I != E; ++I)
5987 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5988 }
5989
5990 // Clean up the default block - it may have phis or other instructions before
5991 // the unreachable terminator.
5992 if (!HasDefault)
5994
5995 auto *UnreachableDefault = SI->getDefaultDest();
5996
5997 // Drop the switch.
5998 SI->eraseFromParent();
5999
6000 if (!HasDefault && DTU)
6001 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
6002
6003 return true;
6004}
6005
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  // Known zero/one bits of the condition let us rule out case values that can
  // never compare equal to it.
  KnownBits Known = computeKnownBits(Cond, DL, AC, SI);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =

  // Gather dead cases.
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    // Track per-successor case counts so DomTree edges can be deleted for
    // successors that lose all of their cases.
    if (DTU) {
      auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
      if (Inserted)
        UniqueSuccessors.push_back(Successor);
      ++It->second;
    }
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    // A case is dead if it sets a known-zero bit, misses a known-one bit, or
    // needs more significant bits than the condition can carry.
    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
      DeadCases.push_back(Case.getCaseValue());
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    }
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault = !SI->defaultDestUnreachable();
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty() &&
      NumUnknownBits < 64 /* avoid overflow */) {
    uint64_t AllNumCases = 1ULL << NumUnknownBits;
    if (SI->getNumCases() == AllNumCases) {
      return true;
    }
    // When only one case value is missing, replace default with that case.
    // Eliminating the default branch will provide more opportunities for
    // optimization, such as lookup tables.
    if (SI->getNumCases() == AllNumCases - 1) {
      assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
      IntegerType *CondTy = cast<IntegerType>(Cond->getType());
      if (CondTy->getIntegerBitWidth() > 64 ||
          !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
        return false;

      // With more than one unknown bit, the XOR over all 2^k possible values
      // is zero, so XOR-ing the present case values isolates the one missing
      // value (hence the NumUnknownBits > 1 assert above).
      uint64_t MissingCaseVal = 0;
      for (const auto &Case : SI->cases())
        MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
      auto *MissingCase =
          cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
      SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
      createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
      SIW.setSuccessorWeight(0, 0);
      return true;
    }
  }

  if (DeadCases.empty())
    return false;

  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SIW.removeCase(CaseI);
  }

  // Delete DomTree edges to successors that lost every one of their cases.
  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
6104
/// If BB would be eligible for simplification by
/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
/// by an unconditional branch), look at the phi node for BB in the successor
/// block and see if the incoming value is equal to CaseValue. If so, return
/// the phi node, and set PhiIndex to BB's index in the phi node.
                                              BasicBlock *BB, int *PhiIndex) {
  if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
    return nullptr; // BB must be empty to be a candidate for simplification.
  if (!BB->getSinglePredecessor())
    return nullptr; // BB must be dominated by the switch.

  if (!Branch || !Branch->isUnconditional())
    return nullptr; // Terminator must be unconditional branch.

  BasicBlock *Succ = Branch->getSuccessor(0);

  // Look for a phi in the successor whose incoming value along the edge from
  // BB is exactly the case constant.
  for (PHINode &PHI : Succ->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB);
    assert(Idx >= 0 && "PHI has no entry for predecessor?");

    Value *InValue = PHI.getIncomingValue(Idx);
    if (InValue != CaseValue)
      continue;

    *PhiIndex = Idx;
    return &PHI;
  }

  // No phi uses the case constant along this edge.
  return nullptr;
}
6137
/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
/// blocks of the switch can be folded away. Return true if a change is made.
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;

  // Maps each candidate phi to the list of its operand indices whose incoming
  // value equals the corresponding case constant and can therefore be replaced
  // by the switch condition itself.
  ForwardingNodesMap ForwardingNodes;
  BasicBlock *SwitchBlock = SI->getParent();
  bool Changed = false;
  for (const auto &Case : SI->cases()) {
    ConstantInt *CaseValue = Case.getCaseValue();
    BasicBlock *CaseDest = Case.getCaseSuccessor();

    // Replace phi operands in successor blocks that are using the constant case
    // value rather than the switch condition variable:
    //   switchbb:
    //     switch i32 %x, label %default [
    //       i32 17, label %succ
    //   ...
    //   succ:
    //     %r = phi i32 ... [ 17, %switchbb ] ...
    // -->
    //     %r = phi i32 ... [ %x, %switchbb ] ...

    for (PHINode &Phi : CaseDest->phis()) {
      // This only works if there is exactly 1 incoming edge from the switch to
      // a phi. If there is >1, that means multiple cases of the switch map to 1
      // value in the phi, and that phi value is not the switch condition. Thus,
      // this transform would not make sense (the phi would be invalid because
      // a phi can't have different incoming values from the same block).
      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
          count(Phi.blocks(), SwitchBlock) == 1) {
        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
        Changed = true;
      }
    }

    // Collect phi nodes that are indirectly using this switch's case constants.
    int PhiIdx;
    if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
      ForwardingNodes[Phi].push_back(PhiIdx);
  }

  for (auto &ForwardingNode : ForwardingNodes) {
    PHINode *Phi = ForwardingNode.first;
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
    // Check if it helps to fold PHI: require either at least two forwardable
    // slots or that the condition already appears among the incoming values.
    if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
      continue;

    for (int Index : Indexes)
      Phi->setIncomingValue(Index, SI->getCondition());
    Changed = true;
  }

  return Changed;
}
6196
/// Return true if the backend will be able to handle
/// initializing an array of constants like C.
  // Thread-local or dllimport-dependent constants need runtime relocation and
  // cannot be baked into a constant table initializer.
  if (C->isThreadDependent())
    return false;
  if (C->isDLLImportDependent())
    return false;

  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
    return false;

    // Pointer casts and in-bounds GEPs will not prohibit the backend from
    // materializing the array of constants.
    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
    if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
      return false;
  }

  // Let the target veto constants it cannot cheaply materialize in a table.
  if (!TTI.shouldBuildLookupTablesForConstant(C))
    return false;

  return true;
}
6223
/// If V is a Constant, return it. Otherwise, try to look up
/// its constant value in ConstantPool, returning 0 if it's not there.
static Constant *
  if (Constant *C = dyn_cast<Constant>(V))
    return C;
  // DenseMap::lookup returns a default-constructed (null) pointer on a miss.
  return ConstantPool.lookup(V);
}
6233
/// Try to fold instruction I into a constant. This works for
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
static Constant *
    // A select folds to one of its arms once its condition is a known
    // constant; a non-boolean-constant condition cannot be folded.
    Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
    if (!A)
      return nullptr;
    if (A->isAllOnesValue())
      return lookupConstant(Select->getTrueValue(), ConstantPool);
    if (A->isNullValue())
      return lookupConstant(Select->getFalseValue(), ConstantPool);
    return nullptr;
  }

  // Otherwise every operand must be (or map to) a constant; the generic
  // constant folder then evaluates the instruction.
  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
    if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
      COps.push_back(A);
    else
      return nullptr;
  }

  return ConstantFoldInstOperands(I, COps, DL);
}
6262
/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
                   BasicBlock **CommonDest,
                   SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
                   const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  // Seed the pool: within this case, the switch condition IS the case value.
  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(0);
    } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would then
      // no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (I->getParent() == CaseDest)
          continue;
        if (PHINode *Phi = dyn_cast<PHINode>(User))
          if (Phi->getIncomingBlock(Use) == CaseDest)
            continue;
        return false;
      }

      ConstantPool.insert(std::make_pair(&I, C));
    } else {
      // First instruction we cannot fold: stop scanning forward.
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!validLookupTableConstant(ConstVal, TTI))
      return false;

    Res.push_back(std::make_pair(&PHI, ConstVal));
  }

  // Success only if at least one phi result was collected.
  return Res.size() > 0;
}
6336
6337// Helper function used to add CaseVal to the list of cases that generate
6338// Result. Returns the updated number of cases that generate this result.
6339static size_t mapCaseToResult(ConstantInt *CaseVal,
6340 SwitchCaseResultVectorTy &UniqueResults,
6341 Constant *Result) {
6342 for (auto &I : UniqueResults) {
6343 if (I.first == Result) {
6344 I.second.push_back(CaseVal);
6345 return I.second.size();
6346 }
6347 }
6348 UniqueResults.push_back(
6349 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6350 return 1;
6351}
6352
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency: every case must feed the same phi.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value.
  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
                 DL, TTI);
  // If the default value is not found abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;

  return DefaultResult || SI->defaultDestUnreachable();
}
6406
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// TODO: Handle switches with more than 2 cases that map to the same result.
// The branch weights correspond to the provided Condition (i.e. if Condition is
// modified from the original SwitchInst, the caller must adjust the weights)
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder, const DataLayout &DL,
                                 ArrayRef<uint32_t> BranchWeights) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }

  const bool HasBranchWeights =
      !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;

  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
      if (auto *SI = dyn_cast<SelectInst>(SelectValue);
          SI && HasBranchWeights) {
        // We start with 3 probabilities, where the numerator is the
        // corresponding BranchWeights[i], and the denominator is the sum over
        // BranchWeights. We want the probability and negative probability of
        // Condition == SecondCase.
        assert(BranchWeights.size() == 3);
            *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
            /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                      SelectValue, "switch.select");
    if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
      // We may have had a DefaultResult. Base the position of the first and
      // second's branch weights accordingly. Also the probability that
      // Condition != FirstCase needs to take that into account.
      assert(BranchWeights.size() >= 2);
      size_t FirstCasePos = (Condition != nullptr);
      size_t SecondCasePos = FirstCasePos + 1;
      uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
          {BranchWeights[FirstCasePos],
           DefaultCase + BranchWeights[SecondCasePos]},
          /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
    return Ret;
  }

  // Handle the degenerate case where two cases have the same result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // If there are bits that are set exclusively by CaseValues, we
      // can transform the switch into a select if the conjunction of
      // all the values uniquely identify CaseValues.
      APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());

      // Find the minimum value and compute the and of all the case values.
      for (auto *Case : CaseValues) {
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;
        AndMask &= Case->getValue();
      }
      KnownBits Known = computeKnownBits(Condition, DL);

      if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
        // Compute the number of bits that are free to vary.
        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();

        // Check if the number of values covered by the mask is equal
        // to the number of cases.
        if (FreeBits == Log2_32(CaseCount)) {
          Value *And = Builder.CreateAnd(Condition, AndMask);
          Value *Cmp = Builder.CreateICmpEQ(
              And, Constant::getIntegerValue(And->getType(), AndMask));
          Value *Ret =
              Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
          if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
            // We know there's a Default case. We base the resulting branch
            // weights off its probability.
            assert(BranchWeights.size() >= 2);
                *SI,
                {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
                /*IsExpected=*/false, /*ElideAllZero=*/true);
          }
          return Ret;
        }
      }

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        // Rebase the condition so the minimum case value maps to zero.
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        Value *Ret =
            Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
        if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
          assert(BranchWeights.size() >= 2);
              *SI,
              {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
              /*IsExpected=*/false, /*ElideAllZero=*/true);
        }
        return Ret;
      }
    }

    // Handle the degenerate case where two cases have the same value.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      Value *Ret =
          Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
        assert(BranchWeights.size() >= 2);
            *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
            /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
      return Ret;
    }
  }

  // No profitable select form found.
  return nullptr;
}
6565
// Helper function to cleanup a switch instruction that has been converted into
// a select, fixing up PHI nodes and basic blocks.
                                        Value *SelectValue,
                                        IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU) {
  std::vector<DominatorTree::UpdateType> Updates;

  BasicBlock *SelectBB = SI->getParent();
  BasicBlock *DestBB = PHI->getParent();

  // Record the new edge only if SelectBB was not already a predecessor of the
  // destination block.
  if (DTU && !is_contained(predecessors(DestBB), SelectBB))
    Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
  Builder.CreateBr(DestBB);

  // Remove the switch.

  // Collapse all of SelectBB's incoming entries in the phi into a single
  // entry carrying the select's result.
  PHI->removeIncomingValueIf(
      [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
  PHI->addIncoming(SelectValue, SelectBB);

  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(i);

    if (Succ == DestBB)
      continue;
    Succ->removePredecessor(SelectBB);
    // Deduplicate: several cases of the switch may share a successor.
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
  }
  SI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);
}
6601
/// If a switch is only used to initialize one or more phi nodes in a common
/// successor block with only two different constant values, try to replace the
/// switch with a select. Returns true if the fold was made.
                             DomTreeUpdater *DTU, const DataLayout &DL,
                             const TargetTransformInfo &TTI) {
  Value *const Cond = SI->getCondition();
  PHINode *PHI = nullptr;
  BasicBlock *CommonDest = nullptr;
  Constant *DefaultResult;
  SwitchCaseResultVectorTy UniqueResults;
  // Collect all the cases that will deliver the same value from the switch.
  if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
                             DL, TTI, /*MaxUniqueResults*/ 2))
    return false;

  assert(PHI != nullptr && "PHI for value select not found");
  Builder.SetInsertPoint(SI);
  SmallVector<uint32_t, 4> BranchWeights;
    [[maybe_unused]] auto HasWeights =
    assert(!HasWeights == (BranchWeights.empty()));
  }
  // If weights exist there must be one per result plus the default slot.
  assert(BranchWeights.empty() ||
         (BranchWeights.size() >=
          UniqueResults.size() + (DefaultResult != nullptr)));

  Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
                                          Builder, DL, BranchWeights);
  if (!SelectValue)
    return false;

  removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
  return true;
}
6638
namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

  /// Return true if the replacement is a bit map.
  bool isBitMap();

private:
  // Depending on the switch, there are different alternatives.
  // The representation kind is chosen once, in the constructor, and the
  // members below are populated according to the chosen kind.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6717
SwitchReplacement::SwitchReplacement(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
    : DefaultValue(DefaultValue) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  ValueType = Values.begin()->second->getType();

  // Build up the table contents.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    // Table slots are indexed relative to the smallest case value (Offset).
    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // Poison entries don't break the single-value property; any other
    // differing value does.
    if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
        // This is a poison, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison as
        // the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poisons in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          // The first gap defines the required common stride.
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(TableContents[0]);
      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
      // Probe for signed overflow at the maximum index; if multiplication can
      // wrap we must not emit nsw on the derived Mul/Add.
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      if (isIntN(M.getBitWidth(), TableSize - 1))
        (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ValueType)) {
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(M.getContext(), TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    return;
  }

  // Store the table in an array.
  auto *TableTy = ArrayType::get(ValueType, TableSize);
  Initializer = ConstantArray::get(TableTy, TableContents);

  Kind = LookupTableKind;
}
6847
/// Emit the IR that computes the switch result for the given Index, according
/// to the representation Kind chosen in the constructor, and return the
/// resulting value.
Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
                                        const DataLayout &DL, Function *Func) {
  switch (Kind) {
  case SingleValueKind:
    // Every table entry is identical; no instructions needed.
    return SingleValue;
  case LinearMapKind: {
    ++NumLinearMaps;
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    ++NumBitMaps;
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case LookupTableKind: {
    ++NumLookupTables;
    auto *Table =
        new GlobalVariable(*Func->getParent(), Initializer->getType(),
                           /*isConstant=*/true, GlobalVariable::PrivateLinkage,
                           Initializer, "switch.table." + Func->getName());
    Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of an array items. We will be only loading one
    // value out of it.
    Table->setAlignment(DL.getPrefTypeAlign(ValueType));
    Type *IndexTy = DL.getIndexType(Table->getType());
    auto *ArrayTy = cast<ArrayType>(Table->getValueType());

    if (Index->getType() != IndexTy) {
      unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
      Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
      // Mark the zext non-negative when every valid table index fits in the
      // original type's non-negative range.
      if (auto *Zext = dyn_cast<ZExtInst>(Index))
        Zext->setNonNeg(
            isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
    }

    Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
    Value *GEP =
        Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
    return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
  }
  }
  llvm_unreachable("Unknown helper kind!");
}
6921
6922bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
6923 uint64_t TableSize,
6924 Type *ElementType) {
6925 auto *IT = dyn_cast<IntegerType>(ElementType);
6926 if (!IT)
6927 return false;
6928 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6929 // are <= 15, we could try to narrow the type.
6930
6931 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6932 if (TableSize >= UINT_MAX / IT->getBitWidth())
6933 return false;
6934 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6935}
6936
                                      const DataLayout &DL) {
  // Allow any legal type.
  if (TTI.isTypeLegal(Ty))
    return true;

  // Beyond that, only integer result types are considered.
  auto *IT = dyn_cast<IntegerType>(Ty);
  if (!IT)
    return false;

  // Also allow power of 2 integer types that have at least 8 bits and fit in
  // a register. These types are common in frontend languages and targets
  // usually support loads of these types.
  // TODO: We could relax this to any integer that fits in a register and rely
  // on ABI alignment and padding in the table to allow the load to be widened.
  // Or we could widen the constants and truncate the load.
  unsigned BitWidth = IT->getBitWidth();
  return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
         DL.fitsInLegalInteger(IT->getBitWidth());
}
6957
6958Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
6959
6960bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
6961
6962bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
6963
/// Density heuristic: report whether \p NumCases populated slots over a case
/// range of \p CaseRange values is dense enough to be worth a table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  // Reject enormous ranges up front so neither product below can overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Compare NumCases/CaseRange against MinDensity% without division:
  // scale both sides by 100.
  const uint64_t ScaledPopulated = NumCases * 100;
  const uint64_t ScaledRequired = CaseRange * MinDensity;
  return ScaledPopulated >= ScaledRequired;
}
6975
  // Values is expected to be sorted ascending (callers sort before calling),
  // so back() - front() spans the full case range. The subtraction is done in
  // uint64_t so a span wider than INT64_MAX still yields the right distance.
  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
  uint64_t Range = Diff + 1;
  if (Range < Diff)
    return false; // Overflow.

  return isSwitchDense(Values.size(), Range);
}
6984
6985/// Determine whether a lookup table should be built for this switch, based on
6986/// the number of cases, size of the table, and the types of the results.
6987// TODO: We could support larger than legal types by limiting based on the
6988// number of loads required and/or table size. If the constants are small we
6989// could use smaller table entries and extend after the load.
                                   const TargetTransformInfo &TTI,
                                   const DataLayout &DL,
                                   const SmallVector<Type *> &ResultTypes) {
  // TableSize was derived from the case values; if it came out smaller than
  // the number of cases, that computation overflowed.
  if (SI->getNumCases() > TableSize)
    return false; // TableSize overflowed.

  bool AllTablesFitInRegister = true;
  bool HasIllegalType = false;
  for (const auto &Ty : ResultTypes) {
    // Saturate this flag to true.
    HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);

    // Saturate this flag to false.
    AllTablesFitInRegister =
        AllTablesFitInRegister &&
        SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);

    // If both flags saturate, we're done. NOTE: This *only* works with
    // saturating flags, and all flags have to saturate first due to the
    // non-deterministic behavior of iterating over a dense map.
    if (HasIllegalType && !AllTablesFitInRegister)
      break;
  }

  // If each table would fit in a register, we should build it anyway.
  if (AllTablesFitInRegister)
    return true;

  // Don't build a table that doesn't fit in-register if it has illegal types.
  if (HasIllegalType)
    return false;

  // Otherwise fall back to the density heuristic over the case range.
  return isSwitchDense(SI->getNumCases(), TableSize);
}
7025
    ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
    bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
    const DataLayout &DL, const TargetTransformInfo &TTI) {
  // A zero minimum case means the condition already is a zero-based index.
  if (MinCaseVal.isNullValue())
    return true;
  // Negative minima or a max case of UINT64_MAX (table would need 2^64
  // entries) force offset-based indexing; so does a missing constant default
  // result, which would be needed to fill the grown table's holes.
  if (MinCaseVal.isNegative() ||
      MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
      !HasDefaultResults)
    return false;
  // Only worthwhile if every per-phi table still fits in a register after
  // being grown to MaxCaseVal + 1 entries.
  return all_of(ResultTypes, [&](const auto &ResultType) {
    return SwitchReplacement::wouldFitInRegister(
        DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
  });
}
7041
7042/// Try to reuse the switch table index compare. Following pattern:
7043/// \code
7044/// if (idx < tablesize)
7045/// r = table[idx]; // table does not contain default_value
7046/// else
7047/// r = default_value;
7048/// if (r != default_value)
7049/// ...
7050/// \endcode
7051/// Is optimized to:
7052/// \code
7053/// cond = idx < tablesize;
7054/// if (cond)
7055/// r = table[idx];
7056/// else
7057/// r = default_value;
7058/// if (cond)
7059/// ...
7060/// \endcode
7061/// Jump threading will then eliminate the second if(cond).
    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  // Only a compare user of the phi is of interest here.
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  // The compare's other operand must be a constant so it can be folded below.
  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  const DataLayout &DL = PhiBlock->getDataLayout();
      CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result.
  for (auto ValuePair : Values) {
        CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do it
  // at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
        RangeCheckBranch->getIterator());
    CmpInst->replaceAllUsesWith(InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}
7123
7124/// If the switch is only used to initialize one or more phi nodes in a common
7125/// successor block with different constant values, replace the switch with
7126/// lookup tables.
                                 DomTreeUpdater *DTU, const DataLayout &DL,
                                 const TargetTransformInfo &TTI,
                                 bool ConvertSwitchToLookupTable) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;

  SmallVector<Type *> ResultTypes;

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
                        Results, DL, TTI))
      return false;

    // Append the result and result types from this case to the list for each
    // phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      auto [It, Inserted] = ResultLists.try_emplace(PHI);
      if (Inserted)
        PHIs.push_back(PHI);
      It->second.push_back(std::make_pair(CaseVal, Value));
      ResultTypes.push_back(PHI->getType());
    }
  }

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
                     DefaultResultsList, DL, TTI);
  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    TableSize = MaxCaseVal->getLimitedValue() + 1;
    TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
  } else {
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

    TableIndexOffset = MinCaseVal;
  }

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  // Every matched case appended one entry to each phi's result list, so any
  // list's length is the number of populated table slots.
  uint64_t NumResults = ResultLists[PHIs[0]].size();
  bool DefaultIsReachable = !SI->defaultDestUnreachable();

  bool TableHasHoles = (NumResults < TableSize);

  // If the table has holes but the default destination doesn't produce any
  // constant results, the lookup table entries corresponding to the holes will
  // contain poison.
  bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;

  // If the default destination doesn't produce a constant result but is still
  // reachable, and the lookup table has holes, we need to use a mask to
  // determine if the current index should load from the lookup table or jump
  // to the default case.
  // The mask is unnecessary if the table has holes but the default destination
  // is unreachable, as in that case the holes must also be unreachable.
  bool NeedMask = AllHolesArePoison && DefaultIsReachable;
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    if (!DL.fitsInLegalInteger(TableSize))
      return false;
  }

  if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  // Compute the table index value.
  Value *TableIndex;
  if (UseSwitchConditionAsTableIndex) {
    TableIndex = SI->getCondition();
    if (HasDefaultResults) {
      // Grow the table to cover all possible index values to avoid the range
      // check. It will use the default result to fill in the table hole later,
      // so make sure it exist.
      ConstantRange CR =
          computeConstantRange(TableIndex, /* ForSigned */ false);
      // Grow the table shouldn't have any size impact by checking
      // wouldFitInRegister.
      // TODO: Consider growing the table also when it doesn't fit in a register
      // if no optsize is specified.
      const uint64_t UpperBound = CR.getUpper().getLimitedValue();
      if (!CR.isUpperWrapped() &&
          all_of(ResultTypes, [&](const auto &ResultType) {
            return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
                                                         ResultType);
          })) {
        // There may be some case index larger than the UpperBound (unreachable
        // case), so make sure the table size does not get smaller.
        TableSize = std::max(UpperBound, TableSize);
        // The default branch is unreachable after we enlarge the lookup table.
        // Adjust DefaultIsReachable to reuse code path.
        DefaultIsReachable = false;
      }
    }
  }

  // Keep track of the switch replacement for each phi
  for (PHINode *PHI : PHIs) {
    const auto &ResultList = ResultLists[PHI];

    Type *ResultType = ResultList.begin()->second->getType();
    // Use any value to fill the lookup table holes.
        AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
                                  ResultList, DefaultVal, DL, FuncName);
    PhiToReplacementMap.insert({PHI, Replacement});
  }

  bool AnyLookupTables = any_of(
      PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
  bool AnyBitMaps = any_of(PhiToReplacementMap,
                           [](auto &KV) { return KV.second.isBitMap(); });

  // A few conditions prevent the generation of lookup tables:
  // 1. The target does not support lookup tables.
  // 2. The "no-jump-tables" function attribute is set.
  // However, these objections do not apply to other switch replacements, like
  // the bitmap, so we only stop here if any of these conditions are met and we
  // want to create a LUT. Otherwise, continue with the switch replacement.
  if (AnyLookupTables &&
      (!TTI.shouldBuildLookupTables() ||
       Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
    return false;

  // In the early optimization pipeline, disable formation of lookup tables,
  // bit maps and mask checks, as they may inhibit further optimization.
  if (!ConvertSwitchToLookupTable &&
      (AnyLookupTables || AnyBitMaps || NeedMask))
    return false;

  Builder.SetInsertPoint(SI);
  // TableIndex is the switch condition - TableIndexOffset if we don't
  // use the condition directly
  if (!UseSwitchConditionAsTableIndex) {
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res =
          MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
      (void)Res;
    }
    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
                                   "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);

  BranchInst *RangeCheckBranch = nullptr;
  BranchInst *CondBranch = nullptr;

  Builder.SetInsertPoint(SI);
  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(LookupBB);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
    // Note: We call removePredecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    Value *Cmp = Builder.CreateICmpULT(
        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
    CondBranch = RangeCheckBranch;
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                  CommonDest->getParent(), CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (const auto &Result : ResultList) {
      uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
    CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];
    // Note: copies the replacement; replaceSwitch emits the IR that computes
    // this phi's value in LookupBB.
    auto Replacement = PhiToReplacementMap.at(PHI);
    auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(User, PhiBlock, RangeCheckBranch,
                          Replacement.getDefaultValue(), ResultList);
      }
    }

    PHI->addIncoming(Result, LookupBB);
  }

  Builder.CreateBr(CommonDest);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});

  // Capture the switch's branch weights so they can be re-applied to the new
  // conditional branch (range check or hole check) below.
  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(*SI, BranchWeights);
  uint64_t ToLookupWeight = 0;
  uint64_t ToDefaultWeight = 0;

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
    BasicBlock *Succ = SI->getSuccessor(I);

    if (Succ == SI->getDefaultDest()) {
      if (HasBranchWeights)
        ToDefaultWeight += BranchWeights[I];
      continue;
    }
    Succ->removePredecessor(BB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, BB, Succ});
    if (HasBranchWeights)
      ToLookupWeight += BranchWeights[I];
  }
  SI->eraseFromParent();
  if (HasBranchWeights)
    setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
                           /*IsExpected=*/false);
  if (DTU)
    DTU->applyUpdates(Updates);

  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
7475
7476/// Try to transform a switch that has "holes" in it to a contiguous sequence
7477/// of cases.
7478///
7479/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7480/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7481///
7482/// This converts a sparse switch into a dense switch which allows better
7483/// lowering and could also allow transforming into a lookup table.
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
  // Restrict to conditions that fit 64-bit math and a legal integer register.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;
  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values.
  // We can treat the case values as signed or unsigned. We can optimize more
  // common cases such as a sequence crossing zero {-4,0,4,8} if we interpret
  // case values as signed.
  for (const auto &C : SI->cases())
    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more
  // than one element and LLVM disallows duplicate cases, Shift is guaranteed
  // to be less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  // C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.

  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  Value *Sub =
      Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
  Value *Rot = Builder.CreateIntrinsic(
      Ty, Intrinsic::fshl,
      {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
  SI->replaceUsesOfWith(SI->getCondition(), Rot);

  // Rewrite each case value to its rebased, right-shifted counterpart.
  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
    Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
  }
  return true;
}
7562
7563/// Tries to transform switch of powers of two to reduce switch range.
7564/// For example, switch like:
7565/// switch (C) { case 1: case 2: case 64: case 128: }
7566/// will be transformed to:
7567/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7568///
7569/// This transformation allows better lowering and may transform the switch
7570/// instruction into a sequence of bit manipulation and a smaller
7571/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7572/// address of the jump target, and indirectly jump to it).
                                        DomTreeUpdater *DTU,
                                        const DataLayout &DL,
                                        const TargetTransformInfo &TTI) {
  Value *Condition = SI->getCondition();
  LLVMContext &Context = SI->getContext();
  auto *CondTy = cast<IntegerType>(Condition->getType());

  // Restrict to conditions that fit 64-bit math and a legal integer register.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;

  // Ensure trailing zeroes count intrinsic emission is not too expensive.
  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
                                {Condition, ConstantInt::getTrue(Context)});
  if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
      TTI::TCC_Basic * 2)
    return false;

  // Only bother with this optimization if there are more than 3 switch cases.
  // SDAG will start emitting jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // Check that switch cases are powers of two.
  for (const auto &Case : SI->cases()) {
    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
    if (llvm::has_single_bit(CaseValue))
      Values.push_back(CaseValue);
    else
      return false;
  }

  // isSwitchDense requires case values to be sorted.
  llvm::sort(Values);
  if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
                                        llvm::countr_zero(Values.front()) + 1))
    // Transform is unable to generate dense switch.
    return false;

  Builder.SetInsertPoint(SI);

  if (!SI->defaultDestUnreachable()) {
    // Let non-power-of-two inputs jump to the default case, when the latter is
    // reachable.
    auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
    auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));

    // Split before the switch and guard the split block with the ctpop == 1
    // test, sending non-powers-of-two straight to the default destination.
    auto *OrigBB = SI->getParent();
    auto *DefaultCaseBB = SI->getDefaultDest();
    BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
    auto It = OrigBB->getTerminator()->getIterator();
    BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
    It->eraseFromParent();

    addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
  }

  // Replace each case with its trailing zeros number.
  for (auto &Case : SI->cases()) {
    auto *OrigValue = Case.getCaseValue();
    Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
                                   OrigValue->getValue().countr_zero()));
  }

  // Replace condition with its trailing zeros number.
  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
      Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});

  SI->setCondition(ConditionTrailingZeros);

  return true;
}
7649
7650/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7651/// the same destination.
7653 DomTreeUpdater *DTU) {
7654 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7655 if (!Cmp || !Cmp->hasOneUse())
7656 return false;
7657
7659 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7660 if (!HasWeights)
7661 Weights.resize(4); // Avoid checking HasWeights everywhere.
7662
7663 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7664 int64_t Res;
7665 BasicBlock *Succ, *OtherSucc;
7666 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7667 BasicBlock *Unreachable = nullptr;
7668
7669 if (SI->getNumCases() == 2) {
7670 // Find which of 1, 0 or -1 is missing (handled by default dest).
7671 SmallSet<int64_t, 3> Missing;
7672 Missing.insert(1);
7673 Missing.insert(0);
7674 Missing.insert(-1);
7675
7676 Succ = SI->getDefaultDest();
7677 SuccWeight = Weights[0];
7678 OtherSucc = nullptr;
7679 for (auto &Case : SI->cases()) {
7680 std::optional<int64_t> Val =
7681 Case.getCaseValue()->getValue().trySExtValue();
7682 if (!Val)
7683 return false;
7684 if (!Missing.erase(*Val))
7685 return false;
7686 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7687 return false;
7688 OtherSucc = Case.getCaseSuccessor();
7689 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7690 }
7691
7692 assert(Missing.size() == 1 && "Should have one case left");
7693 Res = *Missing.begin();
7694 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7695 // Normalize so that Succ is taken once and OtherSucc twice.
7696 Unreachable = SI->getDefaultDest();
7697 Succ = OtherSucc = nullptr;
7698 for (auto &Case : SI->cases()) {
7699 BasicBlock *NewSucc = Case.getCaseSuccessor();
7700 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7701 if (!OtherSucc || OtherSucc == NewSucc) {
7702 OtherSucc = NewSucc;
7703 OtherSuccWeight += Weight;
7704 } else if (!Succ) {
7705 Succ = NewSucc;
7706 SuccWeight = Weight;
7707 } else if (Succ == NewSucc) {
7708 std::swap(Succ, OtherSucc);
7709 std::swap(SuccWeight, OtherSuccWeight);
7710 } else
7711 return false;
7712 }
7713 for (auto &Case : SI->cases()) {
7714 std::optional<int64_t> Val =
7715 Case.getCaseValue()->getValue().trySExtValue();
7716 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7717 return false;
7718 if (Case.getCaseSuccessor() == Succ) {
7719 Res = *Val;
7720 break;
7721 }
7722 }
7723 } else {
7724 return false;
7725 }
7726
7727 // Determine predicate for the missing case.
7729 switch (Res) {
7730 case 1:
7731 Pred = ICmpInst::ICMP_UGT;
7732 break;
7733 case 0:
7734 Pred = ICmpInst::ICMP_EQ;
7735 break;
7736 case -1:
7737 Pred = ICmpInst::ICMP_ULT;
7738 break;
7739 }
7740 if (Cmp->isSigned())
7741 Pred = ICmpInst::getSignedPredicate(Pred);
7742
7743 MDNode *NewWeights = nullptr;
7744 if (HasWeights)
7745 NewWeights = MDBuilder(SI->getContext())
7746 .createBranchWeights(SuccWeight, OtherSuccWeight);
7747
7748 BasicBlock *BB = SI->getParent();
7749 Builder.SetInsertPoint(SI->getIterator());
7750 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7751 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7752 SI->getMetadata(LLVMContext::MD_unpredictable));
7753 OtherSucc->removePredecessor(BB);
7754 if (Unreachable)
7755 Unreachable->removePredecessor(BB);
7756 SI->eraseFromParent();
7757 Cmp->eraseFromParent();
7758 if (DTU && Unreachable)
7759 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7760 return true;
7761}
7762
7763/// Checking whether two cases of SI are equal depends on the contents of the
7764/// BasicBlock and the incoming values of their successor PHINodes.
7765/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7766/// calling this function on each BasicBlock every time isEqual is called,
7767/// especially since the same BasicBlock may be passed as an argument multiple
7768/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7769/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7770/// of the incoming values.
7775
7778 return static_cast<SwitchSuccWrapper *>(
7780 }
7782 return static_cast<SwitchSuccWrapper *>(
7784 }
7785 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7786 BasicBlock *Succ = SSW->Dest;
7788 assert(BI->isUnconditional() &&
7789 "Only supporting unconditional branches for now");
7790 assert(BI->getNumSuccessors() == 1 &&
7791 "Expected unconditional branches to have one successor");
7792 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7793
7794 // Since we assume the BB is just a single BranchInst with a single
7795 // successor, we hash as the BB and the incoming Values of its successor
7796 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7797 // including the incoming PHI values leads to better performance.
7798 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7799 // time and passing it in SwitchSuccWrapper, but this slowed down the
7800 // average compile time without having any impact on the worst case compile
7801 // time.
7802 BasicBlock *BB = BI->getSuccessor(0);
7803 SmallVector<Value *> PhiValsForBB;
7804 for (PHINode &Phi : BB->phis())
7805 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7806
7807 return hash_combine(BB, hash_combine_range(PhiValsForBB));
7808 }
7809 static bool isEqual(const SwitchSuccWrapper *LHS,
7810 const SwitchSuccWrapper *RHS) {
7813 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7814 return LHS == RHS;
7815
7816 BasicBlock *A = LHS->Dest;
7817 BasicBlock *B = RHS->Dest;
7818
7819 // FIXME: we checked that the size of A and B are both 1 in
7820 // simplifyDuplicateSwitchArms to make the Case list smaller to
7821 // improve performance. If we decide to support BasicBlocks with more
7822 // than just a single instruction, we need to check that A.size() ==
7823 // B.size() here, and we need to check more than just the BranchInsts
7824 // for equality.
7825
7826 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7827 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7828 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7829 "Only supporting unconditional branches for now");
7830 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7831 return false;
7832
7833 // Need to check that PHIs in successor have matching values
7834 BasicBlock *Succ = ABI->getSuccessor(0);
7835 for (PHINode &Phi : Succ->phis()) {
7836 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7837 if (PredIVs[A] != PredIVs[B])
7838 return false;
7839 }
7840
7841 return true;
7842 }
7843};
7844
7845bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7846 DomTreeUpdater *DTU) {
7847 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7848 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7849 // an entire PHI at once after the loop, opposed to calling
7850 // getIncomingValueForBlock inside this loop, since each call to
7851 // getIncomingValueForBlock is O(|Preds|).
7857 Cases.reserve(SI->getNumSuccessors());
7858
7859 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7860 BasicBlock *BB = SI->getSuccessor(I);
7861
7862 // FIXME: Support more than just a single BranchInst. One way we could do
7863 // this is by taking a hashing approach of all insts in BB.
7864 if (BB->size() != 1)
7865 continue;
7866
7867 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7868 // on other kinds of terminators. We decide to only support unconditional
7869 // branches for now for compile time reasons.
7870 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
7871 if (!BI || BI->isConditional())
7872 continue;
7873
7874 if (!Seen.insert(BB).second) {
7875 auto It = BBToSuccessorIndexes.find(BB);
7876 if (It != BBToSuccessorIndexes.end())
7877 It->second.emplace_back(I);
7878 continue;
7879 }
7880
7881 // FIXME: This case needs some extra care because the terminators other than
7882 // SI need to be updated. For now, consider only backedges to the SI.
7883 if (BB->getUniquePredecessor() != SI->getParent())
7884 continue;
7885
7886 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7887 for (BasicBlock *Succ : BI->successors())
7889
7890 // Add the successor only if not previously visited.
7891 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7892 BBToSuccessorIndexes[BB].emplace_back(I);
7893 }
7894
7895 // Precompute a data structure to improve performance of isEqual for
7896 // SwitchSuccWrapper.
7897 PhiPredIVs.reserve(Phis.size());
7898 for (PHINode *Phi : Phis) {
7899 auto &IVs =
7900 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
7901 for (auto &IV : Phi->incoming_values())
7902 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
7903 }
7904
7905 // Build a set such that if the SwitchSuccWrapper exists in the set and
7906 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7907 // which is not in the set should be replaced with the one in the set. If the
7908 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7909 // other SwitchSuccWrappers can check against it in the same manner. We use
7910 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7911 // around information to isEquality, getHashValue, and when doing the
7912 // replacement with better performance.
7913 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
7914 ReplaceWith.reserve(Cases.size());
7915
7917 Updates.reserve(ReplaceWith.size());
7918 bool MadeChange = false;
7919 for (auto &SSW : Cases) {
7920 // SSW is a candidate for simplification. If we find a duplicate BB,
7921 // replace it.
7922 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7923 if (!Inserted) {
7924 // We know that SI's parent BB no longer dominates the old case successor
7925 // since we are making it dead.
7926 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7927 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7928 for (unsigned Idx : Successors)
7929 SI->setSuccessor(Idx, (*It)->Dest);
7930 MadeChange = true;
7931 }
7932 }
7933
7934 if (DTU)
7935 DTU->applyUpdates(Updates);
7936
7937 return MadeChange;
7938}
7939
7940bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7941 BasicBlock *BB = SI->getParent();
7942
7943 if (isValueEqualityComparison(SI)) {
7944 // If we only have one predecessor, and if it is a branch on this value,
7945 // see if that predecessor totally determines the outcome of this switch.
7946 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7947 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7948 return requestResimplify();
7949
7950 Value *Cond = SI->getCondition();
7951 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7952 if (simplifySwitchOnSelect(SI, Select))
7953 return requestResimplify();
7954
7955 // If the block only contains the switch, see if we can fold the block
7956 // away into any preds.
7957 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7958 if (foldValueComparisonIntoPredecessors(SI, Builder))
7959 return requestResimplify();
7960 }
7961
7962 // Try to transform the switch into an icmp and a branch.
7963 // The conversion from switch to comparison may lose information on
7964 // impossible switch values, so disable it early in the pipeline.
7965 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7966 return requestResimplify();
7967
7968 // Remove unreachable cases.
7969 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7970 return requestResimplify();
7971
7972 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
7973 return requestResimplify();
7974
7975 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7976 return requestResimplify();
7977
7978 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
7979 return requestResimplify();
7980
7981 // The conversion of switches to arithmetic or lookup table is disabled in
7982 // the early optimization pipeline, as it may lose information or make the
7983 // resulting code harder to analyze.
7984 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
7985 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
7986 Options.ConvertSwitchToLookupTable))
7987 return requestResimplify();
7988
7989 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
7990 return requestResimplify();
7991
7992 if (reduceSwitchRange(SI, Builder, DL, TTI))
7993 return requestResimplify();
7994
7995 if (HoistCommon &&
7996 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
7997 return requestResimplify();
7998
7999 if (simplifyDuplicateSwitchArms(SI, DTU))
8000 return requestResimplify();
8001
8002 return false;
8003}
8004
8005bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
8006 BasicBlock *BB = IBI->getParent();
8007 bool Changed = false;
8008 SmallVector<uint32_t> BranchWeights;
8009 const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8010 extractBranchWeights(*IBI, BranchWeights);
8011
8012 DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8013 if (HasBranchWeights)
8014 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8015 TargetWeight[IBI->getDestination(I)] += BranchWeights[I];
8016
8017 // Eliminate redundant destinations.
8018 SmallPtrSet<Value *, 8> Succs;
8019 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
8020 for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
8021 BasicBlock *Dest = IBI->getDestination(I);
8022 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
8023 if (!Dest->hasAddressTaken())
8024 RemovedSuccs.insert(Dest);
8025 Dest->removePredecessor(BB);
8026 IBI->removeDestination(I);
8027 --I;
8028 --E;
8029 Changed = true;
8030 }
8031 }
8032
8033 if (DTU) {
8034 std::vector<DominatorTree::UpdateType> Updates;
8035 Updates.reserve(RemovedSuccs.size());
8036 for (auto *RemovedSucc : RemovedSuccs)
8037 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
8038 DTU->applyUpdates(Updates);
8039 }
8040
8041 if (IBI->getNumDestinations() == 0) {
8042 // If the indirectbr has no successors, change it to unreachable.
8043 new UnreachableInst(IBI->getContext(), IBI->getIterator());
8045 return true;
8046 }
8047
8048 if (IBI->getNumDestinations() == 1) {
8049 // If the indirectbr has one successor, change it to a direct branch.
8052 return true;
8053 }
8054 if (HasBranchWeights) {
8055 SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8056 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8057 NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
8058 setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
8059 }
8060 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
8061 if (simplifyIndirectBrOnSelect(IBI, SI))
8062 return requestResimplify();
8063 }
8064 return Changed;
8065}
8066
8067/// Given an block with only a single landing pad and a unconditional branch
8068/// try to find another basic block which this one can be merged with. This
8069/// handles cases where we have multiple invokes with unique landing pads, but
8070/// a shared handler.
8071///
8072/// We specifically choose to not worry about merging non-empty blocks
8073/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8074/// practice, the optimizer produces empty landing pad blocks quite frequently
8075/// when dealing with exception dense code. (see: instcombine, gvn, if-else
8076/// sinking in this file)
8077///
8078/// This is primarily a code size optimization. We need to avoid performing
8079/// any transform which might inhibit optimization (such as our ability to
8080/// specialize a particular handler via tail commoning). We do this by not
8081/// merging any blocks which require us to introduce a phi. Since the same
8082/// values are flowing through both blocks, we don't lose any ability to
8083/// specialize. If anything, we make such specialization more likely.
8084///
8085/// TODO - This transformation could remove entries from a phi in the target
8086/// block when the inputs in the phi are the same for the two blocks being
8087/// merged. In some cases, this could result in removal of the PHI entirely.
8089 BasicBlock *BB, DomTreeUpdater *DTU) {
8090 auto Succ = BB->getUniqueSuccessor();
8091 assert(Succ);
8092 // If there's a phi in the successor block, we'd likely have to introduce
8093 // a phi into the merged landing pad block.
8094 if (isa<PHINode>(*Succ->begin()))
8095 return false;
8096
8097 for (BasicBlock *OtherPred : predecessors(Succ)) {
8098 if (BB == OtherPred)
8099 continue;
8100 BasicBlock::iterator I = OtherPred->begin();
8102 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
8103 continue;
8104 ++I;
8106 if (!BI2 || !BI2->isIdenticalTo(BI))
8107 continue;
8108
8109 std::vector<DominatorTree::UpdateType> Updates;
8110
8111 // We've found an identical block. Update our predecessors to take that
8112 // path instead and make ourselves dead.
8114 for (BasicBlock *Pred : UniquePreds) {
8115 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
8116 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
8117 "unexpected successor");
8118 II->setUnwindDest(OtherPred);
8119 if (DTU) {
8120 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
8121 Updates.push_back({DominatorTree::Delete, Pred, BB});
8122 }
8123 }
8124
8126 for (BasicBlock *Succ : UniqueSuccs) {
8127 Succ->removePredecessor(BB);
8128 if (DTU)
8129 Updates.push_back({DominatorTree::Delete, BB, Succ});
8130 }
8131
8132 IRBuilder<> Builder(BI);
8133 Builder.CreateUnreachable();
8134 BI->eraseFromParent();
8135 if (DTU)
8136 DTU->applyUpdates(Updates);
8137 return true;
8138 }
8139 return false;
8140}
8141
8142bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
8143 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
8144 : simplifyCondBranch(Branch, Builder);
8145}
8146
8147bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
8148 IRBuilder<> &Builder) {
8149 BasicBlock *BB = BI->getParent();
8150 BasicBlock *Succ = BI->getSuccessor(0);
8151
8152 // If the Terminator is the only non-phi instruction, simplify the block.
8153 // If LoopHeader is provided, check if the block or its successor is a loop
8154 // header. (This is for early invocations before loop simplify and
8155 // vectorization to keep canonical loop forms for nested loops. These blocks
8156 // can be eliminated when the pass is invoked later in the back-end.)
8157 // Note that if BB has only one predecessor then we do not introduce new
8158 // backedge, so we can eliminate BB.
8159 bool NeedCanonicalLoop =
8160 Options.NeedCanonicalLoop &&
8161 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
8162 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
8164 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8165 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8166 return true;
8167
8168 // If the only instruction in the block is a seteq/setne comparison against a
8169 // constant, try to simplify the block.
8170 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
8171 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
8172 ++I;
8173 if (I->isTerminator() &&
8174 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8175 return true;
8176 }
8177
8178 // See if we can merge an empty landing pad block with another which is
8179 // equivalent.
8180 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8181 ++I;
8182 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8183 return true;
8184 }
8185
8186 // If this basic block is ONLY a compare and a branch, and if a predecessor
8187 // branches to us and our successor, fold the comparison into the
8188 // predecessor and use logical operations to update the incoming value
8189 // for PHI nodes in common successor.
8190 if (Options.SpeculateBlocks &&
8191 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8192 Options.BonusInstThreshold))
8193 return requestResimplify();
8194 return false;
8195}
8196
8198 BasicBlock *PredPred = nullptr;
8199 for (auto *P : predecessors(BB)) {
8200 BasicBlock *PPred = P->getSinglePredecessor();
8201 if (!PPred || (PredPred && PredPred != PPred))
8202 return nullptr;
8203 PredPred = PPred;
8204 }
8205 return PredPred;
8206}
8207
8208/// Fold the following pattern:
8209/// bb0:
8210/// br i1 %cond1, label %bb1, label %bb2
8211/// bb1:
8212/// br i1 %cond2, label %bb3, label %bb4
8213/// bb2:
8214/// br i1 %cond2, label %bb4, label %bb3
8215/// bb3:
8216/// ...
8217/// bb4:
8218/// ...
8219/// into
8220/// bb0:
8221/// %cond = xor i1 %cond1, %cond2
8222/// br i1 %cond, label %bb4, label %bb3
8223/// bb3:
8224/// ...
8225/// bb4:
8226/// ...
8227/// NOTE: %cond2 always dominates the terminator of bb0.
8229 BasicBlock *BB = BI->getParent();
8230 BasicBlock *BB1 = BI->getSuccessor(0);
8231 BasicBlock *BB2 = BI->getSuccessor(1);
8232 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
8233 if (Succ == BB)
8234 return false;
8235 if (&Succ->front() != Succ->getTerminator())
8236 return false;
8237 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
8238 if (!SuccBI || !SuccBI->isConditional())
8239 return false;
8240 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8241 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8242 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8243 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8244 };
8245 BranchInst *BB1BI, *BB2BI;
8246 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8247 return false;
8248
8249 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8250 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8251 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8252 return false;
8253
8254 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8255 BasicBlock *BB4 = BB1BI->getSuccessor(1);
8256 IRBuilder<> Builder(BI);
8257 BI->setCondition(
8258 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8259 BB1->removePredecessor(BB);
8260 BI->setSuccessor(0, BB4);
8261 BB2->removePredecessor(BB);
8262 BI->setSuccessor(1, BB3);
8263 if (DTU) {
8265 Updates.push_back({DominatorTree::Delete, BB, BB1});
8266 Updates.push_back({DominatorTree::Insert, BB, BB4});
8267 Updates.push_back({DominatorTree::Delete, BB, BB2});
8268 Updates.push_back({DominatorTree::Insert, BB, BB3});
8269
8270 DTU->applyUpdates(Updates);
8271 }
8272 bool HasWeight = false;
8273 uint64_t BBTWeight, BBFWeight;
8274 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8275 HasWeight = true;
8276 else
8277 BBTWeight = BBFWeight = 1;
8278 uint64_t BB1TWeight, BB1FWeight;
8279 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8280 HasWeight = true;
8281 else
8282 BB1TWeight = BB1FWeight = 1;
8283 uint64_t BB2TWeight, BB2FWeight;
8284 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8285 HasWeight = true;
8286 else
8287 BB2TWeight = BB2FWeight = 1;
8288 if (HasWeight) {
8289 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8290 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8291 setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
8292 /*ElideAllZero=*/true);
8293 }
8294 return true;
8295}
8296
8297bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8298 assert(
8300 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8301 "Tautological conditional branch should have been eliminated already.");
8302
8303 BasicBlock *BB = BI->getParent();
8304 if (!Options.SimplifyCondBranch ||
8305 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8306 return false;
8307
8308 // Conditional branch
8309 if (isValueEqualityComparison(BI)) {
8310 // If we only have one predecessor, and if it is a branch on this value,
8311 // see if that predecessor totally determines the outcome of this
8312 // switch.
8313 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8314 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8315 return requestResimplify();
8316
8317 // This block must be empty, except for the setcond inst, if it exists.
8318 // Ignore dbg and pseudo intrinsics.
8319 auto I = BB->instructionsWithoutDebug(true).begin();
8320 if (&*I == BI) {
8321 if (foldValueComparisonIntoPredecessors(BI, Builder))
8322 return requestResimplify();
8323 } else if (&*I == cast<Instruction>(BI->getCondition())) {
8324 ++I;
8325 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8326 return requestResimplify();
8327 }
8328 }
8329
8330 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8331 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8332 return true;
8333
8334 // If this basic block has dominating predecessor blocks and the dominating
8335 // blocks' conditions imply BI's condition, we know the direction of BI.
8336 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8337 if (Imp) {
8338 // Turn this into a branch on constant.
8339 auto *OldCond = BI->getCondition();
8340 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8341 : ConstantInt::getFalse(BB->getContext());
8342 BI->setCondition(TorF);
8344 return requestResimplify();
8345 }
8346
8347 // If this basic block is ONLY a compare and a branch, and if a predecessor
8348 // branches to us and one of our successors, fold the comparison into the
8349 // predecessor and use logical operations to pick the right destination.
8350 if (Options.SpeculateBlocks &&
8351 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8352 Options.BonusInstThreshold))
8353 return requestResimplify();
8354
8355 // We have a conditional branch to two blocks that are only reachable
8356 // from BI. We know that the condbr dominates the two blocks, so see if
8357 // there is any identical code in the "then" and "else" blocks. If so, we
8358 // can hoist it up to the branching block.
8359 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8360 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8361 if (HoistCommon &&
8362 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8363 return requestResimplify();
8364
8365 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8366 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8367 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8368 auto CanSpeculateConditionalLoadsStores = [&]() {
8369 for (auto *Succ : successors(BB)) {
8370 for (Instruction &I : *Succ) {
8371 if (I.isTerminator()) {
8372 if (I.getNumSuccessors() > 1)
8373 return false;
8374 continue;
8375 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8376 SpeculatedConditionalLoadsStores.size() ==
8378 return false;
8379 }
8380 SpeculatedConditionalLoadsStores.push_back(&I);
8381 }
8382 }
8383 return !SpeculatedConditionalLoadsStores.empty();
8384 };
8385
8386 if (CanSpeculateConditionalLoadsStores()) {
8387 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8388 std::nullopt, nullptr);
8389 return requestResimplify();
8390 }
8391 }
8392 } else {
8393 // If Successor #1 has multiple preds, we may be able to conditionally
8394 // execute Successor #0 if it branches to Successor #1.
8395 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8396 if (Succ0TI->getNumSuccessors() == 1 &&
8397 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8398 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8399 return requestResimplify();
8400 }
8401 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8402 // If Successor #0 has multiple preds, we may be able to conditionally
8403 // execute Successor #1 if it branches to Successor #0.
8404 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8405 if (Succ1TI->getNumSuccessors() == 1 &&
8406 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8407 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8408 return requestResimplify();
8409 }
8410
8411 // If this is a branch on something for which we know the constant value in
8412 // predecessors (e.g. a phi node in the current block), thread control
8413 // through this block.
8414 if (foldCondBranchOnValueKnownInPredecessor(BI))
8415 return requestResimplify();
8416
8417 // Scan predecessor blocks for conditional branches.
8418 for (BasicBlock *Pred : predecessors(BB))
8419 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8420 if (PBI != BI && PBI->isConditional())
8421 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8422 return requestResimplify();
8423
8424 // Look for diamond patterns.
8425 if (MergeCondStores)
8426 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8427 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8428 if (PBI != BI && PBI->isConditional())
8429 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8430 return requestResimplify();
8431
8432 // Look for nested conditional branches.
8433 if (mergeNestedCondBranch(BI, DTU))
8434 return requestResimplify();
8435
8436 return false;
8437}
8438
8439/// Check if passing a value to an instruction will cause undefined behavior.
8440static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8441 assert(V->getType() == I->getType() && "Mismatched types");
8443 if (!C)
8444 return false;
8445
8446 if (I->use_empty())
8447 return false;
8448
8449 if (C->isNullValue() || isa<UndefValue>(C)) {
8450 // Only look at the first use we can handle, avoid hurting compile time with
8451 // long uselists
8452 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8453 auto *Use = cast<Instruction>(U.getUser());
8454 // Change this list when we want to add new instructions.
8455 switch (Use->getOpcode()) {
8456 default:
8457 return false;
8458 case Instruction::GetElementPtr:
8459 case Instruction::Ret:
8460 case Instruction::BitCast:
8461 case Instruction::Load:
8462 case Instruction::Store:
8463 case Instruction::Call:
8464 case Instruction::CallBr:
8465 case Instruction::Invoke:
8466 case Instruction::UDiv:
8467 case Instruction::URem:
8468 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8469 // implemented to avoid code complexity as it is unclear how useful such
8470 // logic is.
8471 case Instruction::SDiv:
8472 case Instruction::SRem:
8473 return true;
8474 }
8475 });
8476 if (FindUse == I->use_end())
8477 return false;
8478 auto &Use = *FindUse;
8479 auto *User = cast<Instruction>(Use.getUser());
8480 // Bail out if User is not in the same BB as I or User == I or User comes
8481 // before I in the block. The latter two can be the case if User is a
8482 // PHI node.
8483 if (User->getParent() != I->getParent() || User == I ||
8484 User->comesBefore(I))
8485 return false;
8486
8487 // Now make sure that there are no instructions in between that can alter
8488 // control flow (eg. calls)
8489 auto InstrRange =
8490 make_range(std::next(I->getIterator()), User->getIterator());
8491 if (any_of(InstrRange, [](Instruction &I) {
8493 }))
8494 return false;
8495
8496 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8498 if (GEP->getPointerOperand() == I) {
8499 // The type of GEP may differ from the type of base pointer.
8500 // Bail out on vector GEPs, as they are not handled by other checks.
8501 if (GEP->getType()->isVectorTy())
8502 return false;
8503 // The current base address is null, there are four cases to consider:
8504 // getelementptr (TY, null, 0) -> null
8505 // getelementptr (TY, null, not zero) -> may be modified
8506 // getelementptr inbounds (TY, null, 0) -> null
8507 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8508 // undefined?
8509 if (!GEP->hasAllZeroIndices() &&
8510 (!GEP->isInBounds() ||
8511 NullPointerIsDefined(GEP->getFunction(),
8512 GEP->getPointerAddressSpace())))
8513 PtrValueMayBeModified = true;
8514 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8515 }
8516
8517 // Look through return.
8518 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8519 bool HasNoUndefAttr =
8520 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8521 // Return undefined to a noundef return value is undefined.
8522 if (isa<UndefValue>(C) && HasNoUndefAttr)
8523 return true;
8524 // Return null to a nonnull+noundef return value is undefined.
8525 if (C->isNullValue() && HasNoUndefAttr &&
8526 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8527 return !PtrValueMayBeModified;
8528 }
8529 }
8530
8531 // Load from null is undefined.
8532 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8533 if (!LI->isVolatile())
8534 return !NullPointerIsDefined(LI->getFunction(),
8535 LI->getPointerAddressSpace());
8536
8537 // Store to null is undefined.
8539 if (!SI->isVolatile())
8540 return (!NullPointerIsDefined(SI->getFunction(),
8541 SI->getPointerAddressSpace())) &&
8542 SI->getPointerOperand() == I;
8543
8544 // llvm.assume(false/undef) always triggers immediate UB.
8545 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8546 // Ignore assume operand bundles.
8547 if (I == Assume->getArgOperand(0))
8548 return true;
8549 }
8550
8551 if (auto *CB = dyn_cast<CallBase>(User)) {
8552 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8553 return false;
8554 // A call to null is undefined.
8555 if (CB->getCalledOperand() == I)
8556 return true;
8557
8558 if (CB->isArgOperand(&Use)) {
8559 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8560 // Passing null to a nonnnull+noundef argument is undefined.
8562 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8563 return !PtrValueMayBeModified;
8564 // Passing undef to a noundef argument is undefined.
8565 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8566 return true;
8567 }
8568 }
8569 // Div/Rem by zero is immediate UB
8570 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8571 return true;
8572 }
8573 return false;
8574}
8575
8576/// If BB has an incoming value that will always trigger undefined behavior
8577/// (eg. null pointer dereference), remove the branch leading here.
8579 DomTreeUpdater *DTU,
8580 AssumptionCache *AC) {
8581 for (PHINode &PHI : BB->phis())
8582 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8583 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8584 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8585 Instruction *T = Predecessor->getTerminator();
8586 IRBuilder<> Builder(T);
8587 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8588 BB->removePredecessor(Predecessor);
8589 // Turn unconditional branches into unreachables and remove the dead
8590 // destination from conditional branches.
8591 if (BI->isUnconditional())
8592 Builder.CreateUnreachable();
8593 else {
8594 // Preserve guarding condition in assume, because it might not be
8595 // inferrable from any dominating condition.
8596 Value *Cond = BI->getCondition();
8597 CallInst *Assumption;
8598 if (BI->getSuccessor(0) == BB)
8599 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8600 else
8601 Assumption = Builder.CreateAssumption(Cond);
8602 if (AC)
8603 AC->registerAssumption(cast<AssumeInst>(Assumption));
8604 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8605 : BI->getSuccessor(0));
8606 }
8607 BI->eraseFromParent();
8608 if (DTU)
8609 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8610 return true;
8611 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8612 // Redirect all branches leading to UB into
8613 // a newly created unreachable block.
8614 BasicBlock *Unreachable = BasicBlock::Create(
8615 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8616 Builder.SetInsertPoint(Unreachable);
8617 // The new block contains only one instruction: Unreachable
8618 Builder.CreateUnreachable();
8619 for (const auto &Case : SI->cases())
8620 if (Case.getCaseSuccessor() == BB) {
8621 BB->removePredecessor(Predecessor);
8622 Case.setSuccessor(Unreachable);
8623 }
8624 if (SI->getDefaultDest() == BB) {
8625 BB->removePredecessor(Predecessor);
8626 SI->setDefaultDest(Unreachable);
8627 }
8628
8629 if (DTU)
8630 DTU->applyUpdates(
8631 { { DominatorTree::Insert, Predecessor, Unreachable },
8632 { DominatorTree::Delete, Predecessor, BB } });
8633 return true;
8634 }
8635 }
8636
8637 return false;
8638}
8639
8640bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8641 bool Changed = false;
8642
8643 assert(BB && BB->getParent() && "Block not embedded in function!");
8644 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8645
8646 // Remove basic blocks that have no predecessors (except the entry block)...
8647 // or that just have themself as a predecessor. These are unreachable.
8648 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8649 BB->getSinglePredecessor() == BB) {
8650 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8651 DeleteDeadBlock(BB, DTU);
8652 return true;
8653 }
8654
8655 // Check to see if we can constant propagate this terminator instruction
8656 // away...
8657 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8658 /*TLI=*/nullptr, DTU);
8659
8660 // Check for and eliminate duplicate PHI nodes in this block.
8662
8663 // Check for and remove branches that will always cause undefined behavior.
8665 return requestResimplify();
8666
8667 // Merge basic blocks into their predecessor if there is only one distinct
8668 // pred, and if there is only one distinct successor of the predecessor, and
8669 // if there are no PHI nodes.
8670 if (MergeBlockIntoPredecessor(BB, DTU))
8671 return true;
8672
8673 if (SinkCommon && Options.SinkCommonInsts)
8674 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8675 mergeCompatibleInvokes(BB, DTU)) {
8676 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8677 // so we may now how duplicate PHI's.
8678 // Let's rerun EliminateDuplicatePHINodes() first,
8679 // before foldTwoEntryPHINode() potentially converts them into select's,
8680 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8681 return true;
8682 }
8683
8684 IRBuilder<> Builder(BB);
8685
8686 if (Options.SpeculateBlocks &&
8687 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8688 // If there is a trivial two-entry PHI node in this basic block, and we can
8689 // eliminate it, do so now.
8690 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8691 if (PN->getNumIncomingValues() == 2)
8692 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8693 Options.SpeculateUnpredictables))
8694 return true;
8695 }
8696
8698 Builder.SetInsertPoint(Terminator);
8699 switch (Terminator->getOpcode()) {
8700 case Instruction::Br:
8701 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8702 break;
8703 case Instruction::Resume:
8704 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8705 break;
8706 case Instruction::CleanupRet:
8707 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8708 break;
8709 case Instruction::Switch:
8710 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8711 break;
8712 case Instruction::Unreachable:
8713 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8714 break;
8715 case Instruction::IndirectBr:
8716 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8717 break;
8718 }
8719
8720 return Changed;
8721}
8722
8723bool SimplifyCFGOpt::run(BasicBlock *BB) {
8724 bool Changed = false;
8725
8726 // Repeated simplify BB as long as resimplification is requested.
8727 do {
8728 Resimplify = false;
8729
8730 // Perform one round of simplifcation. Resimplify flag will be set if
8731 // another iteration is requested.
8732 Changed |= simplifyOnce(BB);
8733 } while (Resimplify);
8734
8735 return Changed;
8736}
8737
8740 ArrayRef<WeakVH> LoopHeaders) {
8741 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8742 Options)
8743 .run(BB);
8744}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1574
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition ArrayRef.h:156
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:934
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:982
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1120
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:220
bool isNegative() const
Definition Constants.h:209
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:264
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:193
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:157
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:256
static DebugLoc getTemporary()
Definition DebugLoc.h:161
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:183
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:170
static DebugLoc getDropped()
Definition DebugLoc.h:164
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:248
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:114
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2348
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2103
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1934
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1808
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2332
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1850
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1863
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2197
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2280
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1078
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:119
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
op_range operands()
Definition User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:193
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
bool succ_empty(const Instruction *I)
Definition CFG.h:257
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F, StringRef PassName)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1688
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2076
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1777
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2128
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< uint32_t, 2 > &B1, const SmallVector< uint32_t, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1397
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2845
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3094
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3368
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1954
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3871
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1582
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2088
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:315
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:257