// SimplifyCFG.cpp — recovered from a Doxygen page scrape (LLVM 23.0.0git);
// navigation residue removed.
//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Peephole optimize the CFG.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
#include <optional>
#include <set>
#include <tuple>
#include <utility>
#include <vector>

94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
113 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
149 cl::desc("Sink common instructions down to the end block"));
150
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
172 "max-speculation-depth", cl::Hidden, cl::init(10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
201 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
205
206} // end namespace llvm
207
208STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
209STATISTIC(NumLinearMaps,
210 "Number of switch instructions turned into linear mapping");
211STATISTIC(NumLookupTables,
212 "Number of switch instructions turned into lookup tables");
214 NumLookupTablesHoles,
215 "Number of switch instructions turned into lookup tables (holes checked)");
216STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
217STATISTIC(NumFoldValueComparisonIntoPredecessors,
218 "Number of value comparisons folded into predecessor basic blocks");
219STATISTIC(NumFoldBranchToCommonDest,
220 "Number of branches folded into predecessor basic block");
222 NumHoistCommonCode,
223 "Number of common instruction 'blocks' hoisted up to the begin block");
224STATISTIC(NumHoistCommonInstrs,
225 "Number of common instructions hoisted up to the begin block");
226STATISTIC(NumSinkCommonCode,
227 "Number of common instruction 'blocks' sunk down to the end block");
228STATISTIC(NumSinkCommonInstrs,
229 "Number of common instructions sunk down to the end block");
230STATISTIC(NumSpeculations, "Number of speculative executed instructions");
231STATISTIC(NumInvokes,
232 "Number of invokes with empty resume blocks simplified into calls");
233STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
234STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
235
236namespace {
237
238// The first field contains the value that the switch produces when a certain
239// case group is selected, and the second field is a vector containing the
240// cases composing the case group.
241using SwitchCaseResultVectorTy =
243
244// The first field contains the phi node that generates a result of the switch
245// and the second field contains the value generated for a certain case in the
246// switch for that PHI.
247using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
248
249/// ValueEqualityComparisonCase - Represents a case of a switch.
250struct ValueEqualityComparisonCase {
252 BasicBlock *Dest;
253
254 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
255 : Value(Value), Dest(Dest) {}
256
257 bool operator<(ValueEqualityComparisonCase RHS) const {
258 // Comparing pointers is ok as we only rely on the order for uniquing.
259 return Value < RHS.Value;
260 }
261
262 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
263};
264
265class SimplifyCFGOpt {
266 const TargetTransformInfo &TTI;
267 DomTreeUpdater *DTU;
268 const DataLayout &DL;
269 ArrayRef<WeakVH> LoopHeaders;
270 const SimplifyCFGOptions &Options;
271 bool Resimplify;
272
273 Value *isValueEqualityComparison(Instruction *TI);
274 BasicBlock *getValueEqualityComparisonCases(
275 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
276 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
277 BasicBlock *Pred,
278 IRBuilder<> &Builder);
279 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
280 Instruction *PTI,
281 IRBuilder<> &Builder);
282 bool foldValueComparisonIntoPredecessors(Instruction *TI,
283 IRBuilder<> &Builder);
284
285 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
286 bool simplifySingleResume(ResumeInst *RI);
287 bool simplifyCommonResume(ResumeInst *RI);
288 bool simplifyCleanupReturn(CleanupReturnInst *RI);
289 bool simplifyUnreachable(UnreachableInst *UI);
290 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
291 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
292 bool simplifyIndirectBr(IndirectBrInst *IBI);
293 bool simplifyUncondBranch(UncondBrInst *BI, IRBuilder<> &Builder);
294 bool simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder);
295 bool foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI);
296
297 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
298 IRBuilder<> &Builder);
299 bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
300 SelectInst *Select,
301 IRBuilder<> &Builder);
302 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
303 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
304 Instruction *TI, Instruction *I1,
305 SmallVectorImpl<Instruction *> &OtherSuccTIs,
306 ArrayRef<BasicBlock *> UniqueSuccessors);
307 bool speculativelyExecuteBB(CondBrInst *BI, BasicBlock *ThenBB);
308 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
309 BasicBlock *TrueBB, BasicBlock *FalseBB,
310 uint32_t TrueWeight, uint32_t FalseWeight);
311 bool simplifyBranchOnICmpChain(CondBrInst *BI, IRBuilder<> &Builder,
312 const DataLayout &DL);
313 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
314 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
315 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
316 bool simplifyDuplicatePredecessors(BasicBlock *Succ, DomTreeUpdater *DTU);
317
318public:
319 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
320 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
321 const SimplifyCFGOptions &Opts)
322 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
323 assert((!DTU || !DTU->hasPostDomTree()) &&
324 "SimplifyCFG is not yet capable of maintaining validity of a "
325 "PostDomTree, so don't ask for it.");
326 }
327
328 bool simplifyOnce(BasicBlock *BB);
329 bool run(BasicBlock *BB);
330
331 // Helper to set Resimplify and return change indication.
332 bool requestResimplify() {
333 Resimplify = true;
334 return true;
335 }
336};
337
338// we synthesize a || b as select a, true, b
339// we synthesize a && b as select a, b, false
340// this function determines if SI is playing one of those roles.
341[[maybe_unused]] bool
342isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
343 return ((isa<ConstantInt>(SI->getTrueValue()) &&
344 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
345 (isa<ConstantInt>(SI->getFalseValue()) &&
346 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
347}
348
349} // end anonymous namespace
350
351/// Return true if all the PHI nodes in the basic block \p BB
352/// receive compatible (identical) incoming values when coming from
353/// all of the predecessor blocks that are specified in \p IncomingBlocks.
354///
355/// Note that if the values aren't exactly identical, but \p EquivalenceSet
356/// is provided, and *both* of the values are present in the set,
357/// then they are considered equal.
359 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
360 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
361 assert(IncomingBlocks.size() == 2 &&
362 "Only for a pair of incoming blocks at the time!");
363
364 // FIXME: it is okay if one of the incoming values is an `undef` value,
365 // iff the other incoming value is guaranteed to be a non-poison value.
366 // FIXME: it is okay if one of the incoming values is a `poison` value.
367 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
368 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
369 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
370 if (IV0 == IV1)
371 return true;
372 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
373 EquivalenceSet->contains(IV1))
374 return true;
375 return false;
376 });
377}
378
379/// Return true if it is safe to merge these two
380/// terminator instructions together.
381static bool
383 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
384 if (SI1 == SI2)
385 return false; // Can't merge with self!
386
387 // It is not safe to merge these two switch instructions if they have a common
388 // successor, and if that successor has a PHI node, and if *that* PHI node has
389 // conflicting incoming values from the two switch blocks.
390 BasicBlock *SI1BB = SI1->getParent();
391 BasicBlock *SI2BB = SI2->getParent();
392
394 bool Fail = false;
395 for (BasicBlock *Succ : successors(SI2BB)) {
396 if (!SI1Succs.count(Succ))
397 continue;
398 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
399 continue;
400 Fail = true;
401 if (FailBlocks)
402 FailBlocks->insert(Succ);
403 else
404 break;
405 }
406
407 return !Fail;
408}
409
410/// Update PHI nodes in Succ to indicate that there will now be entries in it
411/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
412/// will be the same as those coming in from ExistPred, an existing predecessor
413/// of Succ.
414static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
415 BasicBlock *ExistPred,
416 MemorySSAUpdater *MSSAU = nullptr) {
417 for (PHINode &PN : Succ->phis())
418 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
419 if (MSSAU)
420 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
421 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
422}
423
424/// Compute an abstract "cost" of speculating the given instruction,
425/// which is assumed to be safe to speculate. TCC_Free means cheap,
426/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
427/// expensive.
429 const TargetTransformInfo &TTI) {
430 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
431}
432
433/// If we have a merge point of an "if condition" as accepted above,
434/// return true if the specified value dominates the block. We don't handle
435/// the true generality of domination here, just a special case which works
436/// well enough for us.
437///
438/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
439/// see if V (which must be an instruction) and its recursive operands
440/// that do not dominate BB have a combined cost lower than Budget and
441/// are non-trapping. If both are true, the instruction is inserted into the
442/// set and true is returned.
443///
444/// The cost for most non-trapping instructions is defined as 1 except for
445/// Select whose cost is 2.
446///
447/// After this function returns, Cost is increased by the cost of
448/// V plus its non-dominating operands. If that cost is greater than
449/// Budget, false is returned and Cost is undefined.
451 Value *V, BasicBlock *BB, Instruction *InsertPt,
452 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
454 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
455 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
456 // so limit the recursion depth.
457 // TODO: While this recursion limit does prevent pathological behavior, it
458 // would be better to track visited instructions to avoid cycles.
460 return false;
461
463 if (!I) {
464 // Non-instructions dominate all instructions and can be executed
465 // unconditionally.
466 return true;
467 }
468 BasicBlock *PBB = I->getParent();
469
470 // We don't want to allow weird loops that might have the "if condition" in
471 // the bottom of this block.
472 if (PBB == BB)
473 return false;
474
475 // If this instruction is defined in a block that contains an unconditional
476 // branch to BB, then it must be in the 'conditional' part of the "if
477 // statement". If not, it definitely dominates the region.
479 if (!BI || BI->getSuccessor() != BB)
480 return true;
481
482 // If we have seen this instruction before, don't count it again.
483 if (AggressiveInsts.count(I))
484 return true;
485
486 // Okay, it looks like the instruction IS in the "condition". Check to
487 // see if it's a cheap instruction to unconditionally compute, and if it
488 // only uses stuff defined outside of the condition. If so, hoist it out.
489 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
490 return false;
491
492 // Overflow arithmetic instruction plus extract value are usually generated
493 // when a division is being replaced. But, in this case, the zero check may
494 // still be kept in the code. In that case it would be worth to hoist these
495 // two instruction out of the basic block. Let's treat this pattern as one
496 // single cheap instruction here!
497 WithOverflowInst *OverflowInst;
498 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
499 ZeroCostInstructions.insert(OverflowInst);
500 Cost += 1;
501 } else if (!ZeroCostInstructions.contains(I))
502 Cost += computeSpeculationCost(I, TTI);
503
504 // Allow exactly one instruction to be speculated regardless of its cost
505 // (as long as it is safe to do so).
506 // This is intended to flatten the CFG even if the instruction is a division
507 // or other expensive operation. The speculation of an expensive instruction
508 // is expected to be undone in CodeGenPrepare if the speculation has not
509 // enabled further IR optimizations.
510 if (Cost > Budget &&
511 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
512 !Cost.isValid()))
513 return false;
514
515 // Okay, we can only really hoist these out if their operands do
516 // not take us over the cost threshold.
517 for (Use &Op : I->operands())
518 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
519 TTI, AC, ZeroCostInstructions, Depth + 1))
520 return false;
521 // Okay, it's safe to do this! Remember this instruction.
522 AggressiveInsts.insert(I);
523 return true;
524}
525
526/// Extract ConstantInt from value, looking through IntToPtr
527/// and PointerNullValue. Return NULL if value is not a constant int.
529 // Normal constant int.
531 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
532 return CI;
533
534 // It is not safe to look through inttoptr or ptrtoint when using unstable
535 // pointer types.
536 if (DL.hasUnstableRepresentation(V->getType()))
537 return nullptr;
538
539 // This is some kind of pointer constant. Turn it into a pointer-sized
540 // ConstantInt if possible.
541 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
542
543 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
545 return ConstantInt::get(IntPtrTy, 0);
546
547 // IntToPtr const int, we can look through this if the semantics of
548 // inttoptr for this address space are a simple (truncating) bitcast.
550 if (CE->getOpcode() == Instruction::IntToPtr)
551 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
552 // The constant is very likely to have the right type already.
553 if (CI->getType() == IntPtrTy)
554 return CI;
555 else
556 return cast<ConstantInt>(
557 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
558 }
559 return nullptr;
560}
561
562namespace {
563
564/// Given a chain of or (||) or and (&&) comparison of a value against a
565/// constant, this will try to recover the information required for a switch
566/// structure.
567/// It will depth-first traverse the chain of comparison, seeking for patterns
568/// like %a == 12 or %a < 4 and combine them to produce a set of integer
569/// representing the different cases for the switch.
570/// Note that if the chain is composed of '||' it will build the set of elements
571/// that matches the comparisons (i.e. any of this value validate the chain)
572/// while for a chain of '&&' it will build the set elements that make the test
573/// fail.
574struct ConstantComparesGatherer {
575 const DataLayout &DL;
576
577 /// Value found for the switch comparison
578 Value *CompValue = nullptr;
579
580 /// Extra clause to be checked before the switch
581 Value *Extra = nullptr;
582
583 /// Set of integers to match in switch
585
586 /// Number of comparisons matched in the and/or chain
587 unsigned UsedICmps = 0;
588
589 /// If the elements in Vals matches the comparisons
590 bool IsEq = false;
591
592 // Used to check if the first matched CompValue shall be the Extra check.
593 bool IgnoreFirstMatch = false;
594 bool MultipleMatches = false;
595
596 /// Construct and compute the result for the comparison instruction Cond
597 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
598 gather(Cond);
599 if (CompValue || !MultipleMatches)
600 return;
601 Extra = nullptr;
602 Vals.clear();
603 UsedICmps = 0;
604 IgnoreFirstMatch = true;
605 gather(Cond);
606 }
607
608 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
609 ConstantComparesGatherer &
610 operator=(const ConstantComparesGatherer &) = delete;
611
612private:
613 /// Try to set the current value used for the comparison, it succeeds only if
614 /// it wasn't set before or if the new value is the same as the old one
615 bool setValueOnce(Value *NewVal) {
616 if (IgnoreFirstMatch) {
617 IgnoreFirstMatch = false;
618 return false;
619 }
620 if (CompValue && CompValue != NewVal) {
621 MultipleMatches = true;
622 return false;
623 }
624 CompValue = NewVal;
625 return true;
626 }
627
628 /// Try to match Instruction "I" as a comparison against a constant and
629 /// populates the array Vals with the set of values that match (or do not
630 /// match depending on isEQ).
631 /// Return false on failure. On success, the Value the comparison matched
632 /// against is placed in CompValue.
633 /// If CompValue is already set, the function is expected to fail if a match
634 /// is found but the value compared to is different.
635 bool matchInstruction(Instruction *I, bool isEQ) {
636 if (match(I, m_Not(m_Instruction(I))))
637 isEQ = !isEQ;
638
639 Value *Val;
640 if (match(I, m_NUWTrunc(m_Value(Val)))) {
641 // If we already have a value for the switch, it has to match!
642 if (!setValueOnce(Val))
643 return false;
644 UsedICmps++;
645 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
646 return true;
647 }
648 // If this is an icmp against a constant, handle this as one of the cases.
649 ICmpInst *ICI;
650 ConstantInt *C;
651 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
652 (C = getConstantInt(I->getOperand(1), DL)))) {
653 return false;
654 }
655
656 Value *RHSVal;
657 const APInt *RHSC;
658
659 // Pattern match a special case
660 // (x & ~2^z) == y --> x == y || x == y|2^z
661 // This undoes a transformation done by instcombine to fuse 2 compares.
662 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
663 // It's a little bit hard to see why the following transformations are
664 // correct. Here is a CVC3 program to verify them for 64-bit values:
665
666 /*
667 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
668 x : BITVECTOR(64);
669 y : BITVECTOR(64);
670 z : BITVECTOR(64);
671 mask : BITVECTOR(64) = BVSHL(ONE, z);
672 QUERY( (y & ~mask = y) =>
673 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
674 );
675 QUERY( (y | mask = y) =>
676 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
677 );
678 */
679
680 // Please note that each pattern must be a dual implication (<--> or
681 // iff). One directional implication can create spurious matches. If the
682 // implication is only one-way, an unsatisfiable condition on the left
683 // side can imply a satisfiable condition on the right side. Dual
684 // implication ensures that satisfiable conditions are transformed to
685 // other satisfiable conditions and unsatisfiable conditions are
686 // transformed to other unsatisfiable conditions.
687
688 // Here is a concrete example of a unsatisfiable condition on the left
689 // implying a satisfiable condition on the right:
690 //
691 // mask = (1 << z)
692 // (x & ~mask) == y --> (x == y || x == (y | mask))
693 //
694 // Substituting y = 3, z = 0 yields:
695 // (x & -2) == 3 --> (x == 3 || x == 2)
696
697 // Pattern match a special case:
698 /*
699 QUERY( (y & ~mask = y) =>
700 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
701 );
702 */
703 if (match(ICI->getOperand(0),
704 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
705 APInt Mask = ~*RHSC;
706 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
707 // If we already have a value for the switch, it has to match!
708 if (!setValueOnce(RHSVal))
709 return false;
710
711 Vals.push_back(C);
712 Vals.push_back(
713 ConstantInt::get(C->getContext(),
714 C->getValue() | Mask));
715 UsedICmps++;
716 return true;
717 }
718 }
719
720 // Pattern match a special case:
721 /*
722 QUERY( (y | mask = y) =>
723 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
724 );
725 */
726 if (match(ICI->getOperand(0),
727 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
728 APInt Mask = *RHSC;
729 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
730 // If we already have a value for the switch, it has to match!
731 if (!setValueOnce(RHSVal))
732 return false;
733
734 Vals.push_back(C);
735 Vals.push_back(ConstantInt::get(C->getContext(),
736 C->getValue() & ~Mask));
737 UsedICmps++;
738 return true;
739 }
740 }
741
742 // If we already have a value for the switch, it has to match!
743 if (!setValueOnce(ICI->getOperand(0)))
744 return false;
745
746 UsedICmps++;
747 Vals.push_back(C);
748 return true;
749 }
750
751 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
752 ConstantRange Span =
754
755 // Shift the range if the compare is fed by an add. This is the range
756 // compare idiom as emitted by instcombine.
757 Value *CandidateVal = I->getOperand(0);
758 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
759 Span = Span.subtract(*RHSC);
760 CandidateVal = RHSVal;
761 }
762
763 // If this is an and/!= check, then we are looking to build the set of
764 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
765 // x != 0 && x != 1.
766 if (!isEQ)
767 Span = Span.inverse();
768
769 // If there are a ton of values, we don't want to make a ginormous switch.
770 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
771 return false;
772 }
773
774 // If we already have a value for the switch, it has to match!
775 if (!setValueOnce(CandidateVal))
776 return false;
777
778 // Add all values from the range to the set
779 APInt Tmp = Span.getLower();
780 do
781 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
782 while (++Tmp != Span.getUpper());
783
784 UsedICmps++;
785 return true;
786 }
787
788 /// Given a potentially 'or'd or 'and'd together collection of icmp
789 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
790 /// the value being compared, and stick the list constants into the Vals
791 /// vector.
792 /// One "Extra" case is allowed to differ from the other.
793 void gather(Value *V) {
794 Value *Op0, *Op1;
795 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
796 IsEq = true;
797 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
798 IsEq = false;
799 else
800 return;
801 // Keep a stack (SmallVector for efficiency) for depth-first traversal
802 SmallVector<Value *, 8> DFT{Op0, Op1};
803 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
804
805 while (!DFT.empty()) {
806 V = DFT.pop_back_val();
807
808 if (Instruction *I = dyn_cast<Instruction>(V)) {
809 // If it is a || (or && depending on isEQ), process the operands.
810 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
811 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
812 if (Visited.insert(Op1).second)
813 DFT.push_back(Op1);
814 if (Visited.insert(Op0).second)
815 DFT.push_back(Op0);
816
817 continue;
818 }
819
820 // Try to match the current instruction
821 if (matchInstruction(I, IsEq))
822 // Match succeed, continue the loop
823 continue;
824 }
825
826 // One element of the sequence of || (or &&) could not be match as a
827 // comparison against the same value as the others.
828 // We allow only one "Extra" case to be checked before the switch
829 if (!Extra) {
830 Extra = V;
831 continue;
832 }
833 // Failed to parse a proper sequence, abort now
834 CompValue = nullptr;
835 break;
836 }
837 }
838};
839
840} // end anonymous namespace
841
843 MemorySSAUpdater *MSSAU = nullptr) {
844 Instruction *Cond = nullptr;
846 Cond = dyn_cast<Instruction>(SI->getCondition());
847 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
848 Cond = dyn_cast<Instruction>(BI->getCondition());
849 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
850 Cond = dyn_cast<Instruction>(IBI->getAddress());
851 }
852
853 TI->eraseFromParent();
854 if (Cond)
856}
857
858/// Return true if the specified terminator checks
859/// to see if a value is equal to constant integer value.
860Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
861 Value *CV = nullptr;
862 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
863 // Do not permit merging of large switch instructions into their
864 // predecessors unless there is only one predecessor.
865 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
866 CV = SI->getCondition();
867 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(TI))
868 if (BI->getCondition()->hasOneUse()) {
869 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
870 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
871 CV = ICI->getOperand(0);
872 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
873 if (Trunc->hasNoUnsignedWrap())
874 CV = Trunc->getOperand(0);
875 }
876 }
877
878 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
879 if (CV) {
880 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
881 Value *Ptr = PTII->getPointerOperand();
882 if (DL.hasUnstableRepresentation(Ptr->getType()))
883 return CV;
884 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
885 CV = Ptr;
886 }
887 }
888 return CV;
889}
890
891/// Given a value comparison instruction,
892/// decode all of the 'cases' that it represents and return the 'default' block.
893BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
894 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
895 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
896 Cases.reserve(SI->getNumCases());
897 for (auto Case : SI->cases())
898 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
899 Case.getCaseSuccessor()));
900 return SI->getDefaultDest();
901 }
902
903 CondBrInst *BI = cast<CondBrInst>(TI);
904 Value *Cond = BI->getCondition();
905 ICmpInst::Predicate Pred;
906 ConstantInt *C;
907 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
908 Pred = ICI->getPredicate();
909 C = getConstantInt(ICI->getOperand(1), DL);
910 } else {
911 Pred = ICmpInst::ICMP_NE;
912 auto *Trunc = cast<TruncInst>(Cond);
913 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
914 }
915 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
916 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
917 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
918}
919
920/// Given a vector of bb/value pairs, remove any entries
921/// in the list that match the specified block.
922static void
924 std::vector<ValueEqualityComparisonCase> &Cases) {
925 llvm::erase(Cases, BB);
926}
927
928/// Return true if there are any keys in C1 that exist in C2 as well.
929static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
930 std::vector<ValueEqualityComparisonCase> &C2) {
931 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
932
933 // Make V1 be smaller than V2.
934 if (V1->size() > V2->size())
935 std::swap(V1, V2);
936
937 if (V1->empty())
938 return false;
939 if (V1->size() == 1) {
940 // Just scan V2.
941 ConstantInt *TheVal = (*V1)[0].Value;
942 for (const ValueEqualityComparisonCase &VECC : *V2)
943 if (TheVal == VECC.Value)
944 return true;
945 }
946
947 // Otherwise, just sort both lists and compare element by element.
948 array_pod_sort(V1->begin(), V1->end());
949 array_pod_sort(V2->begin(), V2->end());
950 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
951 while (i1 != e1 && i2 != e2) {
952 if ((*V1)[i1].Value == (*V2)[i2].Value)
953 return true;
954 if ((*V1)[i1].Value < (*V2)[i2].Value)
955 ++i1;
956 else
957 ++i2;
958 }
959 return false;
960}
961
962/// If TI is known to be a terminator instruction and its block is known to
963/// only have a single predecessor block, check to see if that predecessor is
964/// also a value comparison with the same value, and if that comparison
965/// determines the outcome of this comparison. If so, simplify TI. This does a
966/// very limited form of jump threading.
967bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
968 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
969 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
970 if (!PredVal)
971 return false; // Not a value comparison in predecessor.
972
973 Value *ThisVal = isValueEqualityComparison(TI);
974 assert(ThisVal && "This isn't a value comparison!!");
975 if (ThisVal != PredVal)
976 return false; // Different predicates.
977
978 // TODO: Preserve branch weight metadata, similarly to how
979 // foldValueComparisonIntoPredecessors preserves it.
980
981 // Find out information about when control will move from Pred to TI's block.
982 std::vector<ValueEqualityComparisonCase> PredCases;
983 BasicBlock *PredDef =
984 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
985 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
986
987 // Find information about how control leaves this block.
988 std::vector<ValueEqualityComparisonCase> ThisCases;
989 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
990 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
991
992 // If TI's block is the default block from Pred's comparison, potentially
993 // simplify TI based on this knowledge.
994 if (PredDef == TI->getParent()) {
995 // If we are here, we know that the value is none of those cases listed in
996 // PredCases. If there are any cases in ThisCases that are in PredCases, we
997 // can simplify TI.
998 if (!valuesOverlap(PredCases, ThisCases))
999 return false;
1000
1001 if (isa<CondBrInst>(TI)) {
1002 // Okay, one of the successors of this condbr is dead. Convert it to a
1003 // uncond br.
1004 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1005 // Insert the new branch.
1006 Instruction *NI = Builder.CreateBr(ThisDef);
1007 (void)NI;
1008
1009 // Remove PHI node entries for the dead edge.
1010 ThisCases[0].Dest->removePredecessor(PredDef);
1011
1012 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1013 << "Through successor TI: " << *TI << "Leaving: " << *NI
1014 << "\n");
1015
1017
1018 if (DTU)
1019 DTU->applyUpdates(
1020 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1021
1022 return true;
1023 }
1024
1025 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1026 // Okay, TI has cases that are statically dead, prune them away.
1027 SmallPtrSet<Constant *, 16> DeadCases;
1028 for (const ValueEqualityComparisonCase &Case : PredCases)
1029 DeadCases.insert(Case.Value);
1030
1031 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1032 << "Through successor TI: " << *TI);
1033
1034 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1035 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1036 --i;
1037 auto *Successor = i->getCaseSuccessor();
1038 if (DTU)
1039 ++NumPerSuccessorCases[Successor];
1040 if (DeadCases.count(i->getCaseValue())) {
1041 Successor->removePredecessor(PredDef);
1042 SI.removeCase(i);
1043 if (DTU)
1044 --NumPerSuccessorCases[Successor];
1045 }
1046 }
1047
1048 if (DTU) {
1049 std::vector<DominatorTree::UpdateType> Updates;
1050 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1051 if (I.second == 0)
1052 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1053 DTU->applyUpdates(Updates);
1054 }
1055
1056 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1057 return true;
1058 }
1059
1060 // Otherwise, TI's block must correspond to some matched value. Find out
1061 // which value (or set of values) this is.
1062 ConstantInt *TIV = nullptr;
1063 BasicBlock *TIBB = TI->getParent();
1064 for (const auto &[Value, Dest] : PredCases)
1065 if (Dest == TIBB) {
1066 if (TIV)
1067 return false; // Cannot handle multiple values coming to this block.
1068 TIV = Value;
1069 }
1070 assert(TIV && "No edge from pred to succ?");
1071
1072 // Okay, we found the one constant that our value can be if we get into TI's
1073 // BB. Find out which successor will unconditionally be branched to.
1074 BasicBlock *TheRealDest = nullptr;
1075 for (const auto &[Value, Dest] : ThisCases)
1076 if (Value == TIV) {
1077 TheRealDest = Dest;
1078 break;
1079 }
1080
1081 // If not handled by any explicit cases, it is handled by the default case.
1082 if (!TheRealDest)
1083 TheRealDest = ThisDef;
1084
1085 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1086
1087 // Remove PHI node entries for dead edges.
1088 BasicBlock *CheckEdge = TheRealDest;
1089 for (BasicBlock *Succ : successors(TIBB))
1090 if (Succ != CheckEdge) {
1091 if (Succ != TheRealDest)
1092 RemovedSuccs.insert(Succ);
1093 Succ->removePredecessor(TIBB);
1094 } else
1095 CheckEdge = nullptr;
1096
1097 // Insert the new branch.
1098 Instruction *NI = Builder.CreateBr(TheRealDest);
1099 (void)NI;
1100
1101 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1102 << "Through successor TI: " << *TI << "Leaving: " << *NI
1103 << "\n");
1104
1106 if (DTU) {
1107 SmallVector<DominatorTree::UpdateType, 2> Updates;
1108 Updates.reserve(RemovedSuccs.size());
1109 for (auto *RemovedSucc : RemovedSuccs)
1110 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1111 DTU->applyUpdates(Updates);
1112 }
1113 return true;
1114}
1115
1116namespace {
1117
1118/// This class implements a stable ordering of constant
1119/// integers that does not depend on their address. This is important for
1120/// applications that sort ConstantInt's to ensure uniqueness.
1121struct ConstantIntOrdering {
1122 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1123 return LHS->getValue().ult(RHS->getValue());
1124 }
1125};
1126
1127} // end anonymous namespace
1128
1130 ConstantInt *const *P2) {
1131 const ConstantInt *LHS = *P1;
1132 const ConstantInt *RHS = *P2;
1133 if (LHS == RHS)
1134 return 0;
1135 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1136}
1137
1138/// Get Weights of a given terminator, the default weight is at the front
1139/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1140/// metadata.
1142 SmallVectorImpl<uint64_t> &Weights) {
1143 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1144 assert(MD && "Invalid branch-weight metadata");
1145 extractFromBranchWeightMD64(MD, Weights);
1146
1147 // If TI is a conditional eq, the default case is the false case,
1148 // and the corresponding branch-weight data is at index 2. We swap the
1149 // default weight to be the first entry.
1150 if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
1151 assert(Weights.size() == 2);
1152 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
1153 if (!ICI)
1154 return;
1155
1156 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1157 std::swap(Weights.front(), Weights.back());
1158 }
1159}
1160
1162 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1163 Instruction *PTI = PredBlock->getTerminator();
1164
1165 // If we have bonus instructions, clone them into the predecessor block.
1166 // Note that there may be multiple predecessor blocks, so we cannot move
1167 // bonus instructions to a predecessor block.
1168 for (Instruction &BonusInst : *BB) {
1169 if (BonusInst.isTerminator())
1170 continue;
1171
1172 Instruction *NewBonusInst = BonusInst.clone();
1173
1174 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1175 // Unless the instruction has the same !dbg location as the original
1176 // branch, drop it. When we fold the bonus instructions we want to make
1177 // sure we reset their debug locations in order to avoid stepping on
1178 // dead code caused by folding dead branches.
1179 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1180 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1181 mapAtomInstance(DL, VMap);
1182 }
1183
1184 RemapInstruction(NewBonusInst, VMap,
1186
1187 // If we speculated an instruction, we need to drop any metadata that may
1188 // result in undefined behavior, as the metadata might have been valid
1189 // only given the branch precondition.
1190 // Similarly strip attributes on call parameters that may cause UB in
1191 // location the call is moved to.
1192 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1193
1194 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1195 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1196 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1198
1199 NewBonusInst->takeName(&BonusInst);
1200 BonusInst.setName(NewBonusInst->getName() + ".old");
1201 VMap[&BonusInst] = NewBonusInst;
1202
1203 // Update (liveout) uses of bonus instructions,
1204 // now that the bonus instruction has been cloned into predecessor.
1205 // Note that we expect to be in a block-closed SSA form for this to work!
1206 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1207 auto *UI = cast<Instruction>(U.getUser());
1208 auto *PN = dyn_cast<PHINode>(UI);
1209 if (!PN) {
1210 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1211 "If the user is not a PHI node, then it should be in the same "
1212 "block as, and come after, the original bonus instruction.");
1213 continue; // Keep using the original bonus instruction.
1214 }
1215 // Is this the block-closed SSA form PHI node?
1216 if (PN->getIncomingBlock(U) == BB)
1217 continue; // Great, keep using the original bonus instruction.
1218 // The only other alternative is an "use" when coming from
1219 // the predecessor block - here we should refer to the cloned bonus instr.
1220 assert(PN->getIncomingBlock(U) == PredBlock &&
1221 "Not in block-closed SSA form?");
1222 U.set(NewBonusInst);
1223 }
1224 }
1225
1226 // Key Instructions: We may have propagated atom info into the pred. If the
1227 // pred's terminator already has atom info do nothing as merging would drop
1228 // one atom group anyway. If it doesn't, propagte the remapped atom group
1229 // from BB's terminator.
1230 if (auto &PredDL = PTI->getDebugLoc()) {
1231 auto &DL = BB->getTerminator()->getDebugLoc();
1232 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1233 PredDL.isSameSourceLocation(DL)) {
1234 PTI->setDebugLoc(DL);
1235 RemapSourceAtom(PTI, VMap);
1236 }
1237 }
1238}
1239
1240bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1241 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1242 BasicBlock *BB = TI->getParent();
1243 BasicBlock *Pred = PTI->getParent();
1244
1246
1247 // Figure out which 'cases' to copy from SI to PSI.
1248 std::vector<ValueEqualityComparisonCase> BBCases;
1249 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1250
1251 std::vector<ValueEqualityComparisonCase> PredCases;
1252 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1253
1254 // Based on whether the default edge from PTI goes to BB or not, fill in
1255 // PredCases and PredDefault with the new switch cases we would like to
1256 // build.
1257 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1258
1259 // Update the branch weight metadata along the way
1260 SmallVector<uint64_t, 8> Weights;
1261 bool PredHasWeights = hasBranchWeightMD(*PTI);
1262 bool SuccHasWeights = hasBranchWeightMD(*TI);
1263
1264 if (PredHasWeights) {
1265 getBranchWeights(PTI, Weights);
1266 // branch-weight metadata is inconsistent here.
1267 if (Weights.size() != 1 + PredCases.size())
1268 PredHasWeights = SuccHasWeights = false;
1269 } else if (SuccHasWeights)
1270 // If there are no predecessor weights but there are successor weights,
1271 // populate Weights with 1, which will later be scaled to the sum of
1272 // successor's weights
1273 Weights.assign(1 + PredCases.size(), 1);
1274
1275 SmallVector<uint64_t, 8> SuccWeights;
1276 if (SuccHasWeights) {
1277 getBranchWeights(TI, SuccWeights);
1278 // branch-weight metadata is inconsistent here.
1279 if (SuccWeights.size() != 1 + BBCases.size())
1280 PredHasWeights = SuccHasWeights = false;
1281 } else if (PredHasWeights)
1282 SuccWeights.assign(1 + BBCases.size(), 1);
1283
1284 if (PredDefault == BB) {
1285 // If this is the default destination from PTI, only the edges in TI
1286 // that don't occur in PTI, or that branch to BB will be activated.
1287 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1288 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1289 if (PredCases[i].Dest != BB)
1290 PTIHandled.insert(PredCases[i].Value);
1291 else {
1292 // The default destination is BB, we don't need explicit targets.
1293 std::swap(PredCases[i], PredCases.back());
1294
1295 if (PredHasWeights || SuccHasWeights) {
1296 // Increase weight for the default case.
1297 Weights[0] += Weights[i + 1];
1298 std::swap(Weights[i + 1], Weights.back());
1299 Weights.pop_back();
1300 }
1301
1302 PredCases.pop_back();
1303 --i;
1304 --e;
1305 }
1306
1307 // Reconstruct the new switch statement we will be building.
1308 if (PredDefault != BBDefault) {
1309 PredDefault->removePredecessor(Pred);
1310 if (DTU && PredDefault != BB)
1311 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1312 PredDefault = BBDefault;
1313 ++NewSuccessors[BBDefault];
1314 }
1315
1316 unsigned CasesFromPred = Weights.size();
1317 uint64_t ValidTotalSuccWeight = 0;
1318 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1319 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1320 PredCases.push_back(BBCases[i]);
1321 ++NewSuccessors[BBCases[i].Dest];
1322 if (SuccHasWeights || PredHasWeights) {
1323 // The default weight is at index 0, so weight for the ith case
1324 // should be at index i+1. Scale the cases from successor by
1325 // PredDefaultWeight (Weights[0]).
1326 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1327 ValidTotalSuccWeight += SuccWeights[i + 1];
1328 }
1329 }
1330
1331 if (SuccHasWeights || PredHasWeights) {
1332 ValidTotalSuccWeight += SuccWeights[0];
1333 // Scale the cases from predecessor by ValidTotalSuccWeight.
1334 for (unsigned i = 1; i < CasesFromPred; ++i)
1335 Weights[i] *= ValidTotalSuccWeight;
1336 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1337 Weights[0] *= SuccWeights[0];
1338 }
1339 } else {
1340 // If this is not the default destination from PSI, only the edges
1341 // in SI that occur in PSI with a destination of BB will be
1342 // activated.
1343 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1344 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1345 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1346 if (PredCases[i].Dest == BB) {
1347 PTIHandled.insert(PredCases[i].Value);
1348
1349 if (PredHasWeights || SuccHasWeights) {
1350 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1351 std::swap(Weights[i + 1], Weights.back());
1352 Weights.pop_back();
1353 }
1354
1355 std::swap(PredCases[i], PredCases.back());
1356 PredCases.pop_back();
1357 --i;
1358 --e;
1359 }
1360
1361 // Okay, now we know which constants were sent to BB from the
1362 // predecessor. Figure out where they will all go now.
1363 for (const ValueEqualityComparisonCase &Case : BBCases)
1364 if (PTIHandled.count(Case.Value)) {
1365 // If this is one we are capable of getting...
1366 if (PredHasWeights || SuccHasWeights)
1367 Weights.push_back(WeightsForHandled[Case.Value]);
1368 PredCases.push_back(Case);
1369 ++NewSuccessors[Case.Dest];
1370 PTIHandled.erase(Case.Value); // This constant is taken care of
1371 }
1372
1373 // If there are any constants vectored to BB that TI doesn't handle,
1374 // they must go to the default destination of TI.
1375 for (ConstantInt *I : PTIHandled) {
1376 if (PredHasWeights || SuccHasWeights)
1377 Weights.push_back(WeightsForHandled[I]);
1378 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1379 ++NewSuccessors[BBDefault];
1380 }
1381 }
1382
1383 // Okay, at this point, we know which new successor Pred will get. Make
1384 // sure we update the number of entries in the PHI nodes for these
1385 // successors.
1386 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1387 if (DTU) {
1388 SuccsOfPred = {llvm::from_range, successors(Pred)};
1389 Updates.reserve(Updates.size() + NewSuccessors.size());
1390 }
1391 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1392 NewSuccessors) {
1393 for (auto I : seq(NewSuccessor.second)) {
1394 (void)I;
1395 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1396 }
1397 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1398 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1399 }
1400
1401 Builder.SetInsertPoint(PTI);
1402 // Convert pointer to int before we switch.
1403 if (CV->getType()->isPointerTy()) {
1404 assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1405 "Should not end up here with unstable pointers");
1406 CV =
1407 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1408 }
1409
1410 // Now that the successors are updated, create the new Switch instruction.
1411 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1412 NewSI->setDebugLoc(PTI->getDebugLoc());
1413 for (ValueEqualityComparisonCase &V : PredCases)
1414 NewSI->addCase(V.Value, V.Dest);
1415
1416 if (PredHasWeights || SuccHasWeights)
1417 setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
1418 /*ElideAllZero=*/true);
1419
1421
1422 // Okay, last check. If BB is still a successor of PSI, then we must
1423 // have an infinite loop case. If so, add an infinitely looping block
1424 // to handle the case to preserve the behavior of the code.
1425 BasicBlock *InfLoopBlock = nullptr;
1426 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1427 if (NewSI->getSuccessor(i) == BB) {
1428 if (!InfLoopBlock) {
1429 // Insert it at the end of the function, because it's either code,
1430 // or it won't matter if it's hot. :)
1431 InfLoopBlock =
1432 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1433 UncondBrInst::Create(InfLoopBlock, InfLoopBlock);
1434 if (DTU)
1435 Updates.push_back(
1436 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1437 }
1438 NewSI->setSuccessor(i, InfLoopBlock);
1439 }
1440
1441 if (DTU) {
1442 if (InfLoopBlock)
1443 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1444
1445 Updates.push_back({DominatorTree::Delete, Pred, BB});
1446
1447 DTU->applyUpdates(Updates);
1448 }
1449
1450 ++NumFoldValueComparisonIntoPredecessors;
1451 return true;
1452}
1453
1454/// The specified terminator is a value equality comparison instruction
1455/// (either a switch or a branch on "X == c").
1456/// See if any of the predecessors of the terminator block are value comparisons
1457/// on the same value. If so, and if safe to do so, fold them together.
1458bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1459 IRBuilder<> &Builder) {
1460 BasicBlock *BB = TI->getParent();
1461 Value *CV = isValueEqualityComparison(TI); // CondVal
1462 assert(CV && "Not a comparison?");
1463
1464 bool Changed = false;
1465
1466 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1467 while (!Preds.empty()) {
1468 BasicBlock *Pred = Preds.pop_back_val();
1469 Instruction *PTI = Pred->getTerminator();
1470
1471 // Don't try to fold into itself.
1472 if (Pred == BB)
1473 continue;
1474
1475 // See if the predecessor is a comparison with the same value.
1476 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1477 if (PCV != CV)
1478 continue;
1479
1480 SmallSetVector<BasicBlock *, 4> FailBlocks;
1481 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1482 for (auto *Succ : FailBlocks) {
1483 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1484 return false;
1485 }
1486 }
1487
1488 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1489 Changed = true;
1490 }
1491 return Changed;
1492}
1493
1494// If we would need to insert a select that uses the value of this invoke
1495// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1496// need to do this), we can't hoist the invoke, as there is nowhere to put the
1497// select in this case.
1499 Instruction *I1, Instruction *I2) {
1500 for (BasicBlock *Succ : successors(BB1)) {
1501 for (const PHINode &PN : Succ->phis()) {
1502 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1503 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1504 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1505 return false;
1506 }
1507 }
1508 }
1509 return true;
1510}
1511
1512// Get interesting characteristics of instructions that
1513// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1514// instructions can be reordered across.
1520
1522 unsigned Flags = 0;
1523 if (I->mayReadFromMemory())
1524 Flags |= SkipReadMem;
1525 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1526 // inalloca) across stacksave/stackrestore boundaries.
1527 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1528 Flags |= SkipSideEffect;
1530 Flags |= SkipImplicitControlFlow;
1531 return Flags;
1532}
1533
1534// Returns true if it is safe to reorder an instruction across preceding
1535// instructions in a basic block.
1536static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1537 // Don't reorder a store over a load.
1538 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1539 return false;
1540
1541 // If we have seen an instruction with side effects, it's unsafe to reorder an
1542 // instruction which reads memory or itself has side effects.
1543 if ((Flags & SkipSideEffect) &&
1544 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1545 return false;
1546
1547 // Reordering across an instruction which does not necessarily transfer
1548 // control to the next instruction is speculation.
1550 return false;
1551
1552 // Hoisting of llvm.deoptimize is only legal together with the next return
1553 // instruction, which this pass is not always able to do.
1554 if (auto *CB = dyn_cast<CallBase>(I))
1555 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1556 return false;
1557
1558 // It's also unsafe/illegal to hoist an instruction above its instruction
1559 // operands
1560 BasicBlock *BB = I->getParent();
1561 for (Value *Op : I->operands()) {
1562 if (auto *J = dyn_cast<Instruction>(Op))
1563 if (J->getParent() == BB)
1564 return false;
1565 }
1566
1567 return true;
1568}
1569
1570static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1571
1572/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1573/// instructions \p I1 and \p I2 can and should be hoisted.
1575 const TargetTransformInfo &TTI) {
1576 // If we're going to hoist a call, make sure that the two instructions
1577 // we're commoning/hoisting are both marked with musttail, or neither of
1578 // them is marked as such. Otherwise, we might end up in a situation where
1579 // we hoist from a block where the terminator is a `ret` to a block where
1580 // the terminator is a `br`, and `musttail` calls expect to be followed by
1581 // a return.
1582 auto *C1 = dyn_cast<CallInst>(I1);
1583 auto *C2 = dyn_cast<CallInst>(I2);
1584 if (C1 && C2)
1585 if (C1->isMustTailCall() != C2->isMustTailCall())
1586 return false;
1587
1588 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1589 return false;
1590
1591 // If any of the two call sites has nomerge or convergent attribute, stop
1592 // hoisting.
1593 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1594 if (CB1->cannotMerge() || CB1->isConvergent())
1595 return false;
1596 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1597 if (CB2->cannotMerge() || CB2->isConvergent())
1598 return false;
1599
1600 return true;
1601}
1602
1603/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1604/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1605/// hoistCommonCodeFromSuccessors. e.g. The input:
1606/// I1 DVRs: { x, z },
1607/// OtherInsts: { I2 DVRs: { x, y, z } }
1608/// would result in hoisting only DbgVariableRecord x.
1610 Instruction *TI, Instruction *I1,
1611 SmallVectorImpl<Instruction *> &OtherInsts) {
1612 if (!I1->hasDbgRecords())
1613 return;
1614 using CurrentAndEndIt =
1615 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1616 // Vector of {Current, End} iterators.
1618 Itrs.reserve(OtherInsts.size() + 1);
1619 // Helper lambdas for lock-step checks:
1620 // Return true if this Current == End.
1621 auto atEnd = [](const CurrentAndEndIt &Pair) {
1622 return Pair.first == Pair.second;
1623 };
1624 // Return true if all Current are identical.
1625 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1626 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1628 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1629 });
1630 };
1631
1632 // Collect the iterators.
1633 Itrs.push_back(
1634 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1635 for (Instruction *Other : OtherInsts) {
1636 if (!Other->hasDbgRecords())
1637 return;
1638 Itrs.push_back(
1639 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1640 }
1641
1642 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1643 // the lock-step DbgRecord are identical, hoist all of them to TI.
1644 // This replicates the dbg.* intrinsic behaviour in
1645 // hoistCommonCodeFromSuccessors.
1646 while (none_of(Itrs, atEnd)) {
1647 bool HoistDVRs = allIdentical(Itrs);
1648 for (CurrentAndEndIt &Pair : Itrs) {
1649 // Increment Current iterator now as we may be about to move the
1650 // DbgRecord.
1651 DbgRecord &DR = *Pair.first++;
1652 if (HoistDVRs) {
1653 DR.removeFromParent();
1654 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1655 }
1656 }
1657 }
1658}
1659
1661 const Instruction *I2) {
1662 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1663 return true;
1664
1665 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1666 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1667 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1668 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1669 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1670
1671 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1672 return I1->getOperand(0) == I2->getOperand(1) &&
1673 I1->getOperand(1) == I2->getOperand(0) &&
1674 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1675 }
1676
1677 return false;
1678}
1679
1680/// If the target supports conditional faulting,
1681/// we look for the following pattern:
1682/// \code
1683/// BB:
1684/// ...
1685/// %cond = icmp ult %x, %y
1686/// br i1 %cond, label %TrueBB, label %FalseBB
1687/// FalseBB:
1688/// store i32 1, ptr %q, align 4
1689/// ...
1690/// TrueBB:
1691/// %maskedloadstore = load i32, ptr %b, align 4
1692/// store i32 %maskedloadstore, ptr %p, align 4
1693/// ...
1694/// \endcode
1695///
1696/// and transform it into:
1697///
1698/// \code
1699/// BB:
1700/// ...
1701/// %cond = icmp ult %x, %y
1702/// %maskedloadstore = cload i32, ptr %b, %cond
1703/// cstore i32 %maskedloadstore, ptr %p, %cond
1704/// cstore i32 1, ptr %q, ~%cond
1705/// br i1 %cond, label %TrueBB, label %FalseBB
1706/// FalseBB:
1707/// ...
1708/// TrueBB:
1709/// ...
1710/// \endcode
1711///
1712/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1713/// e.g.
1714///
1715/// \code
1716/// %vcond = bitcast i1 %cond to <1 x i1>
1717/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1718/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1719/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1720/// call void @llvm.masked.store.v1i32.p0
1721/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1722/// %cond.not = xor i1 %cond, true
1723/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1724/// call void @llvm.masked.store.v1i32.p0
1725/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1726/// \endcode
1727///
1728/// So we need to turn hoisted load/store into cload/cstore.
1729///
1730/// \param BI The branch instruction.
1731/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1732/// will be speculated.
1733/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
1735 CondBrInst *BI,
1736 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1737 std::optional<bool> Invert, Instruction *Sel) {
1738 auto &Context = BI->getParent()->getContext();
  // Masked load/store intrinsics operate on vectors; each scalar access is
  // rewritten as a single-element <1 x Ty> operation with a <1 x i1> mask
  // built from the branch condition.
1739 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1740 auto *Cond = BI->getCondition();
1741 // Construct the condition if needed.
1742 BasicBlock *BB = BI->getParent();
1743 Value *Mask = nullptr;
1744 Value *MaskFalse = nullptr;
1745 Value *MaskTrue = nullptr;
  // Triangle CFG (Invert has a value): build one mask, negated when the
  // speculated block is reached on the false edge. Otherwise (no Invert):
  // build one mask per successor; the per-instruction mask is chosen in the
  // loop below based on which successor the instruction came from.
1746 if (Invert.has_value()) {
1747 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1748 Mask = Builder.CreateBitCast(
1749 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1750 VCondTy);
1751 } else {
1752 IRBuilder<> Builder(BI);
1753 MaskFalse = Builder.CreateBitCast(
1754 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1755 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1756 }
  // Strip bitcasts so the pass-through / stored value is cast directly from
  // the underlying value.
1757 auto PeekThroughBitcasts = [](Value *V) {
1758 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1759 V = BitCast->getOperand(0);
1760 return V;
1761 };
1762 for (auto *I : SpeculatedConditionalLoadsStores) {
1763 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1764 if (!Invert.has_value())
1765 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1766 // We currently assume conditional faulting load/store is supported for
1767 // scalar types only when creating new instructions. This can be easily
1768 // extended for vector types in the future.
1769 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1770 auto *Op0 = I->getOperand(0);
1771 CallInst *MaskedLoadStore = nullptr;
1772 if (auto *LI = dyn_cast<LoadInst>(I)) {
1773 // Handle Load.
1774 auto *Ty = I->getType();
1775 PHINode *PN = nullptr;
1776 Value *PassThru = nullptr;
      // In the triangle case, the load's PHI incoming value from BB becomes
      // the masked load's pass-through, i.e. the value observed when the
      // mask is false.
1777 if (Invert.has_value())
1778 for (User *U : I->users()) {
1779 if ((PN = dyn_cast<PHINode>(U))) {
1780 PassThru = Builder.CreateBitCast(
1781 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1782 FixedVectorType::get(Ty, 1));
1783 } else if (auto *Ins = cast<Instruction>(U);
1784 Sel && Ins->getParent() == BB) {
1785 // This happens when store or/and a speculative instruction between
1786 // load and store were hoisted to the BB. Make sure the masked load
1787 // inserted before its use.
1788 // We assume there's one of such use.
1789 Builder.SetInsertPoint(Ins);
1790 }
1791 }
1792 MaskedLoadStore = Builder.CreateMaskedLoad(
1793 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
      // Cast the <1 x Ty> result back to scalar Ty for the existing users.
1794 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1795 if (PN)
1796 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1797 I->replaceAllUsesWith(NewLoadStore);
1798 } else {
1799 // Handle Store.
1800 auto *StoredVal = Builder.CreateBitCast(
1801 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1802 MaskedLoadStore = Builder.CreateMaskedStore(
1803 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1804 }
1805 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1806 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1807 //
1808 // !nonnull, !align : Not support pointer type, no need to keep.
1809 // !range: Load type is changed from scalar to vector, but the metadata on
1810 // vector specifies a per-element range, so the semantics stay the
1811 // same. Keep it.
1812 // !annotation: Not impact semantics. Keep it.
1813 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1814 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges))
1815 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1816 // FIXME: DIAssignID is not supported for masked store yet.
1817 // (Verifier::visitDIAssignIDMetadata)
1819 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1820 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1821 });
1822 MaskedLoadStore->copyMetadata(*I);
1823 I->eraseFromParent();
1824 }
1825}
1826
// Returns true if \p I is a simple (non-volatile, non-atomic) load or store
// that may be turned into a conditional-faulting access: the matching option
// (HoistLoadsWithCondFaulting / HoistStoresWithCondFaulting) must be enabled
// and the target must report conditional load/store support for the accessed
// type via TTI.
1828 const TargetTransformInfo &TTI) {
1829 // Not handle volatile or atomic.
1830 bool IsStore = false;
1831 if (auto *L = dyn_cast<LoadInst>(I)) {
1832 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1833 return false;
1834 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1835 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1836 return false;
1837 IsStore = true;
1838 } else
1839 return false;
1840
1841 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1842 // That's why we have the alignment limitation.
1843 // FIXME: Update the prototype of the intrinsics?
1844 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
1846}
1847
1848/// Hoist any common code in the successor blocks up into the block. This
1849/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1850/// given, only perform hoisting in case all successors blocks contain matching
1851/// instructions only. In that case, all instructions can be hoisted and the
1852/// original branch will be replaced and selects for PHIs are added.
1853bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1854 bool AllInstsEqOnly) {
1855 // This does very trivial matching, with limited scanning, to find identical
1856 // instructions in the two blocks. In particular, we don't want to get into
1857 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1858 // such, we currently just scan for obviously identical instructions in an
1859 // identical order, possibly separated by the same number of non-identical
1860 // instructions.
1861 BasicBlock *BB = TI->getParent();
1862 unsigned int SuccSize = succ_size(BB);
1863 if (SuccSize < 2)
1864 return false;
1865
1866 // If either of the blocks has its address taken, then we can't do this fold,
1867 // because the code we'd hoist would no longer run when we jump into the block
1868 // by its address.
1869 SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
1870 for (auto *Succ : UniqueSuccessors) {
1871 if (Succ->hasAddressTaken())
1872 return false;
1873 // Use getUniquePredecessor instead of getSinglePredecessor to support
1874 // multi-cases successors in switch.
1875 if (Succ->getUniquePredecessor())
1876 continue;
1877 // If Succ has >1 predecessors, continue to check if the Succ contains only
1878 // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
1879 // can relax the condition based on the assumption that the program would
1880 // never enter Succ and trigger such an UB.
1881 if (isa<UnreachableInst>(*Succ->begin()))
1882 continue;
1883 return false;
1884 }
1885 // The second of pair is a SkipFlags bitmask.
1886 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1887 SmallVector<SuccIterPair, 8> SuccIterPairs;
  // Successors that start with PHI nodes are not handled; bail out.
1888 for (auto *Succ : UniqueSuccessors) {
1889 BasicBlock::iterator SuccItr = Succ->begin();
1890 if (isa<PHINode>(*SuccItr))
1891 return false;
1892 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1893 }
1894
1895 if (AllInstsEqOnly) {
1896 // Check if all instructions in the successor blocks match. This allows
1897 // hoisting all instructions and removing the blocks we are hoisting from,
1898 // so does not add any new instructions.
1899
1900 // Check if sizes and terminators of all successors match.
1901 unsigned Size0 = UniqueSuccessors[0]->size();
1902 Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
1903 bool AllSame =
1904 all_of(drop_begin(UniqueSuccessors), [Term0, Size0](BasicBlock *Succ) {
1905 return Succ->getTerminator()->isIdenticalTo(Term0) &&
1906 Succ->size() == Size0;
1907 });
1908 if (!AllSame)
1909 return false;
    // Walk all successors in lockstep from the bottom, verifying that every
    // position holds pairwise-identical instructions.
1910 LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
1911 while (LRI.isValid()) {
1912 Instruction *I0 = (*LRI)[0];
1913 if (any_of(*LRI, [I0](Instruction *I) {
1914 return !areIdenticalUpToCommutativity(I0, I);
1915 })) {
1916 return false;
1917 }
1918 --LRI;
1919 }
1920 // Now we know that all instructions in all successors can be hoisted. Let
1921 // the loop below handle the hoisting.
1922 }
1923
1924 // Count how many instructions were not hoisted so far. There's a limit on how
1925 // many instructions we skip, serving as a compilation time control as well as
1926 // preventing excessive increase of life ranges.
1927 unsigned NumSkipped = 0;
1928 // If we find an unreachable instruction at the beginning of a basic block, we
1929 // can still hoist instructions from the rest of the basic blocks.
1930 if (SuccIterPairs.size() > 2) {
1931 erase_if(SuccIterPairs,
1932 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1933 if (SuccIterPairs.size() < 2)
1934 return false;
1935 }
1936
1937 bool Changed = false;
1938
  // Main lockstep scan: at each step, look at the current instruction of each
  // remaining successor. Identical-and-safe instructions are hoisted; pairs of
  // non-identical instructions are skipped (up to HoistCommonSkipLimit),
  // recording flags that may block later hoists across them.
1939 for (;;) {
1940 auto *SuccIterPairBegin = SuccIterPairs.begin();
1941 auto &BB1ItrPair = *SuccIterPairBegin++;
1942 auto OtherSuccIterPairRange =
1943 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1944 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1945
1946 Instruction *I1 = &*BB1ItrPair.first;
1947
1948 bool AllInstsAreIdentical = true;
1949 bool HasTerminator = I1->isTerminator();
1950 for (auto &SuccIter : OtherSuccIterRange) {
1951 Instruction *I2 = &*SuccIter;
1952 HasTerminator |= I2->isTerminator();
1953 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1954 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1955 AllInstsAreIdentical = false;
1956 }
1957
1958 SmallVector<Instruction *, 8> OtherInsts;
1959 for (auto &SuccIter : OtherSuccIterRange)
1960 OtherInsts.push_back(&*SuccIter);
1961
1962 // If we are hoisting the terminator instruction, don't move one (making a
1963 // broken BB), instead clone it, and remove BI.
1964 if (HasTerminator) {
1965 // Even if BB, which contains only one unreachable instruction, is ignored
1966 // at the beginning of the loop, we can hoist the terminator instruction.
1967 // If any instructions remain in the block, we cannot hoist terminators.
1968 if (NumSkipped || !AllInstsAreIdentical) {
1969 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1970 return Changed;
1971 }
1972
1973 return hoistSuccIdenticalTerminatorToSwitchOrIf(
1974 TI, I1, OtherInsts, UniqueSuccessors.getArrayRef()) ||
1975 Changed;
1976 }
1977
1978 if (AllInstsAreIdentical) {
1979 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1980 AllInstsAreIdentical =
1981 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1982 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1983 Instruction *I2 = &*Pair.first;
1984 unsigned SkipFlagsBB2 = Pair.second;
1985 // Even if the instructions are identical, it may not
1986 // be safe to hoist them if we have skipped over
1987 // instructions with side effects or their operands
1988 // weren't hoisted.
1989 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1991 });
1992 }
1993
1994 if (AllInstsAreIdentical) {
1995 BB1ItrPair.first++;
1996 // For a normal instruction, we just move one to right before the
1997 // branch, then replace all uses of the other with the first. Finally,
1998 // we remove the now redundant second instruction.
1999 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2000 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2001 // and leave any that were not hoisted behind (by calling moveBefore
2002 // rather than moveBeforePreserving).
2003 I1->moveBefore(TI->getIterator());
2004 for (auto &SuccIter : OtherSuccIterRange) {
2005 Instruction *I2 = &*SuccIter++;
2006 assert(I2 != I1);
2007 if (!I2->use_empty())
2008 I2->replaceAllUsesWith(I1);
        // Merge IR flags (nsw/nuw/fast-math etc.) conservatively into the
        // surviving instruction.
2009 I1->andIRFlags(I2);
2010 if (auto *CB = dyn_cast<CallBase>(I1)) {
2011 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2012 assert(Success && "We should not be trying to hoist callbases "
2013 "with non-intersectable attributes");
2014 // For NDEBUG Compile.
2015 (void)Success;
2016 }
2017
2018 combineMetadataForCSE(I1, I2, true);
2019 // I1 and I2 are being combined into a single instruction. Its debug
2020 // location is the merged locations of the original instructions.
2021 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2022 I2->eraseFromParent();
2023 }
2024 if (!Changed)
2025 NumHoistCommonCode += SuccIterPairs.size();
2026 Changed = true;
2027 NumHoistCommonInstrs += SuccIterPairs.size();
2028 } else {
2029 if (NumSkipped >= HoistCommonSkipLimit) {
2030 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2031 return Changed;
2032 }
2033 // We are about to skip over a pair of non-identical instructions. Record
2034 // if any have characteristics that would prevent reordering instructions
2035 // across them.
2036 for (auto &SuccIterPair : SuccIterPairs) {
2037 Instruction *I = &*SuccIterPair.first++;
2038 SuccIterPair.second |= skippedInstrFlags(I);
2039 }
2040 ++NumSkipped;
2041 }
2042 }
2043}
2044
/// Hoist the identical terminator instructions of TI's successor blocks into
/// TI's own block: the terminator is cloned before TI, PHI nodes in the
/// successors' successors are patched (with select instructions in the
/// conditional-branch case), and the dominator tree is updated via DTU.
2045bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2046 Instruction *TI, Instruction *I1,
2047 SmallVectorImpl<Instruction *> &OtherSuccTIs,
2048 ArrayRef<BasicBlock *> UniqueSuccessors) {
2049
2050 auto *BI = dyn_cast<CondBrInst>(TI);
2051
2052 bool Changed = false;
2053 BasicBlock *TIParent = TI->getParent();
2054 BasicBlock *BB1 = I1->getParent();
2055
2056 // Use only for an if statement.
2057 auto *I2 = *OtherSuccTIs.begin();
2058 auto *BB2 = I2->getParent();
2059 if (BI) {
2060 assert(OtherSuccTIs.size() == 1);
2061 assert(BI->getSuccessor(0) == I1->getParent());
2062 assert(BI->getSuccessor(1) == I2->getParent());
2063 }
2064
2065 // In the case of an if statement, we try to hoist an invoke.
2066 // FIXME: Can we define a safety predicate for CallBr?
2067 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2068 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2069 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2070 return false;
2071
2072 // TODO: callbr hoisting currently disabled pending further study.
2073 if (isa<CallBrInst>(I1))
2074 return false;
2075
  // Pre-check PHIs in the successors' successors: if the incoming values from
  // the blocks being merged disagree, hoisting is only possible when we may
  // insert a select (i.e. the if-statement case below).
2076 for (BasicBlock *Succ : successors(BB1)) {
2077 for (PHINode &PN : Succ->phis()) {
2078 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2079 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2080 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2081 if (BB1V == BB2V)
2082 continue;
2083
2084 // In the case of an if statement, check for
2085 // passingValueIsAlwaysUndefined here because we would rather eliminate
2086 // undefined control flow than converting it to a select.
2087 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2089 return false;
2090 }
2091 }
2092 }
2093
2094 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2095 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2096 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2097 // Clone the terminator and hoist it into the pred, without any debug info.
2098 Instruction *NT = I1->clone();
2099 NT->insertInto(TIParent, TI->getIterator());
2100 if (!NT->getType()->isVoidTy()) {
2101 I1->replaceAllUsesWith(NT);
2102 for (Instruction *OtherSuccTI : OtherSuccTIs)
2103 OtherSuccTI->replaceAllUsesWith(NT);
2104 NT->takeName(I1);
2105 }
2106 Changed = true;
2107 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2108
2109 // Ensure terminator gets a debug location, even an unknown one, in case
2110 // it involves inlinable calls.
2112 Locs.push_back(I1->getDebugLoc());
2113 for (auto *OtherSuccTI : OtherSuccTIs)
2114 Locs.push_back(OtherSuccTI->getDebugLoc());
2115 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2116
2117 // PHIs created below will adopt NT's merged DebugLoc.
2118 IRBuilder<NoFolder> Builder(NT);
2119
2120 // In the case of an if statement, hoisting one of the terminators from our
2121 // successor is a great thing. Unfortunately, the successors of the if/else
2122 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2123 // must agree for all PHI nodes, so we insert select instruction to compute
2124 // the final result.
2125 if (BI) {
    // Cache one select per distinct (BB1V, BB2V) pair so repeated PHI entries
    // reuse the same instruction.
2126 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2127 for (BasicBlock *Succ : successors(BB1)) {
2128 for (PHINode &PN : Succ->phis()) {
2129 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2130 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2131 if (BB1V == BB2V)
2132 continue;
2133
2134 // These values do not agree. Insert a select instruction before NT
2135 // that determines the right value.
2136 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2137 if (!SI) {
2138 // Propagate fast-math-flags from phi node to its replacement select.
2140 BI->getCondition(), BB1V, BB2V,
2141 isa<FPMathOperator>(PN) ? &PN : nullptr,
2142 BB1V->getName() + "." + BB2V->getName(), BI));
2143 }
2144
2145 // Make the PHI node use the select for all incoming values for BB1/BB2
2146 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2147 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2148 PN.setIncomingValue(i, SI);
2149 }
2150 }
2151 }
2152
2154
2155 // Update any PHI nodes in our new successors.
2156 for (BasicBlock *Succ : successors(BB1)) {
2157 addPredecessorToBlock(Succ, TIParent, BB1);
2158 if (DTU)
2159 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2160 }
2161
2162 if (DTU) {
2163 // TI might be a switch with multi-cases destination, so we need to care for
2164 // the duplication of successors.
2165 for (BasicBlock *Succ : UniqueSuccessors)
2166 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2167 }
2168
2170 if (DTU)
2171 DTU->applyUpdates(Updates);
2172 return Changed;
2173}
2174
2175// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2176// into variables.
// Returns true if it is "cheap" to replace operand \p OpIdx of \p I with a
// variable (e.g. a PHI created by sinking). Divisor/remainder operands and
// intrinsic operands are excluded, since constants there are typically much
// cheaper (or required).
2178 int OpIdx) {
2179 // Divide/Remainder by constant is typically much cheaper than by variable.
2180 if (I->isIntDivRem())
    // Operand 1 is the divisor; keep it constant.
2181 return OpIdx != 1;
  // Intrinsics often require constant arguments (e.g. immediates); be
  // conservative and refuse for all of them.
2182 return !isa<IntrinsicInst>(I);
2183}
2184
2185// All instructions in Insts belong to different blocks that all unconditionally
2186// branch to a common successor. Analyze each instruction and return true if it
2187// would be possible to sink them into their successor, creating one common
2188// instruction instead. For every value that would be required to be provided by
2189// PHI node (because an operand varies in each input block), add to PHIOperands.
2192 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2193 // Prune out obviously bad instructions to move. Each instruction must have
2194 // the same number of uses, and we check later that the uses are consistent.
2195 std::optional<unsigned> NumUses;
2196 for (auto *I : Insts) {
2197 // These instructions may change or break semantics if moved.
2198 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2199 I->getType()->isTokenTy())
2200 return false;
2201
2202 // Do not try to sink an instruction in an infinite loop - it can cause
2203 // this algorithm to infinite loop.
2204 if (I->getParent()->getSingleSuccessor() == I->getParent())
2205 return false;
2206
2207 // Conservatively return false if I is an inline-asm instruction. Sinking
2208 // and merging inline-asm instructions can potentially create arguments
2209 // that cannot satisfy the inline-asm constraints.
2210 // If the instruction has nomerge or convergent attribute, return false.
2211 if (const auto *C = dyn_cast<CallBase>(I))
2212 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2213 return false;
2214
2215 if (!NumUses)
2216 NumUses = I->getNumUses();
2217 else if (NumUses != I->getNumUses())
2218 return false;
2219 }
2220
  // All instructions must perform the same operation (compared with attribute
  // intersection for calls), with identical MMRA metadata.
2221 const Instruction *I0 = Insts.front();
2222 const auto I0MMRA = MMRAMetadata(*I0);
2223 for (auto *I : Insts) {
2224 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2225 return false;
2226
2227 // Treat MMRAs conservatively. This pass can be quite aggressive and
2228 // could drop a lot of MMRAs otherwise.
2229 if (MMRAMetadata(*I) != I0MMRA)
2230 return false;
2231 }
2232
2233 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2234 // then the other phi operands must match the instructions from Insts. This
2235 // also has to hold true for any phi nodes that would be created as a result
2236 // of sinking. Both of these cases are represented by PhiOperands.
2237 for (const Use &U : I0->uses()) {
2238 auto It = PHIOperands.find(&U);
2239 if (It == PHIOperands.end())
2240 // There may be uses in other blocks when sinking into a loop header.
2241 return false;
2242 if (!equal(Insts, It->second))
2243 return false;
2244 }
2245
2246 // For calls to be sinkable, they must all be indirect, or have same callee.
2247 // I.e. if we have two direct calls to different callees, we don't want to
2248 // turn that into an indirect call. Likewise, if we have an indirect call,
2249 // and a direct call, we don't actually want to have a single indirect call.
2250 if (isa<CallBase>(I0)) {
2251 auto IsIndirectCall = [](const Instruction *I) {
2252 return cast<CallBase>(I)->isIndirectCall();
2253 };
2254 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2255 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2256 if (HaveIndirectCalls) {
2257 if (!AllCallsAreIndirect)
2258 return false;
2259 } else {
2260 // All callees must be identical.
2261 Value *Callee = nullptr;
2262 for (const Instruction *I : Insts) {
2263 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2264 if (!Callee)
2265 Callee = CurrCallee;
2266 else if (Callee != CurrCallee)
2267 return false;
2268 }
2269 }
2270 }
2271
  // Finally, record every operand position whose values differ across Insts:
  // those operands would have to be supplied by a new PHI after sinking.
2272 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2273 Value *Op = I0->getOperand(OI);
2274 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2275 assert(I->getNumOperands() == I0->getNumOperands());
2276 return I->getOperand(OI) == I0->getOperand(OI);
2277 };
2278 if (!all_of(Insts, SameAsI0)) {
2281 // We can't create a PHI from this GEP.
2282 return false;
2283 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2284 for (auto *I : Insts)
2285 Ops.push_back(I->getOperand(OI));
2286 }
2287 }
2288 return true;
2289}
2290
2291// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2292// instruction of every block in Blocks to their common successor, commoning
2293// into one instruction.
2295 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2296
2297 // canSinkInstructions returning true guarantees that every block has at
2298 // least one non-terminator instruction.
2300 for (auto *BB : Blocks) {
2301 Instruction *I = BB->getTerminator();
2302 I = I->getPrevNode();
2303 Insts.push_back(I);
2304 }
2305
2306 // We don't need to do any more checking here; canSinkInstructions should
2307 // have done it all for us.
2308 SmallVector<Value*, 4> NewOperands;
2309 Instruction *I0 = Insts.front();
2310 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2311 // This check is different to that in canSinkInstructions. There, we
2312 // cared about the global view once simplifycfg (and instcombine) have
2313 // completed - it takes into account PHIs that become trivially
2314 // simplifiable. However here we need a more local view; if an operand
2315 // differs we create a PHI and rely on instcombine to clean up the very
2316 // small mess we may make.
2317 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2318 return I->getOperand(O) != I0->getOperand(O);
2319 });
2320 if (!NeedPHI) {
2321 NewOperands.push_back(I0->getOperand(O));
2322 continue;
2323 }
2324
2325 // Create a new PHI in the successor block and populate it.
2326 auto *Op = I0->getOperand(O);
2327 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2328 auto *PN =
2329 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2330 PN->insertBefore(BBEnd->begin());
2331 for (auto *I : Insts)
2332 PN->addIncoming(I->getOperand(O), I->getParent());
2333 NewOperands.push_back(PN);
2334 }
2335
2336 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2337 // and move it to the start of the successor block.
2338 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2339 I0->getOperandUse(O).set(NewOperands[O]);
2340
2341 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2342
2343 // Update metadata and IR flags, and merge debug locations.
2344 for (auto *I : Insts)
2345 if (I != I0) {
2346 // The debug location for the "common" instruction is the merged locations
2347 // of all the commoned instructions. We start with the original location
2348 // of the "common" instruction and iteratively merge each location in the
2349 // loop below.
2350 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2351 // However, as N-way merge for CallInst is rare, so we use simplified API
2352 // instead of using complex API for N-way merge.
2353 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2354 combineMetadataForCSE(I0, I, true);
      // Conservatively intersect IR flags (nsw/nuw/exact/fast-math) so the
      // merged instruction only keeps guarantees all originals had.
2355 I0->andIRFlags(I);
2356 if (auto *CB = dyn_cast<CallBase>(I0)) {
2357 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2358 assert(Success && "We should not be trying to sink callbases "
2359 "with non-intersectable attributes");
2360 // For NDEBUG Compile.
2361 (void)Success;
2362 }
2363 }
2364
  // Replace the PHIs that consumed the original instructions with the single
  // sunk instruction itself.
2365 for (User *U : make_early_inc_range(I0->users())) {
2366 // canSinkLastInstruction checked that all instructions are only used by
2367 // phi nodes in a way that allows replacing the phi node with the common
2368 // instruction.
2369 auto *PN = cast<PHINode>(U);
2370 PN->replaceAllUsesWith(I0);
2371 PN->eraseFromParent();
2372 }
2373
2374 // Finally nuke all instructions apart from the common instruction.
2375 for (auto *I : Insts) {
2376 if (I == I0)
2377 continue;
2378 // The remaining uses are debug users, replace those with the common inst.
2379 // In most (all?) cases this just introduces a use-before-def.
2380 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2381 I->replaceAllUsesWith(I0);
2382 I->eraseFromParent();
2383 }
2384}
2385
2386/// Check whether BB's predecessors end with unconditional branches. If it is
2387/// true, sink any common code from the predecessors to BB.
2389 DomTreeUpdater *DTU) {
2390 // We support two situations:
2391 // (1) all incoming arcs are unconditional
2392 // (2) there are non-unconditional incoming arcs
2393 //
2394 // (2) is very common in switch defaults and
2395 // else-if patterns;
2396 //
2397 // if (a) f(1);
2398 // else if (b) f(2);
2399 //
2400 // produces:
2401 //
2402 // [if]
2403 // / \
2404 // [f(1)] [if]
2405 // | | \
2406 // | | |
2407 // | [f(2)]|
2408 // \ | /
2409 // [ end ]
2410 //
2411 // [end] has two unconditional predecessor arcs and one conditional. The
2412 // conditional refers to the implicit empty 'else' arc. This conditional
2413 // arc can also be caused by an empty default block in a switch.
2414 //
2415 // In this case, we attempt to sink code from all *unconditional* arcs.
2416 // If we can sink instructions from these arcs (determined during the scan
2417 // phase below) we insert a common successor for all unconditional arcs and
2418 // connect that to [end], to enable sinking:
2419 //
2420 // [if]
2421 // / \
2422 // [x(1)] [if]
2423 // | | \
2424 // | | \
2425 // | [x(2)] |
2426 // \ / |
2427 // [sink.split] |
2428 // \ /
2429 // [ end ]
2430 //
2431 SmallVector<BasicBlock*,4> UnconditionalPreds;
2432 bool HaveNonUnconditionalPredecessors = false;
2433 for (auto *PredBB : predecessors(BB)) {
2434 auto *PredBr = dyn_cast<UncondBrInst>(PredBB->getTerminator());
2435 if (PredBr)
2436 UnconditionalPreds.push_back(PredBB);
2437 else
2438 HaveNonUnconditionalPredecessors = true;
2439 }
2440 if (UnconditionalPreds.size() < 2)
2441 return false;
2442
2443 // We take a two-step approach to tail sinking. First we scan from the end of
2444 // each block upwards in lockstep. If the n'th instruction from the end of each
2445 // block can be sunk, those instructions are added to ValuesToSink and we
2446 // carry on. If we can sink an instruction but need to PHI-merge some operands
2447 // (because they're not identical in each instruction) we add these to
2448 // PHIOperands.
2449 // We prepopulate PHIOperands with the phis that already exist in BB.
2451 for (PHINode &PN : BB->phis()) {
2453 for (const Use &U : PN.incoming_values())
2454 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2455 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2456 for (BasicBlock *Pred : UnconditionalPreds)
2457 Ops.push_back(*IncomingVals[Pred]);
2458 }
2459
2460 int ScanIdx = 0;
2461 SmallPtrSet<Value*,4> InstructionsToSink;
2462 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2463 while (LRI.isValid() &&
2464 canSinkInstructions(*LRI, PHIOperands)) {
2465 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2466 << "\n");
2467 InstructionsToSink.insert_range(*LRI);
2468 ++ScanIdx;
2469 --LRI;
2470 }
2471
2472 // If no instructions can be sunk, early-return.
2473 if (ScanIdx == 0)
2474 return false;
2475
2476 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2477
2478 if (!followedByDeoptOrUnreachable) {
2479 // Check whether this is the pointer operand of a load/store.
2480 auto IsMemOperand = [](Use &U) {
2481 auto *I = cast<Instruction>(U.getUser());
2482 if (isa<LoadInst>(I))
2483 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2484 if (isa<StoreInst>(I))
2485 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2486 return false;
2487 };
2488
2489 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2490 // actually sink before encountering instruction that is unprofitable to
2491 // sink?
2492 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2493 unsigned NumPHIInsts = 0;
2494 for (Use &U : (*LRI)[0]->operands()) {
2495 auto It = PHIOperands.find(&U);
2496 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2497 return InstructionsToSink.contains(V);
2498 })) {
2499 ++NumPHIInsts;
2500 // Do not separate a load/store from the gep producing the address.
2501 // The gep can likely be folded into the load/store as an addressing
2502 // mode. Additionally, a load of a gep is easier to analyze than a
2503 // load of a phi.
2504 if (IsMemOperand(U) &&
2505 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2506 return false;
2507 // FIXME: this check is overly optimistic. We may end up not sinking
2508 // said instruction, due to the very same profitability check.
2509 // See @creating_too_many_phis in sink-common-code.ll.
2510 }
2511 }
2512 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2513 return NumPHIInsts <= 1;
2514 };
2515
2516 // We've determined that we are going to sink last ScanIdx instructions,
2517 // and recorded them in InstructionsToSink. Now, some instructions may be
2518 // unprofitable to sink. But that determination depends on the instructions
2519 // that we are going to sink.
2520
2521 // First, forward scan: find the first instruction unprofitable to sink,
2522 // recording all the ones that are profitable to sink.
2523 // FIXME: would it be better, after we detect that not all are profitable.
2524 // to either record the profitable ones, or erase the unprofitable ones?
2525 // Maybe we need to choose (at runtime) the one that will touch least
2526 // instrs?
2527 LRI.reset();
2528 int Idx = 0;
2529 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2530 while (Idx < ScanIdx) {
2531 if (!ProfitableToSinkInstruction(LRI)) {
2532 // Too many PHIs would be created.
2533 LLVM_DEBUG(
2534 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2535 break;
2536 }
2537 InstructionsProfitableToSink.insert_range(*LRI);
2538 --LRI;
2539 ++Idx;
2540 }
2541
2542 // If no instructions can be sunk, early-return.
2543 if (Idx == 0)
2544 return false;
2545
2546 // Did we determine that (only) some instructions are unprofitable to sink?
2547 if (Idx < ScanIdx) {
2548 // Okay, some instructions are unprofitable.
2549 ScanIdx = Idx;
2550 InstructionsToSink = InstructionsProfitableToSink;
2551
2552 // But, that may make other instructions unprofitable, too.
2553 // So, do a backward scan, do any earlier instructions become
2554 // unprofitable?
2555 assert(
2556 !ProfitableToSinkInstruction(LRI) &&
2557 "We already know that the last instruction is unprofitable to sink");
2558 ++LRI;
2559 --Idx;
2560 while (Idx >= 0) {
2561 // If we detect that an instruction becomes unprofitable to sink,
2562 // all earlier instructions won't be sunk either,
2563 // so preemptively keep InstructionsProfitableToSink in sync.
2564 // FIXME: is this the most performant approach?
2565 for (auto *I : *LRI)
2566 InstructionsProfitableToSink.erase(I);
2567 if (!ProfitableToSinkInstruction(LRI)) {
2568 // Everything starting with this instruction won't be sunk.
2569 ScanIdx = Idx;
2570 InstructionsToSink = InstructionsProfitableToSink;
2571 }
2572 ++LRI;
2573 --Idx;
2574 }
2575 }
2576
2577 // If no instructions can be sunk, early-return.
2578 if (ScanIdx == 0)
2579 return false;
2580 }
2581
2582 bool Changed = false;
2583
2584 if (HaveNonUnconditionalPredecessors) {
2585 if (!followedByDeoptOrUnreachable) {
2586 // It is always legal to sink common instructions from unconditional
2587 // predecessors. However, if not all predecessors are unconditional,
2588 // this transformation might be pessimizing. So as a rule of thumb,
2589 // don't do it unless we'd sink at least one non-speculatable instruction.
2590 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2591 LRI.reset();
2592 int Idx = 0;
2593 bool Profitable = false;
2594 while (Idx < ScanIdx) {
2595 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2596 Profitable = true;
2597 break;
2598 }
2599 --LRI;
2600 ++Idx;
2601 }
2602 if (!Profitable)
2603 return false;
2604 }
2605
2606 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2607 // We have a conditional edge and we're going to sink some instructions.
2608 // Insert a new block postdominating all blocks we're going to sink from.
2609 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2610 // Edges couldn't be split.
2611 return false;
2612 Changed = true;
2613 }
2614
2615 // Now that we've analyzed all potential sinking candidates, perform the
2616 // actual sink. We iteratively sink the last non-terminator of the source
2617 // blocks into their common successor unless doing so would require too
2618 // many PHI instructions to be generated (currently only one PHI is allowed
2619 // per sunk instruction).
2620 //
2621 // We can use InstructionsToSink to discount values needing PHI-merging that will
2622 // actually be sunk in a later iteration. This allows us to be more
2623 // aggressive in what we sink. This does allow a false positive where we
2624 // sink presuming a later value will also be sunk, but stop half way through
2625 // and never actually sink it which means we produce more PHIs than intended.
2626 // This is unlikely in practice though.
2627 int SinkIdx = 0;
2628 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2629 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2630 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2631 << "\n");
2632
2633 // Because we've sunk every instruction in turn, the current instruction to
2634 // sink is always at index 0.
2635 LRI.reset();
2636
2637 sinkLastInstruction(UnconditionalPreds);
2638 NumSinkCommonInstrs++;
2639 Changed = true;
2640 }
2641 if (SinkIdx != 0)
2642 ++NumSinkCommonCode;
2643 return Changed;
2644}
2645
2646namespace {
2647
2648struct CompatibleSets {
2649 using SetTy = SmallVector<InvokeInst *, 2>;
2650
2652
2653 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2654
2655 SetTy &getCompatibleSet(InvokeInst *II);
2656
2657 void insert(InvokeInst *II);
2658};
2659
2660CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2661 // Perform a linear scan over all the existing sets, see if the new `invoke`
2662 // is compatible with any particular set. Since we know that all the `invokes`
2663 // within a set are compatible, only check the first `invoke` in each set.
2664 // WARNING: at worst, this has quadratic complexity.
2665 for (CompatibleSets::SetTy &Set : Sets) {
2666 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2667 return Set;
2668 }
2669
2670 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2671 return Sets.emplace_back();
2672}
2673
2674void CompatibleSets::insert(InvokeInst *II) {
2675 getCompatibleSet(II).emplace_back(II);
2676}
2677
2678bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2679 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2680
2681 // Can we theoretically merge these `invoke`s?
2682 auto IsIllegalToMerge = [](InvokeInst *II) {
2683 return II->cannotMerge() || II->isInlineAsm();
2684 };
2685 if (any_of(Invokes, IsIllegalToMerge))
2686 return false;
2687
2688 // Either both `invoke`s must be direct,
2689 // or both `invoke`s must be indirect.
2690 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2691 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2692 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2693 if (HaveIndirectCalls) {
2694 if (!AllCallsAreIndirect)
2695 return false;
2696 } else {
2697 // All callees must be identical.
2698 Value *Callee = nullptr;
2699 for (InvokeInst *II : Invokes) {
2700 Value *CurrCallee = II->getCalledOperand();
2701 assert(CurrCallee && "There is always a called operand.");
2702 if (!Callee)
2703 Callee = CurrCallee;
2704 else if (Callee != CurrCallee)
2705 return false;
2706 }
2707 }
2708
2709 // Either both `invoke`s must not have a normal destination,
2710 // or both `invoke`s must have a normal destination,
2711 auto HasNormalDest = [](InvokeInst *II) {
2712 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2713 };
2714 if (any_of(Invokes, HasNormalDest)) {
2715 // Do not merge `invoke` that does not have a normal destination with one
2716 // that does have a normal destination, even though doing so would be legal.
2717 if (!all_of(Invokes, HasNormalDest))
2718 return false;
2719
2720 // All normal destinations must be identical.
2721 BasicBlock *NormalBB = nullptr;
2722 for (InvokeInst *II : Invokes) {
2723 BasicBlock *CurrNormalBB = II->getNormalDest();
2724 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2725 if (!NormalBB)
2726 NormalBB = CurrNormalBB;
2727 else if (NormalBB != CurrNormalBB)
2728 return false;
2729 }
2730
2731 // In the normal destination, the incoming values for these two `invoke`s
2732 // must be compatible.
2733 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2735 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2736 &EquivalenceSet))
2737 return false;
2738 }
2739
2740#ifndef NDEBUG
2741 // All unwind destinations must be identical.
2742 // We know that because we have started from said unwind destination.
2743 BasicBlock *UnwindBB = nullptr;
2744 for (InvokeInst *II : Invokes) {
2745 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2746 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2747 if (!UnwindBB)
2748 UnwindBB = CurrUnwindBB;
2749 else
2750 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2751 }
2752#endif
2753
2754 // In the unwind destination, the incoming values for these two `invoke`s
2755 // must be compatible.
2757 Invokes.front()->getUnwindDest(),
2758 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2759 return false;
2760
2761 // Ignoring arguments, these `invoke`s must be identical,
2762 // including operand bundles.
2763 const InvokeInst *II0 = Invokes.front();
2764 for (auto *II : Invokes.drop_front())
2765 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2766 return false;
2767
2768 // Can we theoretically form the data operands for the merged `invoke`?
2769 auto IsIllegalToMergeArguments = [](auto Ops) {
2770 Use &U0 = std::get<0>(Ops);
2771 Use &U1 = std::get<1>(Ops);
2772 if (U0 == U1)
2773 return false;
2775 U0.getOperandNo());
2776 };
2777 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2778 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2779 IsIllegalToMergeArguments))
2780 return false;
2781
2782 return true;
2783}
2784
2785} // namespace
2786
2787// Merge all invokes in the provided set, all of which are compatible
2788// as per the `CompatibleSets::shouldBelongToSameSet()`.
2790 DomTreeUpdater *DTU) {
2791 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2792
2794 if (DTU)
2795 Updates.reserve(2 + 3 * Invokes.size());
2796
2797 bool HasNormalDest =
2798 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2799
2800 // Clone one of the invokes into a new basic block.
2801 // Since they are all compatible, it doesn't matter which invoke is cloned.
2802 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2803 InvokeInst *II0 = Invokes.front();
2804 BasicBlock *II0BB = II0->getParent();
2805 BasicBlock *InsertBeforeBlock =
2806 II0->getParent()->getIterator()->getNextNode();
2807 Function *Func = II0BB->getParent();
2808 LLVMContext &Ctx = II0->getContext();
2809
2810 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2811 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2812
2813 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2814 // NOTE: all invokes have the same attributes, so no handling needed.
2815 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2816
2817 if (!HasNormalDest) {
2818 // This set does not have a normal destination,
2819 // so just form a new block with unreachable terminator.
2820 BasicBlock *MergedNormalDest = BasicBlock::Create(
2821 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2822 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2823 UI->setDebugLoc(DebugLoc::getTemporary());
2824 MergedInvoke->setNormalDest(MergedNormalDest);
2825 }
2826
2827 // The unwind destination, however, remainds identical for all invokes here.
2828
2829 return MergedInvoke;
2830 }();
2831
2832 if (DTU) {
2833 // Predecessor blocks that contained these invokes will now branch to
2834 // the new block that contains the merged invoke, ...
2835 for (InvokeInst *II : Invokes)
2836 Updates.push_back(
2837 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2838
2839 // ... which has the new `unreachable` block as normal destination,
2840 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2841 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2842 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2843 SuccBBOfMergedInvoke});
2844
2845 // Since predecessor blocks now unconditionally branch to a new block,
2846 // they no longer branch to their original successors.
2847 for (InvokeInst *II : Invokes)
2848 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2849 Updates.push_back(
2850 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2851 }
2852
2853 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2854
2855 // Form the merged operands for the merged invoke.
2856 for (Use &U : MergedInvoke->operands()) {
2857 // Only PHI together the indirect callees and data operands.
2858 if (MergedInvoke->isCallee(&U)) {
2859 if (!IsIndirectCall)
2860 continue;
2861 } else if (!MergedInvoke->isDataOperand(&U))
2862 continue;
2863
2864 // Don't create trivial PHI's with all-identical incoming values.
2865 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2866 return II->getOperand(U.getOperandNo()) != U.get();
2867 });
2868 if (!NeedPHI)
2869 continue;
2870
2871 // Form a PHI out of all the data ops under this index.
2873 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2874 for (InvokeInst *II : Invokes)
2875 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2876
2877 U.set(PN);
2878 }
2879
2880 // We've ensured that each PHI node has compatible (identical) incoming values
2881 // when coming from each of the `invoke`s in the current merge set,
2882 // so update the PHI nodes accordingly.
2883 for (BasicBlock *Succ : successors(MergedInvoke))
2884 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2885 /*ExistPred=*/Invokes.front()->getParent());
2886
2887 // And finally, replace the original `invoke`s with an unconditional branch
2888 // to the block with the merged `invoke`. Also, give that merged `invoke`
2889 // the merged debugloc of all the original `invoke`s.
2890 DILocation *MergedDebugLoc = nullptr;
2891 for (InvokeInst *II : Invokes) {
2892 // Compute the debug location common to all the original `invoke`s.
2893 if (!MergedDebugLoc)
2894 MergedDebugLoc = II->getDebugLoc();
2895 else
2896 MergedDebugLoc =
2897 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2898
2899 // And replace the old `invoke` with an unconditionally branch
2900 // to the block with the merged `invoke`.
2901 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2902 OrigSuccBB->removePredecessor(II->getParent());
2903 auto *BI = UncondBrInst::Create(MergedInvoke->getParent(), II->getParent());
2904 // The unconditional branch is part of the replacement for the original
2905 // invoke, so should use its DebugLoc.
2906 BI->setDebugLoc(II->getDebugLoc());
2907 bool Success = MergedInvoke->tryIntersectAttributes(II);
2908 assert(Success && "Merged invokes with incompatible attributes");
2909 // For NDEBUG Compile
2910 (void)Success;
2911 II->replaceAllUsesWith(MergedInvoke);
2912 II->eraseFromParent();
2913 ++NumInvokesMerged;
2914 }
2915 MergedInvoke->setDebugLoc(MergedDebugLoc);
2916 ++NumInvokeSetsFormed;
2917
2918 if (DTU)
2919 DTU->applyUpdates(Updates);
2920}
2921
2922/// If this block is a `landingpad` exception handling block, categorize all
2923/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2924/// being "mergeable" together, and then merge invokes in each set together.
2925///
2926/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2927/// [...] [...]
2928/// | |
2929/// [invoke0] [invoke1]
2930/// / \ / \
2931/// [cont0] [landingpad] [cont1]
2932/// to:
2933/// [...] [...]
2934/// \ /
2935/// [invoke]
2936/// / \
2937/// [cont] [landingpad]
2938///
2939/// But of course we can only do that if the invokes share the `landingpad`,
2940/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2941/// and the invoked functions are "compatible".
2944 return false;
2945
2946 bool Changed = false;
2947
2948 // FIXME: generalize to all exception handling blocks?
2949 if (!BB->isLandingPad())
2950 return Changed;
2951
2952 CompatibleSets Grouper;
2953
2954 // Record all the predecessors of this `landingpad`. As per verifier,
2955 // the only allowed predecessor is the unwind edge of an `invoke`.
2956 // We want to group "compatible" `invokes` into the same set to be merged.
2957 for (BasicBlock *PredBB : predecessors(BB))
2958 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2959
2960 // And now, merge `invoke`s that were grouped togeter.
2961 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2962 if (Invokes.size() < 2)
2963 continue;
2964 Changed = true;
2965 mergeCompatibleInvokesImpl(Invokes, DTU);
2966 }
2967
2968 return Changed;
2969}
2970
2971namespace {
2972/// Track ephemeral values, which should be ignored for cost-modelling
2973/// purposes. Requires walking instructions in reverse order.
2974class EphemeralValueTracker {
2975 SmallPtrSet<const Instruction *, 32> EphValues;
2976
2977 bool isEphemeral(const Instruction *I) {
2978 if (isa<AssumeInst>(I))
2979 return true;
2980 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2981 all_of(I->users(), [&](const User *U) {
2982 return EphValues.count(cast<Instruction>(U));
2983 });
2984 }
2985
2986public:
2987 bool track(const Instruction *I) {
2988 if (isEphemeral(I)) {
2989 EphValues.insert(I);
2990 return true;
2991 }
2992 return false;
2993 }
2994
2995 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2996};
2997} // namespace
2998
2999/// Determine if we can hoist sink a sole store instruction out of a
3000/// conditional block.
3001///
3002/// We are looking for code like the following:
3003/// BrBB:
3004/// store i32 %add, i32* %arrayidx2
3005/// ... // No other stores or function calls (we could be calling a memory
3006/// ... // function).
3007/// %cmp = icmp ult %x, %y
3008/// br i1 %cmp, label %EndBB, label %ThenBB
3009/// ThenBB:
3010/// store i32 %add5, i32* %arrayidx2
3011/// br label EndBB
3012/// EndBB:
3013/// ...
3014/// We are going to transform this into:
3015/// BrBB:
3016/// store i32 %add, i32* %arrayidx2
3017/// ... //
3018/// %cmp = icmp ult %x, %y
3019/// %add.add5 = select i1 %cmp, i32 %add, %add5
3020/// store i32 %add.add5, i32* %arrayidx2
3021/// ...
3022///
3023/// \return The pointer to the value of the previous store if the store can be
3024/// hoisted into the predecessor block. 0 otherwise.
3026 BasicBlock *StoreBB, BasicBlock *EndBB) {
3027 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3028 if (!StoreToHoist)
3029 return nullptr;
3030
3031 // Volatile or atomic.
3032 if (!StoreToHoist->isSimple())
3033 return nullptr;
3034
3035 Value *StorePtr = StoreToHoist->getPointerOperand();
3036 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3037
3038 // Look for a store to the same pointer in BrBB.
3039 unsigned MaxNumInstToLookAt = 9;
3040 // Skip pseudo probe intrinsic calls which are not really killing any memory
3041 // accesses.
3042 for (Instruction &CurI : reverse(*BrBB)) {
3043 if (!MaxNumInstToLookAt)
3044 break;
3045 --MaxNumInstToLookAt;
3046
3047 if (isa<PseudoProbeInst>(CurI))
3048 continue;
3049
3050 // Could be calling an instruction that affects memory like free().
3051 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3052 return nullptr;
3053
3054 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3055 // Found the previous store to same location and type. Make sure it is
3056 // simple, to avoid introducing a spurious non-atomic write after an
3057 // atomic write.
3058 if (SI->getPointerOperand() == StorePtr &&
3059 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3060 SI->getAlign() >= StoreToHoist->getAlign())
3061 // Found the previous store, return its value operand.
3062 return SI->getValueOperand();
3063 return nullptr; // Unknown store.
3064 }
3065
3066 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3067 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3068 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3069 Value *Obj = getUnderlyingObject(StorePtr);
3070 bool ExplicitlyDereferenceableOnly;
3071 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3073 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3075 (!ExplicitlyDereferenceableOnly ||
3076 isDereferenceablePointer(StorePtr, StoreTy,
3077 LI->getDataLayout()))) {
3078 // Found a previous load, return it.
3079 return LI;
3080 }
3081 }
3082 // The load didn't work out, but we may still find a store.
3083 }
3084 }
3085
3086 return nullptr;
3087}
3088
3089/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3090/// converted to selects.
3092 BasicBlock *EndBB,
3093 unsigned &SpeculatedInstructions,
3094 InstructionCost &Cost,
3095 const TargetTransformInfo &TTI) {
3097 BB->getParent()->hasMinSize()
3100
3101 bool HaveRewritablePHIs = false;
3102 for (PHINode &PN : EndBB->phis()) {
3103 Value *OrigV = PN.getIncomingValueForBlock(BB);
3104 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3105
3106 // FIXME: Try to remove some of the duplication with
3107 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3108 if (ThenV == OrigV)
3109 continue;
3110
3111 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3112 CmpInst::makeCmpResultType(PN.getType()),
3114
3115 // Don't convert to selects if we could remove undefined behavior instead.
3116 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3118 return false;
3119
3120 HaveRewritablePHIs = true;
3121 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3122 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3123 if (!OrigCE && !ThenCE)
3124 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3125
3126 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3127 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3128 InstructionCost MaxCost =
3130 if (OrigCost + ThenCost > MaxCost)
3131 return false;
3132
3133 // Account for the cost of an unfolded ConstantExpr which could end up
3134 // getting expanded into Instructions.
3135 // FIXME: This doesn't account for how many operations are combined in the
3136 // constant expression.
3137 ++SpeculatedInstructions;
3138 if (SpeculatedInstructions > 1)
3139 return false;
3140 }
3141
3142 return HaveRewritablePHIs;
3143}
3144
3146 std::optional<bool> Invert,
3147 const TargetTransformInfo &TTI) {
3148 // If the branch is non-unpredictable, and is predicted to *not* branch to
3149 // the `then` block, then avoid speculating it.
3150 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3151 return true;
3152
3153 uint64_t TWeight, FWeight;
3154 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3155 return true;
3156
3157 if (!Invert.has_value())
3158 return false;
3159
3160 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3161 BranchProbability BIEndProb =
3162 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3163 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3164 return BIEndProb < Likely;
3165}
3166
3167/// Speculate a conditional basic block flattening the CFG.
3168///
3169/// Note that this is a very risky transform currently. Speculating
3170/// instructions like this is most often not desirable. Instead, there is an MI
3171/// pass which can do it with full awareness of the resource constraints.
3172/// However, some cases are "obvious" and we should do directly. An example of
3173/// this is speculating a single, reasonably cheap instruction.
3174///
3175/// There is only one distinct advantage to flattening the CFG at the IR level:
3176/// it makes very common but simplistic optimizations such as are common in
3177/// instcombine and the DAG combiner more powerful by removing CFG edges and
3178/// modeling their effects with easier to reason about SSA value graphs.
3179///
3180///
3181/// An illustration of this transform is turning this IR:
3182/// \code
3183/// BB:
3184/// %cmp = icmp ult %x, %y
3185/// br i1 %cmp, label %EndBB, label %ThenBB
3186/// ThenBB:
3187/// %sub = sub %x, %y
3188/// br label BB2
3189/// EndBB:
3190/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3191/// ...
3192/// \endcode
3193///
3194/// Into this IR:
3195/// \code
3196/// BB:
3197/// %cmp = icmp ult %x, %y
3198/// %sub = sub %x, %y
3199/// %cond = select i1 %cmp, 0, %sub
3200/// ...
3201/// \endcode
3202///
3203/// \returns true if the conditional block is removed.
3204bool SimplifyCFGOpt::speculativelyExecuteBB(CondBrInst *BI,
3205 BasicBlock *ThenBB) {
3206 if (!Options.SpeculateBlocks)
3207 return false;
3208
3209 // Be conservative for now. FP select instruction can often be expensive.
3210 Value *BrCond = BI->getCondition();
3211 if (isa<FCmpInst>(BrCond))
3212 return false;
3213
3214 BasicBlock *BB = BI->getParent();
3215 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3216 InstructionCost Budget =
3218
3219 // If ThenBB is actually on the false edge of the conditional branch, remember
3220 // to swap the select operands later.
3221 bool Invert = false;
3222 if (ThenBB != BI->getSuccessor(0)) {
3223 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3224 Invert = true;
3225 }
3226 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3227
3228 if (!isProfitableToSpeculate(BI, Invert, TTI))
3229 return false;
3230
3231 // Keep a count of how many times instructions are used within ThenBB when
3232 // they are candidates for sinking into ThenBB. Specifically:
3233 // - They are defined in BB, and
3234 // - They have no side effects, and
3235 // - All of their uses are in ThenBB.
3236 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3237
3238 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3239
3240 unsigned SpeculatedInstructions = 0;
3241 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3242 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3243 Value *SpeculatedStoreValue = nullptr;
3244 StoreInst *SpeculatedStore = nullptr;
3245 EphemeralValueTracker EphTracker;
3246 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3247 // Skip pseudo probes. The consequence is we lose track of the branch
3248 // probability for ThenBB, which is fine since the optimization here takes
3249 // place regardless of the branch probability.
3250 if (isa<PseudoProbeInst>(I)) {
3251 // The probe should be deleted so that it will not be over-counted when
3252 // the samples collected on the non-conditional path are counted towards
3253 // the conditional path. We leave it for the counts inference algorithm to
3254 // figure out a proper count for an unknown probe.
3255 SpeculatedPseudoProbes.push_back(&I);
3256 continue;
3257 }
3258
3259 // Ignore ephemeral values, they will be dropped by the transform.
3260 if (EphTracker.track(&I))
3261 continue;
3262
3263 // Only speculatively execute a single instruction (not counting the
3264 // terminator) for now.
3265 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3267 SpeculatedConditionalLoadsStores.size() <
3269 // Not count load/store into cost if target supports conditional faulting
3270 // b/c it's cheap to speculate it.
3271 if (IsSafeCheapLoadStore)
3272 SpeculatedConditionalLoadsStores.push_back(&I);
3273 else
3274 ++SpeculatedInstructions;
3275
3276 if (SpeculatedInstructions > 1)
3277 return false;
3278
3279 // Don't hoist the instruction if it's unsafe or expensive.
3280 if (!IsSafeCheapLoadStore &&
3282 !(HoistCondStores && !SpeculatedStoreValue &&
3283 (SpeculatedStoreValue =
3284 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3285 return false;
3286 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3289 return false;
3290
3291 // Store the store speculation candidate.
3292 if (!SpeculatedStore && SpeculatedStoreValue)
3293 SpeculatedStore = cast<StoreInst>(&I);
3294
3295 // Do not hoist the instruction if any of its operands are defined but not
3296 // used in BB. The transformation will prevent the operand from
3297 // being sunk into the use block.
3298 for (Use &Op : I.operands()) {
3300 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3301 continue; // Not a candidate for sinking.
3302
3303 ++SinkCandidateUseCounts[OpI];
3304 }
3305 }
3306
3307 // Consider any sink candidates which are only used in ThenBB as costs for
3308 // speculation. Note, while we iterate over a DenseMap here, we are summing
3309 // and so iteration order isn't significant.
3310 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3311 if (Inst->hasNUses(Count)) {
3312 ++SpeculatedInstructions;
3313 if (SpeculatedInstructions > 1)
3314 return false;
3315 }
3316
3317 // Check that we can insert the selects and that it's not too expensive to do
3318 // so.
3319 bool Convert =
3320 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3322 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3323 SpeculatedInstructions, Cost, TTI);
3324 if (!Convert || Cost > Budget)
3325 return false;
3326
3327 // If we get here, we can hoist the instruction and if-convert.
3328 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3329
3330 Instruction *Sel = nullptr;
3331 // Insert a select of the value of the speculated store.
3332 if (SpeculatedStoreValue) {
3333 IRBuilder<NoFolder> Builder(BI);
3334 Value *OrigV = SpeculatedStore->getValueOperand();
3335 Value *TrueV = SpeculatedStore->getValueOperand();
3336 Value *FalseV = SpeculatedStoreValue;
3337 if (Invert)
3338 std::swap(TrueV, FalseV);
3339 Value *S = Builder.CreateSelect(
3340 BrCond, TrueV, FalseV, "spec.store.select", BI);
3341 Sel = cast<Instruction>(S);
3342 SpeculatedStore->setOperand(0, S);
3343 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3344 SpeculatedStore->getDebugLoc());
3345 // The value stored is still conditional, but the store itself is now
3346 // unconditonally executed, so we must be sure that any linked dbg.assign
3347 // intrinsics are tracking the new stored value (the result of the
3348 // select). If we don't, and the store were to be removed by another pass
3349 // (e.g. DSE), then we'd eventually end up emitting a location describing
3350 // the conditional value, unconditionally.
3351 //
3352 // === Before this transformation ===
3353 // pred:
3354 // store %one, %x.dest, !DIAssignID !1
3355 // dbg.assign %one, "x", ..., !1, ...
3356 // br %cond if.then
3357 //
3358 // if.then:
3359 // store %two, %x.dest, !DIAssignID !2
3360 // dbg.assign %two, "x", ..., !2, ...
3361 //
3362 // === After this transformation ===
3363 // pred:
3364 // store %one, %x.dest, !DIAssignID !1
3365 // dbg.assign %one, "x", ..., !1
3366 /// ...
3367 // %merge = select %cond, %two, %one
3368 // store %merge, %x.dest, !DIAssignID !2
3369 // dbg.assign %merge, "x", ..., !2
3370 for (DbgVariableRecord *DbgAssign :
3371 at::getDVRAssignmentMarkers(SpeculatedStore))
3372 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3373 DbgAssign->replaceVariableLocationOp(OrigV, S);
3374 }
3375
3376 // Metadata can be dependent on the condition we are hoisting above.
3377 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3378 // to avoid making it appear as if the condition is a constant, which would
3379 // be misleading while debugging.
3380 // Similarly strip attributes that maybe dependent on condition we are
3381 // hoisting above.
3382 for (auto &I : make_early_inc_range(*ThenBB)) {
3383 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3384 I.dropLocation();
3385 }
3386 I.dropUBImplyingAttrsAndMetadata();
3387
3388 // Drop ephemeral values.
3389 if (EphTracker.contains(&I)) {
3390 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3391 I.eraseFromParent();
3392 }
3393 }
3394
3395 // Hoist the instructions.
3396 // Drop DbgVariableRecords attached to these instructions.
3397 for (auto &It : *ThenBB)
3398 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3399 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3400 // equivalent).
3401 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3402 !DVR || !DVR->isDbgAssign())
3403 It.dropOneDbgRecord(&DR);
3404 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3405 std::prev(ThenBB->end()));
3406
3407 if (!SpeculatedConditionalLoadsStores.empty())
3408 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3409 Sel);
3410
3411 // Insert selects and rewrite the PHI operands.
3412 IRBuilder<NoFolder> Builder(BI);
3413 for (PHINode &PN : EndBB->phis()) {
3414 unsigned OrigI = PN.getBasicBlockIndex(BB);
3415 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3416 Value *OrigV = PN.getIncomingValue(OrigI);
3417 Value *ThenV = PN.getIncomingValue(ThenI);
3418
3419 // Skip PHIs which are trivial.
3420 if (OrigV == ThenV)
3421 continue;
3422
3423 // Create a select whose true value is the speculatively executed value and
3424 // false value is the pre-existing value. Swap them if the branch
3425 // destinations were inverted.
3426 Value *TrueV = ThenV, *FalseV = OrigV;
3427 if (Invert)
3428 std::swap(TrueV, FalseV);
3429 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3430 PN.setIncomingValue(OrigI, V);
3431 PN.setIncomingValue(ThenI, V);
3432 }
3433
3434 // Remove speculated pseudo probes.
3435 for (Instruction *I : SpeculatedPseudoProbes)
3436 I->eraseFromParent();
3437
3438 ++NumSpeculations;
3439 return true;
3440}
3441
3442/// Return true if we can thread a branch across this block.
3444 int Size = 0;
3445 EphemeralValueTracker EphTracker;
3446
3447 // Walk the loop in reverse so that we can identify ephemeral values properly
3448 // (values only feeding assumes).
3449 for (Instruction &I : reverse(*BB)) {
3450 // Can't fold blocks that contain noduplicate or convergent calls.
3451 if (CallInst *CI = dyn_cast<CallInst>(&I))
3452 if (CI->cannotDuplicate() || CI->isConvergent())
3453 return false;
3454
3455 // Ignore ephemeral values which are deleted during codegen.
3456 // We will delete Phis while threading, so Phis should not be accounted in
3457 // block's size.
3458 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3459 if (Size++ > MaxSmallBlockSize)
3460 return false; // Don't clone large BB's.
3461 }
3462
3463 // We can only support instructions that do not define values that are
3464 // live outside of the current basic block.
3465 for (User *U : I.users()) {
3467 if (UI->getParent() != BB || isa<PHINode>(UI))
3468 return false;
3469 }
3470
3471 // Looks ok, continue checking.
3472 }
3473
3474 return true;
3475}
3476
3478 BasicBlock *To) {
3479 // Don't look past the block defining the value, we might get the value from
3480 // a previous loop iteration.
3481 auto *I = dyn_cast<Instruction>(V);
3482 if (I && I->getParent() == To)
3483 return nullptr;
3484
3485 // We know the value if the From block branches on it.
3486 auto *BI = dyn_cast<CondBrInst>(From->getTerminator());
3487 if (BI && BI->getCondition() == V &&
3488 BI->getSuccessor(0) != BI->getSuccessor(1))
3489 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3490 : ConstantInt::getFalse(BI->getContext());
3491
3492 return nullptr;
3493}
3494
3495/// If we have a conditional branch on something for which we know the constant
3496/// value in predecessors (e.g. a phi node in the current block), thread edges
3497/// from the predecessor to their ultimate destination.
3498static std::optional<bool>
3500 const DataLayout &DL,
3501 AssumptionCache *AC) {
 // NOTE(review): extraction dropped the first signature line (the CondBrInst
 // *BI / DomTreeUpdater *DTU parameters) and, below, the declaration of
 // KnownValues. From its uses, KnownValues maps a ConstantInt condition value
 // to the set of predecessor blocks known to supply that value — confirm
 // against the original source.
3503 BasicBlock *BB = BI->getParent();
3504 Value *Cond = BI->getCondition();
 // If the condition is a phi defined in this block, each incoming constant
 // pins the condition's value for that incoming edge.
3506 if (PN && PN->getParent() == BB) {
3507 // Degenerate case of a single entry PHI.
3508 if (PN->getNumIncomingValues() == 1) {
3510 return true;
3511 }
3512
3513 for (Use &U : PN->incoming_values())
3514 if (auto *CB = dyn_cast<ConstantInt>(U))
3515 KnownValues[CB].insert(PN->getIncomingBlock(U));
3516 } else {
 // Otherwise, a predecessor that itself branches on Cond fixes its value
 // along the edge into BB.
3517 for (BasicBlock *Pred : predecessors(BB)) {
3518 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3519 KnownValues[CB].insert(Pred);
3520 }
3521 }
3522
3523 if (KnownValues.empty())
3524 return false;
3525
3526 // Now we know that this block has multiple preds and two succs.
3527 // Check that the block is small enough and values defined in the block are
3528 // not used outside of it.
3530 return false;
3531
3532 for (const auto &Pair : KnownValues) {
3533 // Okay, we now know that all edges from PredBB should be revectored to
3534 // branch to RealDest.
3535 ConstantInt *CB = Pair.first;
3536 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3537 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3538
3539 if (RealDest == BB)
3540 continue; // Skip self loops.
3541
3542 // Skip if the predecessor's terminator is an indirect branch.
3543 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3544 return isa<IndirectBrInst>(PredBB->getTerminator());
3545 }))
3546 continue;
3547
3548 LLVM_DEBUG({
3549 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3550 << " has value " << *Pair.first << " in predecessors:\n";
3551 for (const BasicBlock *PredBB : Pair.second)
3552 dbgs() << " " << PredBB->getName() << "\n";
3553 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3554 });
3555
3556 // Split the predecessors we are threading into a new edge block. We'll
3557 // clone the instructions into this block, and then redirect it to RealDest.
3558 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3559 if (!EdgeBB)
3560 continue;
3561
3562 // TODO: These just exist to reduce test diff, we can drop them if we like.
3563 EdgeBB->setName(RealDest->getName() + ".critedge");
3564 EdgeBB->moveBefore(RealDest);
3565
3566 // Update PHI nodes.
3567 addPredecessorToBlock(RealDest, EdgeBB, BB);
3568
3569 // BB may have instructions that are being threaded over. Clone these
3570 // instructions into EdgeBB. We know that there will be no uses of the
3571 // cloned instructions outside of EdgeBB.
3572 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3573 ValueToValueMapTy TranslateMap; // Track translated values.
3574 TranslateMap[Cond] = CB;
3575
3576 // RemoveDIs: track instructions that we optimise away while folding, so
3577 // that we can copy DbgVariableRecords from them later.
3578 BasicBlock::iterator SrcDbgCursor = BB->begin();
3579 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
 // PHIs are not cloned — their incoming value for EdgeBB is used directly.
3580 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3581 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3582 continue;
3583 }
3584 // Clone the instruction.
3585 Instruction *N = BBI->clone();
3586 // Insert the new instruction into its new home.
3587 N->insertInto(EdgeBB, InsertPt);
3588
3589 if (BBI->hasName())
3590 N->setName(BBI->getName() + ".c");
3591
3592 // Update operands due to translation.
3593 // Key Instructions: Remap all the atom groups.
3594 if (const DebugLoc &DL = BBI->getDebugLoc())
3595 mapAtomInstance(DL, TranslateMap)
3598
3599 // Check for trivial simplification.
3600 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3601 if (!BBI->use_empty())
3602 TranslateMap[&*BBI] = V;
3603 if (!N->mayHaveSideEffects()) {
3604 N->eraseFromParent(); // Instruction folded away, don't need actual
3605 // inst
3606 N = nullptr;
3607 }
3608 } else {
3609 if (!BBI->use_empty())
3610 TranslateMap[&*BBI] = N;
3611 }
3612 if (N) {
3613 // Copy all debug-info attached to instructions from the last we
3614 // successfully clone, up to this instruction (they might have been
3615 // folded away).
3616 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3617 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3618 SrcDbgCursor = std::next(BBI);
3619 // Clone debug-info on this instruction too.
3620 N->cloneDebugInfoFrom(&*BBI);
3621
3622 // Register the new instruction with the assumption cache if necessary.
3623 if (auto *Assume = dyn_cast<AssumeInst>(N))
3624 if (AC)
3625 AC->registerAssumption(Assume);
3626 }
3627 }
3628
 // Copy any remaining debug records (between the last cloned instruction
 // and BI itself) onto the first instruction of EdgeBB.
3629 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3630 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3631 InsertPt->cloneDebugInfoFrom(BI);
3632
 // Redirect EdgeBB to jump straight to the known destination.
3633 BB->removePredecessor(EdgeBB);
3634 UncondBrInst *EdgeBI = cast<UncondBrInst>(EdgeBB->getTerminator());
3635 EdgeBI->setSuccessor(0, RealDest);
3636 EdgeBI->setDebugLoc(BI->getDebugLoc());
3637
3638 if (DTU) {
3640 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3641 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3642 DTU->applyUpdates(Updates);
3643 }
3644
3645 // For simplicity, we created a separate basic block for the edge. Merge
3646 // it back into the predecessor if possible. This not only avoids
3647 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3648 // bypass the check for trivial cycles above.
3649 MergeBlockIntoPredecessor(EdgeBB, DTU);
3650
3651 // Signal repeat, simplifying any other constants.
3652 return std::nullopt;
3653 }
3654
3655 return false;
3656}
3657
3658bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI) {
3659 // Note: If BB is a loop header then there is a risk that threading introduces
3660 // a non-canonical loop by moving a back edge. So we avoid this optimization
3661 // for loop headers if NeedCanonicalLoop is set.
3662 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3663 return false;
3664
3665 std::optional<bool> Result;
3666 bool EverChanged = false;
3667 do {
3668 // Note that None means "we changed things, but recurse further."
3669 Result =
3671 EverChanged |= Result == std::nullopt || *Result;
3672 } while (Result == std::nullopt);
3673 return EverChanged;
3674}
3675
3676/// Given a BB that starts with the specified two-entry PHI node,
3677/// see if we can eliminate it.
3680 const DataLayout &DL,
3681 bool SpeculateUnpredictables) {
 // NOTE(review): extraction dropped the opening signature lines. From the
 // body, the parameters also include PN (the two-entry PHI), TTI, DTU and
 // AC — confirm against the original source.
3682 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3683 // statement", which has a very simple dominance structure. Basically, we
3684 // are trying to find the condition that is being branched on, which
3685 // subsequently causes this merge to happen. We really want control
3686 // dependence information for this check, but simplifycfg can't keep it up
3687 // to date, and this catches most of the cases we care about anyway.
3688 BasicBlock *BB = PN->getParent();
3689
3690 BasicBlock *IfTrue, *IfFalse;
3691 CondBrInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3692 if (!DomBI)
3693 return false;
3694 Value *IfCond = DomBI->getCondition();
3695 // Don't bother if the branch will be constant folded trivially.
3696 if (isa<ConstantInt>(IfCond))
3697 return false;
3698
3699 BasicBlock *DomBlock = DomBI->getParent();
 // Collect the incoming blocks that are plain fall-through ('then'/'else')
 // blocks ending in an unconditional branch. NOTE(review): the declaration
 // of IfBlocks was dropped by the doc-extraction.
3701 llvm::copy_if(PN->blocks(), std::back_inserter(IfBlocks),
3702 [](BasicBlock *IfBlock) {
3703 return isa<UncondBrInst>(IfBlock->getTerminator());
3704 });
3705 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3706 "Will have either one or two blocks to speculate.");
3707
3708 // If the branch is non-unpredictable, see if we either predictably jump to
3709 // the merge bb (if we have only a single 'then' block), or if we predictably
3710 // jump to one specific 'then' block (if we have two of them).
3711 // It isn't beneficial to speculatively execute the code
3712 // from the block that we know is predictably not entered.
3713 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3714 if (!IsUnpredictable) {
3715 uint64_t TWeight, FWeight;
3716 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3717 (TWeight + FWeight) != 0) {
3718 BranchProbability BITrueProb =
3719 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3720 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3721 BranchProbability BIFalseProb = BITrueProb.getCompl();
3722 if (IfBlocks.size() == 1) {
3723 BranchProbability BIBBProb =
3724 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3725 if (BIBBProb >= Likely)
3726 return false;
3727 } else {
3728 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3729 return false;
3730 }
3731 }
3732 }
3733
3734 // Don't try to fold an unreachable block. For example, the phi node itself
3735 // can't be the candidate if-condition for a select that we want to form.
3736 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3737 if (IfCondPhiInst->getParent() == BB)
3738 return false;
3739
3740 // Okay, we found that we can merge this two-entry phi node into a select.
3741 // Doing so would require us to fold *all* two entry phi nodes in this block.
3742 // At some point this becomes non-profitable (particularly if the target
3743 // doesn't support cmov's). Only do this transformation if there are two or
3744 // fewer PHI nodes in this block.
3745 unsigned NumPhis = 0;
3746 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3747 if (NumPhis > 2)
3748 return false;
3749
3750 // Loop over the PHI's seeing if we can promote them all to select
3751 // instructions. While we are at it, keep track of the instructions
3752 // that need to be moved to the dominating block.
3753 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3754 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3755 InstructionCost Cost = 0;
3756 InstructionCost Budget =
 // NOTE(review): the budget initializer line was dropped by the extraction.
3758 if (SpeculateUnpredictables && IsUnpredictable)
3759 Budget += TTI.getBranchMispredictPenalty();
3760
3761 bool Changed = false;
3762 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3763 PHINode *PN = cast<PHINode>(II++);
3764 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3765 PN->replaceAllUsesWith(V);
3766 PN->eraseFromParent();
3767 Changed = true;
3768 continue;
3769 }
3770
3771 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3772 AggressiveInsts, Cost, Budget, TTI, AC,
3773 ZeroCostInstructions) ||
3774 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3775 AggressiveInsts, Cost, Budget, TTI, AC,
3776 ZeroCostInstructions))
3777 return Changed;
3778 }
3779
3780 // If we folded the first phi, PN dangles at this point. Refresh it. If
3781 // we ran out of PHIs then we simplified them all.
3782 PN = dyn_cast<PHINode>(BB->begin());
3783 if (!PN)
3784 return true;
3785
3786 // Don't fold i1 branches on PHIs which contain binary operators or
3787 // (possibly inverted) select form of or/ands if their parameters are
3788 // an equality test.
3789 auto IsBinOpOrAndEq = [](Value *V) {
3790 CmpPredicate Pred;
3791 if (match(V, m_CombineOr(
3793 m_BinOp(m_Cmp(Pred, m_Value(), m_Value()), m_Value()),
3794 m_BinOp(m_Value(), m_Cmp(Pred, m_Value(), m_Value()))),
3796 m_Cmp(Pred, m_Value(), m_Value()))))) {
3797 return CmpInst::isEquality(Pred);
3798 }
3799 return false;
3800 };
3801 if (PN->getType()->isIntegerTy(1) &&
3802 (IsBinOpOrAndEq(PN->getIncomingValue(0)) ||
3803 IsBinOpOrAndEq(PN->getIncomingValue(1)) || IsBinOpOrAndEq(IfCond)))
3804 return Changed;
3805
3806 // If all PHI nodes are promotable, check to make sure that all instructions
3807 // in the predecessor blocks can be promoted as well. If not, we won't be able
3808 // to get rid of the control flow, so it's not worth promoting to select
3809 // instructions.
3810 for (BasicBlock *IfBlock : IfBlocks)
3811 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3812 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3813 // This is not an aggressive instruction that we can promote.
3814 // Because of this, we won't be able to get rid of the control flow, so
3815 // the xform is not worth it.
3816 return Changed;
3817 }
3818
3819 // If either of the blocks has it's address taken, we can't do this fold.
3820 if (any_of(IfBlocks,
3821 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3822 return Changed;
3823
3824 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3825 if (IsUnpredictable) dbgs() << " (unpredictable)";
3826 dbgs() << " T: " << IfTrue->getName()
3827 << " F: " << IfFalse->getName() << "\n");
3828
3829 // If we can still promote the PHI nodes after this gauntlet of tests,
3830 // do all of the PHI's now.
3831
3832 // Move all 'aggressive' instructions, which are defined in the
3833 // conditional parts of the if's up to the dominating block.
3834 for (BasicBlock *IfBlock : IfBlocks)
3835 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3836
3837 IRBuilder<NoFolder> Builder(DomBI);
3838 // Propagate fast-math-flags from phi nodes to replacement selects.
3839 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3840 // Change the PHI node into a select instruction.
3841 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3842 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3843
3844 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3845 isa<FPMathOperator>(PN) ? PN : nullptr,
3846 "", DomBI);
3847 PN->replaceAllUsesWith(Sel);
3848 Sel->takeName(PN);
3849 PN->eraseFromParent();
3850 }
3851
3852 // At this point, all IfBlocks are empty, so our if statement
3853 // has been flattened. Change DomBlock to jump directly to our new block to
3854 // avoid other simplifycfg's kicking in on the diamond.
3855 Builder.CreateBr(BB);
3856
3858 if (DTU) {
3859 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3860 for (auto *Successor : successors(DomBlock))
3861 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3862 }
3863
3864 DomBI->eraseFromParent();
3865 if (DTU)
3866 DTU->applyUpdates(Updates);
3867
3868 return true;
3869}
3870
3873 Value *RHS, const Twine &Name = "") {
3874 // Try to relax logical op to binary op.
3875 if (impliesPoison(RHS, LHS))
3876 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3877 if (Opc == Instruction::And)
3878 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3879 if (Opc == Instruction::Or)
3880 return Builder.CreateLogicalOr(LHS, RHS, Name);
3881 llvm_unreachable("Invalid logical opcode");
3882}
3883
3884/// Return true if either PBI or BI has branch weight available, and store
3885/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3886/// not have branch weight, use 1:1 as its weight.
3888 uint64_t &PredTrueWeight,
3889 uint64_t &PredFalseWeight,
3890 uint64_t &SuccTrueWeight,
3891 uint64_t &SuccFalseWeight) {
3892 bool PredHasWeights =
3893 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3894 bool SuccHasWeights =
3895 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3896 if (PredHasWeights || SuccHasWeights) {
3897 if (!PredHasWeights)
3898 PredTrueWeight = PredFalseWeight = 1;
3899 if (!SuccHasWeights)
3900 SuccTrueWeight = SuccFalseWeight = 1;
3901 return true;
3902 } else {
3903 return false;
3904 }
3905}
3906
3907/// Determine if the two branches share a common destination and deduce a glue
3908/// that joins the branches' conditions to arrive at the common destination if
3909/// that would be profitable.
3910static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3912 const TargetTransformInfo *TTI) {
3913 assert(BI && PBI && "Both blocks must end with a conditional branches.");
3914 assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
3915 "PredBB must be a predecessor of BB.");
3916
3917 // We have the potential to fold the conditions together, but if the
3918 // predecessor branch is predictable, we may not want to merge them.
3919 uint64_t PTWeight, PFWeight;
3920 BranchProbability PBITrueProb, Likely;
3921 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3922 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3923 (PTWeight + PFWeight) != 0) {
3924 PBITrueProb =
3925 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3926 Likely = TTI->getPredictableBranchThreshold();
3927 }
3928
3929 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3930 // Speculate the 2nd condition unless the 1st is probably true.
3931 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3932 return {{BI->getSuccessor(0), Instruction::Or, false}};
3933 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3934 // Speculate the 2nd condition unless the 1st is probably false.
3935 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3936 return {{BI->getSuccessor(1), Instruction::And, false}};
3937 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3938 // Speculate the 2nd condition unless the 1st is probably true.
3939 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3940 return {{BI->getSuccessor(1), Instruction::And, true}};
3941 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3942 // Speculate the 2nd condition unless the 1st is probably false.
3943 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3944 return {{BI->getSuccessor(0), Instruction::Or, true}};
3945 }
3946 return std::nullopt;
3947}
3948
3950 DomTreeUpdater *DTU,
3951 MemorySSAUpdater *MSSAU,
3952 const TargetTransformInfo *TTI) {
 // NOTE(review): extraction dropped the signature head (it also takes the
 // two CondBrInst pointers BI and PBI used below) — confirm against the
 // original source.
3953 BasicBlock *BB = BI->getParent();
3954 BasicBlock *PredBlock = PBI->getParent();
3955
3956 // Determine if the two branches share a common destination.
3957 BasicBlock *CommonSucc;
3959 bool InvertPredCond;
3960 std::tie(CommonSucc, Opc, InvertPredCond) =
 // NOTE(review): the call feeding std::tie (to
 // shouldFoldCondBranchesToCommonDestination) was dropped here.
3962
3963 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3964
3965 IRBuilder<> Builder(PBI);
3966 // The builder is used to create instructions to eliminate the branch in BB.
3967 // If BB's terminator has !annotation metadata, add it to the new
3968 // instructions.
3969 Builder.CollectMetadataToCopy(BB->getTerminator(),
3970 {LLVMContext::MD_annotation});
3971
3972 // If we need to invert the condition in the pred block to match, do so now.
3973 if (InvertPredCond) {
3974 InvertBranch(PBI, Builder);
3975 }
3976
 // UniqueSucc is the successor of BI on the side that does not correspond to
 // falling into BB from PBI.
3977 BasicBlock *UniqueSucc =
3978 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
3979
3980 // Before cloning instructions, notify the successor basic block that it
3981 // is about to have a new predecessor. This will update PHI nodes,
3982 // which will allow us to update live-out uses of bonus instructions.
3983 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
3984
3985 // Try to update branch weights.
3986 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3987 SmallVector<uint64_t, 2> MDWeights;
3988 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3989 SuccTrueWeight, SuccFalseWeight)) {
3990
3991 if (PBI->getSuccessor(0) == BB) {
3992 // PBI: br i1 %x, BB, FalseDest
3993 // BI: br i1 %y, UniqueSucc, FalseDest
3994 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3995 MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
3996 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3997 // TrueWeight for PBI * FalseWeight for BI.
3998 // We assume that total weights of a CondBrInst can fit into 32 bits.
3999 // Therefore, we will not have overflow using 64-bit arithmetic.
4000 MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4001 PredTrueWeight * SuccFalseWeight);
4002 } else {
4003 // PBI: br i1 %x, TrueDest, BB
4004 // BI: br i1 %y, TrueDest, UniqueSucc
4005 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4006 // FalseWeight for PBI * TrueWeight for BI.
4007 MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4008 PredFalseWeight * SuccTrueWeight);
4009 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4010 MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
4011 }
4012
4013 setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
4014 /*ElideAllZero=*/true);
4015
4016 // TODO: If BB is reachable from all paths through PredBlock, then we
4017 // could replace PBI's branch probabilities with BI's.
4018 } else
4019 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4020
4021 // Now, update the CFG.
4022 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4023
4024 if (DTU)
4025 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4026 {DominatorTree::Delete, PredBlock, BB}});
4027
4028 // If BI was a loop latch, it may have had associated loop metadata.
4029 // We need to copy it to the new latch, that is, PBI.
4030 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4031 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4032
4033 ValueToValueMapTy VMap; // maps original values to cloned values
 // NOTE(review): the call that clones BB's bonus instructions into PredBlock
 // and populates VMap was dropped by the doc-extraction here.
4035
4036 Module *M = BB->getModule();
4037
4038 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4039 for (DbgVariableRecord &DVR :
4041 RemapDbgRecord(M, &DVR, VMap,
4043 }
4044
4045 // Now that the Cond was cloned into the predecessor basic block,
4046 // or/and the two conditions together.
4047 Value *BICond = VMap[BI->getCondition()];
4048 PBI->setCondition(
4049 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
 // If the glue was emitted as a logical (select-form) and/or, carry the
 // combined branch weights onto the select as well.
4051 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4052 if (!MDWeights.empty()) {
4053 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4054 setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
4055 /*IsExpected=*/false, /*ElideAllZero=*/true);
4056 }
4057
4058 ++NumFoldBranchToCommonDest;
4059 return true;
4060}
4061
4062/// Return if an instruction's type or any of its operands' types are a vector
4063/// type.
4064static bool isVectorOp(Instruction &I) {
4065 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4066 return U->getType()->isVectorTy();
4067 });
4068}
4069
4070/// If this basic block is simple enough, and if a predecessor branches to us
4071/// and one of our successors, fold the block into the predecessor and use
4072/// logical operations to pick the right destination.
4074 MemorySSAUpdater *MSSAU,
4075 const TargetTransformInfo *TTI,
4076 unsigned BonusInstThreshold) {
 // NOTE(review): extraction dropped the signature head (which also takes the
 // CondBrInst *BI and DomTreeUpdater *DTU used below) and the definition of
 // Cond/Preds/CostKind — confirm against the original source.
4077 BasicBlock *BB = BI->getParent();
4081
4083
 // The branch condition must be an instruction defined in BB with a single
 // use (the branch itself); otherwise folding is not applicable.
4085 Cond->getParent() != BB || !Cond->hasOneUse())
4086 return false;
4087
4088 // Finally, don't infinitely unroll conditional loops.
4089 if (is_contained(successors(BB), BB))
4090 return false;
4091
4092 // With which predecessors will we want to deal with?
4094 for (BasicBlock *PredBlock : predecessors(BB)) {
4095 CondBrInst *PBI = dyn_cast<CondBrInst>(PredBlock->getTerminator());
4096
4097 // Check that we have two conditional branches. If there is a PHI node in
4098 // the common successor, verify that the same value flows in from both
4099 // blocks.
4100 if (!PBI || !safeToMergeTerminators(BI, PBI))
4101 continue;
4102
4103 // Determine if the two branches share a common destination.
4104 BasicBlock *CommonSucc;
4106 bool InvertPredCond;
4107 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4108 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4109 else
4110 continue;
4111
4112 // Check the cost of inserting the necessary logic before performing the
4113 // transformation.
4114 if (TTI) {
4115 Type *Ty = BI->getCondition()->getType();
4116 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
4117 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4118 !isa<CmpInst>(PBI->getCondition())))
4119 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4120
4122 continue;
4123 }
4124
4125 // Ok, we do want to deal with this predecessor. Record it.
4126 Preds.emplace_back(PredBlock);
4127 }
4128
4129 // If there aren't any predecessors into which we can fold,
4130 // don't bother checking the cost.
4131 if (Preds.empty())
4132 return false;
4133
4134 // Only allow this transformation if computing the condition doesn't involve
4135 // too many instructions and these involved instructions can be executed
4136 // unconditionally. We denote all involved instructions except the condition
4137 // as "bonus instructions", and only allow this transformation when the
4138 // number of the bonus instructions we'll need to create when cloning into
4139 // each predecessor does not exceed a certain threshold.
4140 unsigned NumBonusInsts = 0;
4141 bool SawVectorOp = false;
4142 const unsigned PredCount = Preds.size();
4143 for (Instruction &I : *BB) {
4144 // Don't check the branch condition comparison itself.
4145 if (&I == Cond)
4146 continue;
4147 // Ignore the terminator.
4149 continue;
4150 // I must be safe to execute unconditionally.
4152 return false;
4153 SawVectorOp |= isVectorOp(I);
4154
4155 // Account for the cost of duplicating this instruction into each
4156 // predecessor. Ignore free instructions.
4157 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4159 NumBonusInsts += PredCount;
4160
4161 // Early exits once we reach the limit.
4162 if (NumBonusInsts >
4163 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4164 return false;
4165 }
4166
 // A use is in "block-closed SSA form" if it is either a phi incoming from
 // BB or a use inside BB that comes after I.
4167 auto IsBCSSAUse = [BB, &I](Use &U) {
4168 auto *UI = cast<Instruction>(U.getUser());
4169 if (auto *PN = dyn_cast<PHINode>(UI))
4170 return PN->getIncomingBlock(U) == BB;
4171 return UI->getParent() == BB && I.comesBefore(UI);
4172 };
4173
4174 // Does this instruction require rewriting of uses?
4175 if (!all_of(I.uses(), IsBCSSAUse))
4176 return false;
4177 }
4178 if (NumBonusInsts >
4179 BonusInstThreshold *
4180 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4181 return false;
4182
4183 // Ok, we have the budget. Perform the transformation.
 // Only the first recorded predecessor is folded per invocation (note the
 // return inside the loop); SimplifyCFG re-runs to handle the rest.
4184 for (BasicBlock *PredBlock : Preds) {
4185 auto *PBI = cast<CondBrInst>(PredBlock->getTerminator());
4186 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4187 }
4188 return false;
4189}
4190
4191// If there is only one store in BB1 and BB2, return it, otherwise return
4192// nullptr.
4194 StoreInst *S = nullptr;
4195 for (auto *BB : {BB1, BB2}) {
4196 if (!BB)
4197 continue;
4198 for (auto &I : *BB)
4199 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4200 if (S)
4201 // Multiple stores seen.
4202 return nullptr;
4203 else
4204 S = SI;
4205 }
4206 }
4207 return S;
4208}
4209
4211 Value *AlternativeV = nullptr) {
4212 // PHI is going to be a PHI node that allows the value V that is defined in
4213 // BB to be referenced in BB's only successor.
4214 //
4215 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4216 // doesn't matter to us what the other operand is (it'll never get used). We
4217 // could just create a new PHI with an undef incoming value, but that could
4218 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4219 // other PHI. So here we directly look for some PHI in BB's successor with V
4220 // as an incoming operand. If we find one, we use it, else we create a new
4221 // one.
4222 //
4223 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4224 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4225 // where OtherBB is the single other predecessor of BB's only successor.
4226 PHINode *PHI = nullptr;
4227 BasicBlock *Succ = BB->getSingleSuccessor();
4228
4229 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4230 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4231 PHI = cast<PHINode>(I);
4232 if (!AlternativeV)
4233 break;
4234
4235 assert(Succ->hasNPredecessors(2));
4236 auto PredI = pred_begin(Succ);
4237 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4238 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4239 break;
4240 PHI = nullptr;
4241 }
4242 if (PHI)
4243 return PHI;
4244
4245 // If V is not an instruction defined in BB, just return it.
4246 if (!AlternativeV &&
4247 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4248 return V;
4249
4250 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4251 PHI->insertBefore(Succ->begin());
4252 PHI->addIncoming(V, BB);
4253 for (BasicBlock *PredBB : predecessors(Succ))
4254 if (PredBB != BB)
4255 PHI->addIncoming(
4256 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4257 return PHI;
4258}
4259
4261 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4262 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4263 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4264 // For every pointer, there must be exactly two stores, one coming from
4265 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4266 // store (to any address) in PTB,PFB or QTB,QFB.
4267 // FIXME: We could relax this restriction with a bit more work and performance
4268 // testing.
4269 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4270 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4271 if (!PStore || !QStore)
4272 return false;
4273
4274 // Now check the stores are compatible.
4275 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4276 PStore->getValueOperand()->getType() !=
4277 QStore->getValueOperand()->getType())
4278 return false;
4279
4280 // Check that sinking the store won't cause program behavior changes. Sinking
4281 // the store out of the Q blocks won't change any behavior as we're sinking
4282 // from a block to its unconditional successor. But we're moving a store from
4283 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4284 // So we need to check that there are no aliasing loads or stores in
4285 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4286 // operations between PStore and the end of its parent block.
4287 //
4288 // The ideal way to do this is to query AliasAnalysis, but we don't
4289 // preserve AA currently so that is dangerous. Be super safe and just
4290 // check there are no other memory operations at all.
4291 for (auto &I : *QFB->getSinglePredecessor())
4292 if (I.mayReadOrWriteMemory())
4293 return false;
4294 for (auto &I : *QFB)
4295 if (&I != QStore && I.mayReadOrWriteMemory())
4296 return false;
4297 if (QTB)
4298 for (auto &I : *QTB)
4299 if (&I != QStore && I.mayReadOrWriteMemory())
4300 return false;
4301 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4302 I != E; ++I)
4303 if (&*I != PStore && I->mayReadOrWriteMemory())
4304 return false;
4305
4306 // If we're not in aggressive mode, we only optimize if we have some
4307 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4308 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4309 if (!BB)
4310 return true;
4311 // Heuristic: if the block can be if-converted/phi-folded and the
4312 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4313 // thread this store.
4314 InstructionCost Cost = 0;
4315 InstructionCost Budget =
4317 for (auto &I : *BB) {
4318 // Consider terminator instruction to be free.
4319 if (I.isTerminator())
4320 continue;
4321 // If this is one of the stores that we want to speculate out of this BB,
4322 // then don't count its cost, consider it to be free.
4323 if (auto *S = dyn_cast<StoreInst>(&I))
4324 if (llvm::find(FreeStores, S))
4325 continue;
4326 // Else, we have a white-list of instructions that we are okay speculating.
4328 return false; // Not in white-list - not worthwhile folding.
4329 // And finally, if this is a non-free instruction that we are okay
4330 // speculating, ensure that we consider the speculation budget.
4331 Cost +=
4332 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4333 if (Cost > Budget)
4334 return false; // Eagerly refuse to fold as soon as we're out of budget.
4335 }
4336 assert(Cost <= Budget &&
4337 "When we run out of budget we will eagerly return from within the "
4338 "per-instruction loop.");
4339 return true;
4340 };
4341
4342 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4344 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4345 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4346 return false;
4347
4348 // If PostBB has more than two predecessors, we need to split it so we can
4349 // sink the store.
4350 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4351 // We know that QFB's only successor is PostBB. And QFB has a single
4352 // predecessor. If QTB exists, then its only successor is also PostBB.
4353 // If QTB does not exist, then QFB's only predecessor has a conditional
4354 // branch to QFB and PostBB.
4355 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4356 BasicBlock *NewBB =
4357 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4358 if (!NewBB)
4359 return false;
4360 PostBB = NewBB;
4361 }
4362
4363 // OK, we're going to sink the stores to PostBB. The store has to be
4364 // conditional though, so first create the predicate.
4365 CondBrInst *PBranch =
4367 CondBrInst *QBranch =
4369 Value *PCond = PBranch->getCondition();
4370 Value *QCond = QBranch->getCondition();
4371
4373 PStore->getParent());
4375 QStore->getParent(), PPHI);
4376
4377 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4378 IRBuilder<> QB(PostBB, PostBBFirst);
4379 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4380
4381 InvertPCond ^= (PStore->getParent() != PTB);
4382 InvertQCond ^= (QStore->getParent() != QTB);
4383 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4384 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4385
4386 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4387
4388 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4389 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4390 /*Unreachable=*/false,
4391 /*BranchWeights=*/nullptr, DTU);
4392 if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4394 SmallVector<uint32_t, 2> PWeights, QWeights;
4395 extractBranchWeights(*PBranch, PWeights);
4396 extractBranchWeights(*QBranch, QWeights);
4397 if (InvertPCond)
4398 std::swap(PWeights[0], PWeights[1]);
4399 if (InvertQCond)
4400 std::swap(QWeights[0], QWeights[1]);
4401 auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4403 {CombinedWeights[0], CombinedWeights[1]},
4404 /*IsExpected=*/false, /*ElideAllZero=*/true);
4405 }
4406
4407 QB.SetInsertPoint(T);
4408 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4409 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4410 // Choose the minimum alignment. If we could prove both stores execute, we
4411 // could use biggest one. In this case, though, we only know that one of the
4412 // stores executes. And we don't know it's safe to take the alignment from a
4413 // store that doesn't execute.
4414 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4415
4416 QStore->eraseFromParent();
4417 PStore->eraseFromParent();
4418
4419 return true;
4420}
4421
4423 DomTreeUpdater *DTU, const DataLayout &DL,
4424 const TargetTransformInfo &TTI) {
4425 // The intention here is to find diamonds or triangles (see below) where each
4426 // conditional block contains a store to the same address. Both of these
4427 // stores are conditional, so they can't be unconditionally sunk. But it may
4428 // be profitable to speculatively sink the stores into one merged store at the
4429 // end, and predicate the merged store on the union of the two conditions of
4430 // PBI and QBI.
4431 //
4432 // This can reduce the number of stores executed if both of the conditions are
4433 // true, and can allow the blocks to become small enough to be if-converted.
4434 // This optimization will also chain, so that ladders of test-and-set
4435 // sequences can be if-converted away.
4436 //
4437 // We only deal with simple diamonds or triangles:
4438 //
4439 // PBI or PBI or a combination of the two
4440 // / \ | \
4441 // PTB PFB | PFB
4442 // \ / | /
4443 // QBI QBI
4444 // / \ | \
4445 // QTB QFB | QFB
4446 // \ / | /
4447 // PostBB PostBB
4448 //
4449 // We model triangles as a type of diamond with a nullptr "true" block.
4450 // Triangles are canonicalized so that the fallthrough edge is represented by
4451 // a true condition, as in the diagram above.
4452 BasicBlock *PTB = PBI->getSuccessor(0);
4453 BasicBlock *PFB = PBI->getSuccessor(1);
4454 BasicBlock *QTB = QBI->getSuccessor(0);
4455 BasicBlock *QFB = QBI->getSuccessor(1);
4456 BasicBlock *PostBB = QFB->getSingleSuccessor();
4457
4458 // Make sure we have a good guess for PostBB. If QTB's only successor is
4459 // QFB, then QFB is a better PostBB.
4460 if (QTB->getSingleSuccessor() == QFB)
4461 PostBB = QFB;
4462
4463 // If we couldn't find a good PostBB, stop.
4464 if (!PostBB)
4465 return false;
4466
4467 bool InvertPCond = false, InvertQCond = false;
4468 // Canonicalize fallthroughs to the true branches.
// If the "false" successor is actually the fallthrough edge, swap the
// successors and remember that the corresponding condition must be inverted
// when the merged predicate is built later.
4469 if (PFB == QBI->getParent()) {
4470 std::swap(PFB, PTB);
4471 InvertPCond = true;
4472 }
4473 if (QFB == PostBB) {
4474 std::swap(QFB, QTB);
4475 InvertQCond = true;
4476 }
4477
4478 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4479 // and QFB may not. Model fallthroughs as a nullptr block.
4480 if (PTB == QBI->getParent())
4481 PTB = nullptr;
4482 if (QTB == PostBB)
4483 QTB = nullptr;
4484
4485 // Legality bailouts. We must have at least the non-fallthrough blocks and
4486 // the post-dominating block, and the non-fallthroughs must only have one
4487 // predecessor.
4488 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4489 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4490 };
4491 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4492 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4493 return false;
4494 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4495 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4496 return false;
// NOTE(review): requiring exactly two users of QBI's block appears intended to
// rule out incoming edges (or blockaddress users) beyond the two modeled
// above — confirm against BasicBlock use semantics.
4497 if (!QBI->getParent()->hasNUses(2))
4498 return false;
4499
4500 // OK, this is a sequence of two diamonds or triangles.
4501 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4502 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4503 for (auto *BB : {PTB, PFB}) {
4504 if (!BB)
4505 continue;
4506 for (auto &I : *BB)
4508 PStoreAddresses.insert(SI->getPointerOperand());
4509 }
4510 for (auto *BB : {QTB, QFB}) {
4511 if (!BB)
4512 continue;
4513 for (auto &I : *BB)
4515 QStoreAddresses.insert(SI->getPointerOperand());
4516 }
4517
4518 set_intersect(PStoreAddresses, QStoreAddresses);
4519 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4520 // clear what it contains.
4521 auto &CommonAddresses = PStoreAddresses;
4522
// Attempt the merge once per address that is stored on both sides; any single
// success is enough to report a change.
4523 bool Changed = false;
4524 for (auto *Address : CommonAddresses)
4525 Changed |=
4526 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4527 InvertPCond, InvertQCond, DTU, DL, TTI);
4528 return Changed;
4529}
4530
4531/// If the previous block ended with a widenable branch, determine if reusing
4532/// the target block is profitable and legal. This will have the effect of
4533/// "widening" PBI, but doesn't require us to reason about hoisting safety.
4535 DomTreeUpdater *DTU) {
4536 // TODO: This can be generalized in two important ways:
4537 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4538 // values from the PBI edge.
4539 // 2) We can sink side effecting instructions into BI's fallthrough
4540 // successor provided they don't contribute to computation of
4541 // BI's condition.
4542 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4543 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
// Only applies when PBI is a widenable branch whose taken edge leads directly
// (and exclusively) into BI's block.
4544 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4545 !BI->getParent()->getSinglePredecessor())
4546 return false;
4547 if (!IfFalseBB->phis().empty())
4548 return false; // TODO
4549 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4550 // may undo the transform done here.
4551 // TODO: There might be a more fine-grained solution to this.
4552 if (!llvm::succ_empty(IfFalseBB))
4553 return false;
4554 // Use lambda to lazily compute expensive condition after cheap ones.
4555 auto NoSideEffects = [](BasicBlock &BB) {
4556 return llvm::none_of(BB, [](const Instruction &I) {
4557 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4558 });
4559 };
// Retarget BI's deoptimizing successor (detected via
// getTerminatingDeoptimizeCall) to IfFalseBB. The two symmetric cases below
// handle the deopt exit being on the false (1) or true (0) side of BI.
4560 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4561 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4562 NoSideEffects(*BI->getParent())) {
4563 auto *OldSuccessor = BI->getSuccessor(1);
4564 OldSuccessor->removePredecessor(BI->getParent());
4565 BI->setSuccessor(1, IfFalseBB);
4566 if (DTU)
4567 DTU->applyUpdates(
4568 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4569 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4570 return true;
4571 }
4572 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4573 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4574 NoSideEffects(*BI->getParent())) {
4575 auto *OldSuccessor = BI->getSuccessor(0);
4576 OldSuccessor->removePredecessor(BI->getParent());
4577 BI->setSuccessor(0, IfFalseBB);
4578 if (DTU)
4579 DTU->applyUpdates(
4580 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4581 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4582 return true;
4583 }
4584 return false;
4585}
4586
4587/// If we have a conditional branch as a predecessor of another block,
4588/// this function tries to simplify it. We know
4589/// that PBI and BI are both conditional branches, and BI is in one of the
4590/// successor blocks of PBI - PBI branches to BI.
4592 DomTreeUpdater *DTU,
4593 const DataLayout &DL,
4594 const TargetTransformInfo &TTI) {
4595 BasicBlock *BB = BI->getParent();
4596
4597 // If this block ends with a branch instruction, and if there is a
4598 // predecessor that ends on a branch of the same condition, make
4599 // this conditional branch redundant.
4600 if (PBI->getCondition() == BI->getCondition() &&
4601 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4602 // Okay, the outcome of this conditional branch is statically
4603 // knowable. If this block had a single pred, handle specially, otherwise
4604 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4605 if (BB->getSinglePredecessor()) {
4606 // Turn this into a branch on constant.
4607 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4608 BI->setCondition(
4609 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4610 return true; // Nuke the branch on constant.
4611 }
4612 }
4613
4614 // If the previous block ended with a widenable branch, determine if reusing
4615 // the target block is profitable and legal. This will have the effect of
4616 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4617 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4618 return true;
4619
4620 // If both branches are conditional and both contain stores to the same
4621 // address, remove the stores from the conditionals and create a conditional
4622 // merged store at the end.
4623 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4624 return true;
4625
4626 // If this is a conditional branch in an empty block, and if any
4627 // predecessors are a conditional branch to one of our destinations,
4628 // fold the conditions into logical ops and one cond br.
4629
4630 // Ignore dbg intrinsics.
4631 if (&*BB->begin() != BI)
4632 return false;
4633
// Identify which successor of PBI coincides with which successor of BI.
// PBIOp/BIOp record the matching successor index for each branch; if the two
// branches share no destination, the fold does not apply.
4634 int PBIOp, BIOp;
4635 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4636 PBIOp = 0;
4637 BIOp = 0;
4638 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4639 PBIOp = 0;
4640 BIOp = 1;
4641 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4642 PBIOp = 1;
4643 BIOp = 0;
4644 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4645 PBIOp = 1;
4646 BIOp = 1;
4647 } else {
4648 return false;
4649 }
4650
4651 // Check to make sure that the other destination of this branch
4652 // isn't BB itself. If so, this is an infinite loop that will
4653 // keep getting unwound.
4654 if (PBI->getSuccessor(PBIOp) == BB)
4655 return false;
4656
4657 // If predecessor's branch probability to BB is too low don't merge branches.
4658 SmallVector<uint32_t, 2> PredWeights;
4659 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4660 extractBranchWeights(*PBI, PredWeights) &&
4661 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4662
4664 PredWeights[PBIOp],
4665 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4666
4667 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4668 if (CommonDestProb >= Likely)
4669 return false;
4670 }
4671
4672 // Do not perform this transformation if it would require
4673 // insertion of a large number of select instructions. For targets
4674 // without predication/cmovs, this is a big pessimization.
4675
4676 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4677 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4678 unsigned NumPhis = 0;
4679 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4680 ++II, ++NumPhis) {
4681 if (NumPhis > 2) // Disable this xform.
4682 return false;
4683 }
4684
4685 // Finally, if everything is ok, fold the branches to logical ops.
4686 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4687
4688 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4689 << "AND: " << *BI->getParent());
4690
4692
4693 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4694 // branch in it, where one edge (OtherDest) goes back to itself but the other
4695 // exits. We don't *know* that the program avoids the infinite loop
4696 // (even though that seems likely). If we do this xform naively, we'll end up
4697 // recursively unpeeling the loop. Since we know that (after the xform is
4698 // done) that the block *is* infinite if reached, we just make it an obviously
4699 // infinite loop with no cond branch.
4700 if (OtherDest == BB) {
4701 // Insert it at the end of the function, because it's either code,
4702 // or it won't matter if it's hot. :)
4703 BasicBlock *InfLoopBlock =
4704 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4705 UncondBrInst::Create(InfLoopBlock, InfLoopBlock);
4706 if (DTU)
4707 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4708 OtherDest = InfLoopBlock;
4709 }
4710
4711 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4712
4713 // BI may have other predecessors. Because of this, we leave
4714 // it alone, but modify PBI.
4715
4716 // Make sure we get to CommonDest on True&True directions.
// Negate whichever condition(s) point at the common destination through the
// "false" operand, so the disjunction below is taken exactly when either
// original branch would reach CommonDest.
4717 Value *PBICond = PBI->getCondition();
4718 IRBuilder<NoFolder> Builder(PBI);
4719 if (PBIOp)
4720 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4721
4722 Value *BICond = BI->getCondition();
4723 if (BIOp)
4724 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4725
4726 // Merge the conditions.
4727 Value *Cond =
4728 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4729
4730 // Modify PBI to branch on the new condition to the new dests.
4731 PBI->setCondition(Cond);
4732 PBI->setSuccessor(0, CommonDest);
4733 PBI->setSuccessor(1, OtherDest);
4734
4735 if (DTU) {
4736 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4737 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4738
4739 DTU->applyUpdates(Updates);
4740 }
4741
4742 // Update branch weight for PBI.
4743 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4744 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4745 bool HasWeights =
4746 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4747 SuccTrueWeight, SuccFalseWeight);
4748 if (HasWeights) {
4749 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4750 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4751 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4752 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4753 // The weight to CommonDest should be PredCommon * SuccTotal +
4754 // PredOther * SuccCommon.
4755 // The weight to OtherDest should be PredOther * SuccOther.
4756 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4757 PredOther * SuccCommon,
4758 PredOther * SuccOther};
4759
4760 setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
4761 /*ElideAllZero=*/true);
4762 // Cond may be a select instruction with the first operand set to "true", or
4763 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4765 if (auto *SI = dyn_cast<SelectInst>(Cond)) {
4766 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4767 // The select is predicated on PBICond
4768 assert(SI->getCondition() == PBICond);
4769 // The corresponding probabilities are what was referred to above as
4770 // PredCommon and PredOther.
4771 setFittedBranchWeights(*SI, {PredCommon, PredOther},
4772 /*IsExpected=*/false, /*ElideAllZero=*/true);
4773 }
4774 }
4775
4776 // OtherDest may have phi nodes. If so, add entries from PBI's
4777 // block that are identical to the entries for BI's block.
4778 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4779
4780 // We know that the CommonDest already had an edge from PBI to
4781 // it. If it has PHIs though, the PHIs may have different
4782 // entries for BB and PBI's BB. If so, insert a select to make
4783 // them agree.
4784 for (PHINode &PN : CommonDest->phis()) {
4785 Value *BIV = PN.getIncomingValueForBlock(BB);
4786 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4787 Value *PBIV = PN.getIncomingValue(PBBIdx);
4788 if (BIV != PBIV) {
4789 // Insert a select in PBI to pick the right value.
4791 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4792 PN.setIncomingValue(PBBIdx, NV);
4793 // The select has the same condition as PBI, in the same BB. The
4794 // probabilities don't change.
4795 if (HasWeights) {
4796 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4797 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4798 setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
4799 /*IsExpected=*/false, /*ElideAllZero=*/true);
4800 }
4801 }
4802 }
4803
4804 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4805 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4806
4807 // This basic block is probably dead. We know it has at least
4808 // one fewer predecessor.
4809 return true;
4810}
4811
4812// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4813// true or to FalseBB if Cond is false.
4814// Takes care of updating the successors and removing the old terminator.
4815// Also makes sure not to introduce new successors by assuming that edges to
4816// non-successor TrueBBs and FalseBBs aren't reachable.
4817bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4818 Value *Cond, BasicBlock *TrueBB,
4819 BasicBlock *FalseBB,
4820 uint32_t TrueWeight,
4821 uint32_t FalseWeight) {
4822 auto *BB = OldTerm->getParent();
4823 // Remove any superfluous successor edges from the CFG.
4824 // First, figure out which successors to preserve.
4825 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4826 // successor.
4827 BasicBlock *KeepEdge1 = TrueBB;
4828 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4829
4830 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4831
4832 // Then remove the rest.
// KeepEdge1/KeepEdge2 are nulled out as each target is found among the old
// terminator's successors; whatever remains non-null afterwards was NOT a
// successor, which drives the terminator selection below.
4833 for (BasicBlock *Succ : successors(OldTerm)) {
4834 // Make sure only to keep exactly one copy of each edge.
4835 if (Succ == KeepEdge1)
4836 KeepEdge1 = nullptr;
4837 else if (Succ == KeepEdge2)
4838 KeepEdge2 = nullptr;
4839 else {
4840 Succ->removePredecessor(BB,
4841 /*KeepOneInputPHIs=*/true);
4842
4843 if (Succ != TrueBB && Succ != FalseBB)
4844 RemovedSuccessors.insert(Succ);
4845 }
4846 }
4847
4848 IRBuilder<> Builder(OldTerm);
4849 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4850
4851 // Insert an appropriate new terminator.
4852 if (!KeepEdge1 && !KeepEdge2) {
4853 if (TrueBB == FalseBB) {
4854 // We were only looking for one successor, and it was present.
4855 // Create an unconditional branch to it.
4856 Builder.CreateBr(TrueBB);
4857 } else {
4858 // We found both of the successors we were looking for.
4859 // Create a conditional branch sharing the condition of the select.
4860 CondBrInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4861 setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
4862 /*IsExpected=*/false, /*ElideAllZero=*/true);
4863 }
4864 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4865 // Neither of the selected blocks were successors, so this
4866 // terminator must be unreachable.
4867 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4868 } else {
4869 // One of the selected values was a successor, but the other wasn't.
4870 // Insert an unconditional branch to the one that was found;
4871 // the edge to the one that wasn't must be unreachable.
4872 if (!KeepEdge1) {
4873 // Only TrueBB was found.
4874 Builder.CreateBr(TrueBB);
4875 } else {
4876 // Only FalseBB was found.
4877 Builder.CreateBr(FalseBB);
4878 }
4879 }
4880
4882
4883 if (DTU) {
4884 SmallVector<DominatorTree::UpdateType, 2> Updates;
4885 Updates.reserve(RemovedSuccessors.size());
4886 for (auto *RemovedSuccessor : RemovedSuccessors)
4887 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4888 DTU->applyUpdates(Updates);
4889 }
4890
4891 return true;
4892}
4893
4894// Replaces
4895// (switch (select cond, X, Y)) on constant X, Y
4896// with a branch - conditional if X and Y lead to distinct BBs,
4897// unconditional otherwise.
4898bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4899 SelectInst *Select) {
4900 // Check for constant integer values in the select.
4901 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4902 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4903 if (!TrueVal || !FalseVal)
4904 return false;
4905
4906 // Find the relevant condition and destinations.
// SwitchInst::findCaseValue falls back to the default case when the value has
// no explicit case, so TrueBB/FalseBB may resolve to the default destination.
4907 Value *Condition = Select->getCondition();
4908 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4909 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4910
4911 // Get weight for TrueBB and FalseBB.
4912 uint32_t TrueWeight = 0, FalseWeight = 0;
4913 SmallVector<uint64_t, 8> Weights;
4914 bool HasWeights = hasBranchWeightMD(*SI);
4915 if (HasWeights) {
4916 getBranchWeights(SI, Weights);
4917 if (Weights.size() == 1 + SI->getNumCases()) {
4918 TrueWeight =
4919 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4920 FalseWeight =
4921 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4922 }
4923 }
4924
4925 // Perform the actual simplification.
4926 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4927 FalseWeight);
4928}
4929
4930// Replaces
4931// (indirectbr (select cond, blockaddress(@fn, BlockA),
4932// blockaddress(@fn, BlockB)))
4933// with
4934// (br cond, BlockA, BlockB).
4935bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4936 SelectInst *SI) {
4937 // Check that both operands of the select are block addresses.
4938 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4939 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4940 if (!TBA || !FBA)
4941 return false;
4942
4943 // Extract the actual blocks.
4944 BasicBlock *TrueBB = TBA->getBasicBlock();
4945 BasicBlock *FalseBB = FBA->getBasicBlock();
4946
4947 // The select's profile becomes the profile of the conditional branch that
4948 // replaces the indirect branch.
// NOTE(review): weights are zero-initialized; presumably extraction below
// only overwrites them when the select carries branch_weights metadata —
// confirm against the guard in the full source.
4949 SmallVector<uint32_t> SelectBranchWeights(2);
4951 extractBranchWeights(*SI, SelectBranchWeights);
4952 // Perform the actual simplification.
4953 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
4954 SelectBranchWeights[0],
4955 SelectBranchWeights[1]);
4956}
4958/// This is called when we find an icmp instruction
4959/// (a seteq/setne with a constant) as the only instruction in a
4960/// block that ends with an uncond branch. We are looking for a very specific
4961/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4962/// this case, we merge the first two "or's of icmp" into a switch, but then the
4963/// default value goes to an uncond block with a seteq in it, we get something
4964/// like:
4965///
4966/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4967/// DEFAULT:
4968/// %tmp = icmp eq i8 %A, 92
4969/// br label %end
4970/// end:
4971/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4972///
4973/// We prefer to split the edge to 'end' so that there is a true/false entry to
4974/// the PHI, merging the third icmp into the switch.
4975bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4976 ICmpInst *ICI, IRBuilder<> &Builder) {
4977 // Select == nullptr means we assume that there is a hidden no-op select
4978 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
// Thin wrapper: delegates to the generic icmp+select handler with no select.
4979 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
4980}
4981
4982/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
4983/// case. This is called when we find an icmp instruction (a seteq/setne with a
4984/// constant) and its following select instruction as the only TWO instructions
4985/// in a block that ends with an uncond branch. We are looking for a very
4986/// specific pattern that occurs when "
4987/// if (A == 1) return C1;
4988/// if (A == 2) return C2;
4989/// if (A < 3) return C3;
4990/// return C4;
4991/// " gets simplified. In this case, we merge the first two "branches of icmp"
4992/// into a switch, but then the default value goes to an uncond block with a lt
4993/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
4994/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
4995/// get something like:
4996///
4997/// case1:
4998/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
4999/// case2:
5000/// br label %end
5001/// DEFAULT:
5002/// %tmp = icmp eq i8 %A, 2
5003/// %val = select i1 %tmp, i8 C3, i8 C4
5004/// br label %end
5005/// end:
5006/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5007///
5008/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
5009/// to the PHI, merging the icmp & select into the switch, as follows:
5010///
5011/// case1:
5012/// switch i8 %A, label %DEFAULT [
5013/// i8 0, label %end
5014/// i8 1, label %case2
5015/// i8 2, label %case3
5016/// ]
5017/// case2:
5018/// br label %end
5019/// case3:
5020/// br label %end
5021/// DEFAULT:
5022/// br label %end
5023/// end:
5024/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
5025bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
5026 ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
5027 BasicBlock *BB = ICI->getParent();
5028
5029 // If the block has any PHIs in it or the icmp/select has multiple uses, it is
5030 // too complex.
5031 /// TODO: support multi-phis in succ BB of select's BB.
5032 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
5033 (Select && !Select->hasOneUse()))
5034 return false;
5035
5036 // The pattern we're looking for is where our only predecessor is a switch on
5037 // 'V' and this block is the default case for the switch. In this case we can
5038 // fold the compared value into the switch to simplify things.
5039 BasicBlock *Pred = BB->getSinglePredecessor();
5040 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5041 return false;
5042
5043 Value *IcmpCond;
5044 ConstantInt *NewCaseVal;
5045 CmpPredicate Predicate;
5046
5047 // Match icmp X, C
5048 if (!match(ICI,
5049 m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
5050 return false;
5051
// Normalize the no-select form to the select form: a missing select behaves
// like `select %icmp, true, false`, so both cases share the logic below.
5052 Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
5054 if (!Select) {
5055 // If Select == nullptr, we can assume that there is a hidden no-op select
5056 // just after icmp
5057 SelectCond = ICI;
5058 SelectTrueVal = Builder.getTrue();
5059 SelectFalseVal = Builder.getFalse();
5060 User = ICI->user_back();
5061 } else {
5062 SelectCond = Select->getCondition();
5063 // Check if the select condition is the same as the icmp condition.
5064 if (SelectCond != ICI)
5065 return false;
5066 SelectTrueVal = Select->getTrueValue();
5067 SelectFalseVal = Select->getFalseValue();
5068 User = Select->user_back();
5069 }
5070
5071 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5072 if (SI->getCondition() != IcmpCond)
5073 return false;
5074
5075 // If BB is reachable on a non-default case, then we simply know the value of
5076 // V in this block. Substitute it and constant fold the icmp instruction
5077 // away.
5078 if (SI->getDefaultDest() != BB) {
5079 ConstantInt *VVal = SI->findCaseDest(BB);
5080 assert(VVal && "Should have a unique destination value");
5081 ICI->setOperand(0, VVal);
5082
5083 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5084 ICI->replaceAllUsesWith(V);
5085 ICI->eraseFromParent();
5086 }
5087 // BB is now empty, so it is likely to simplify away.
5088 return requestResimplify();
5089 }
5090
5091 // Ok, the block is reachable from the default dest. If the constant we're
5092 // comparing exists in one of the other edges, then we can constant fold ICI
5093 // and zap it.
5094 if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
5095 Value *V;
5096 if (Predicate == ICmpInst::ICMP_EQ)
5098 else
5100
5101 ICI->replaceAllUsesWith(V);
5102 ICI->eraseFromParent();
5103 // BB is now empty, so it is likely to simplify away.
5104 return requestResimplify();
5105 }
5106
5107 // The use of the select has to be in the 'end' block, by the only PHI node in
5108 // the block.
5109 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5110 PHINode *PHIUse = dyn_cast<PHINode>(User);
5111 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5113 return false;
5114
5115 // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
5116 // edge gets SelectTrueVal in the PHI.
5117 Value *DefaultCst = SelectFalseVal;
5118 Value *NewCst = SelectTrueVal;
5119
5120 if (ICI->getPredicate() == ICmpInst::ICMP_NE)
5121 std::swap(DefaultCst, NewCst);
5122
5123 // Replace Select (which is used by the PHI for the default value) with
5124 // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
5125 if (Select) {
5126 Select->replaceAllUsesWith(DefaultCst);
5127 Select->eraseFromParent();
5128 } else {
5129 ICI->replaceAllUsesWith(DefaultCst);
5130 }
5131 ICI->eraseFromParent();
5132
5133 SmallVector<DominatorTree::UpdateType, 2> Updates;
5134
5135 // Okay, the switch goes to this block on a default value. Add an edge from
5136 // the switch to the merge point on the compared value.
5137 BasicBlock *NewBB =
5138 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5139 {
// Split the default edge's profile weight: roughly half stays on the default
// destination and the same half is given to the newly-added case.
5140 SwitchInstProfUpdateWrapper SIW(*SI);
5141 auto W0 = SIW.getSuccessorWeight(0);
5143 if (W0) {
5144 NewW = ((uint64_t(*W0) + 1) >> 1);
5145 SIW.setSuccessorWeight(0, *NewW);
5146 }
5147 SIW.addCase(NewCaseVal, NewBB, NewW);
5148 if (DTU)
5149 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5150 }
5151
5152 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5153 Builder.SetInsertPoint(NewBB);
5154 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5155 Builder.CreateBr(SuccBlock);
5156 PHIUse->addIncoming(NewCst, NewBB);
5157 if (DTU) {
5158 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5159 DTU->applyUpdates(Updates);
5160 }
5161 return true;
5162}
5163
5164/// Check to see if it is branching on an or/and chain of icmp instructions, and
5165/// fold it into a switch instruction if so.
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(CondBrInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  // NOTE(review): the statement initializing Cond was elided from this source
  // view; it presumably extracts BI's condition as an Instruction — confirm
  // against the full file.
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
  Values.erase(llvm::unique(Values), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  // Capture existing branch-weight metadata (if enabled) so the new switch or
  // range compare can carry equivalent profile information.
  SmallVector<uint32_t> BranchWeights;
  const bool HasProfile = !ProfcheckDisableMetadataFixes &&
                          extractBranchWeights(*BI, BranchWeights);

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  if (!TrueWhenEqual) {
    // The chain is a conjunction of setne's: the "equal" values jump to the
    // false successor, so swap the roles (and the recorded weights).
    std::swap(DefaultBB, EdgeBB);
    if (HasProfile)
      std::swap(BranchWeights[0], BranchWeights[1]);
  }

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH. BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    // We don't have any info about this condition.
    auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
                             : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
    // NOTE(review): a statement using Br (likely marking it unpredictable or
    // attaching metadata) was elided from this source view.

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
           "Should not end up here with unstable pointers");
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Check if we can represent the values as a contiguous range. If so, we use a
  // range check + conditional branch instead of a switch.
  if (Values.front()->getValue() - Values.back()->getValue() ==
      Values.size() - 1) {
    ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
        Values.back()->getValue(), Values.front()->getValue() + 1);
    // Lower the range membership test to a single icmp (with an optional
    // offsetting add) via ConstantRange's equivalent-icmp machinery.
    APInt Offset, RHS;
    ICmpInst::Predicate Pred;
    RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
    Value *X = CompVal;
    if (!Offset.isZero())
      X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
    Value *Cond =
        Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
    CondBrInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
    if (HasProfile)
      setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
    // We don't need to update PHI nodes since we don't add any new edges.
  } else {
    // Create the new switch instruction now.
    SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
    if (HasProfile) {
      // We know the weight of the default case. We don't know the weight of the
      // other cases, but rather than completely lose profiling info, we split
      // the remaining probability equally over them.
      SmallVector<uint32_t> NewWeights(Values.size() + 1);
      NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
                                        // if TrueWhenEqual.
      for (auto &V : drop_begin(NewWeights))
        V = BranchWeights[0] / Values.size();
      setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
    }

    // Add all of the 'cases' to the switch instruction.
    for (ConstantInt *Val : Values)
      New->addCase(Val, EdgeBB);

    // We added edges from PI to the EdgeBB. As such, if there were any
    // PHI nodes in EdgeBB, they need entries to be added corresponding to
    // the number of edges added.
    for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
      PHINode *PN = cast<PHINode>(BBI);
      Value *InVal = PN->getIncomingValueForBlock(BB);
      // One incoming entry already exists for BB; add Values.size()-1 more so
      // every switch case edge is represented.
      for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
        PN->addIncoming(InVal, BB);
    }
  }

  // Erase the old branch instruction.
  // NOTE(review): the statement erasing BI was elided from this source view.
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5330
5331bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5332 if (isa<PHINode>(RI->getValue()))
5333 return simplifyCommonResume(RI);
5334 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5335 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5336 // The resume must unwind the exception that caused control to branch here.
5337 return simplifySingleResume(RI);
5338
5339 return false;
5340}
5341
// Check if cleanup block is empty
// Returns true iff the given instruction range contains only intrinsics that
// are benign for cleanup purposes (debug-info records and lifetime_end).
// NOTE(review): the signature line of this function (likely
// `static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {`)
// was elided from this source view; only the body is visible below.
  for (Instruction &I : R) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II)
      return false; // Any non-intrinsic instruction means real work happens.

    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      break; // Benign; keep scanning.
    default:
      return false;
    }
  }
  return true;
}
5362
// Simplify resume that is shared by several landing pads (phi of landing pad).
// For each trivially-empty landing-pad block feeding the PHI, turns the
// invokes that unwind into it into calls and detaches the block from the
// common resume block; deletes the resume block if it becomes dead.
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
                                      BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

    // NOTE(review): the opening of this condition (likely
    // `if (isCleanupBlockEmpty(`) was elided from this source view.
        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    // NOTE(review): the range expression of this loop (likely
    // `llvm::make_early_inc_range(predecessors(TrivialBB)))`) was elided
    // from this source view.
    for (BasicBlock *Pred :
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  // Non-empty set implies we changed something above.
  return !TrivialUnwindBlocks.empty();
}
5432
// Simplify resume that is only used by a single (non-phi) landing pad.
// Converts every invoke unwinding to this block into a plain call and deletes
// the (now unreachable) landing-pad block. Returns true on change.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to here");

  // Check that there are no other instructions except for debug intrinsics.
  // NOTE(review): the opening of this condition (likely
  // `if (!isCleanupBlockEmpty(`) was elided from this source view.
      make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}
5455
// NOTE(review): the signature of this function (likely
// `static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {`)
// was elided from this source view; only the body is visible below.
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  // NOTE(review): the opening of this condition (likely
  // `if (!isCleanupBlockEmpty(`) was elided from this source view.
      make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      // If the incoming value is itself a PHI of BB, translate it through
      // each predecessor; otherwise reuse the single dominating value.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  // NOTE(review): the loop header (likely
  // `for (BasicBlock *PredBB : llvm::make_early_inc_range(predecessors(BB))) {`)
  // was elided from this source view.
    if (UnwindDest == nullptr) {
      if (DTU) {
        // Flush queued updates before removeUnwindEdge mutates the CFG.
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5571
// Try to merge two cleanuppads together.
// If this cleanupret's unwind destination is another cleanuppad block with no
// other predecessors, fold the successor pad into the predecessor pad and
// replace the cleanupret with an unconditional branch.
// NOTE(review): the signature line (likely
// `static bool mergeCleanupPad(CleanupReturnInst *RI) {`) was elided from
// this source view; only the body is visible below.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanupad with the predecessor pad
  // The only cleanuppad uses should be this cleanupret, its cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  UncondBrInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5604
5605bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5606 // It is possible to transiantly have an undef cleanuppad operand because we
5607 // have deleted some, but not all, dead blocks.
5608 // Eventually, this block will be deleted.
5609 if (isa<UndefValue>(RI->getOperand(0)))
5610 return false;
5611
5612 if (mergeCleanupPad(RI))
5613 return true;
5614
5615 if (removeEmptyCleanup(RI, DTU))
5616 return true;
5617
5618 return false;
5619}
5620
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Removes instructions preceding an `unreachable` and rewrites every
// predecessor terminator so the dead block is no longer reachable; deletes
// the block itself once it has no predecessors.
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.
  // NOTE(review): the statement performing that move was elided from this
  // source view.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    // NOTE(review): the declaration of BBI (an iterator starting at UI) was
    // elided from this source view.
    --BBI;

    // NOTE(review): the guard condition deciding when no further instructions
    // may be dropped was elided from this source view.
      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // Snapshot predecessors: the loop below mutates the CFG.
  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (isa<UncondBrInst>(TI)) {
      // An unconditional jump into an unreachable block: the predecessor
      // itself can never fall through, so it ends in unreachable too.
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *BI = dyn_cast<CondBrInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (BI->getSuccessor(0) == BI->getSuccessor(1)) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        // Record what the condition must be for control to avoid the dead
        // edge, then branch unconditionally to the live successor.
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        // NOTE(review): the statement erasing the old conditional branch was
        // elided from this source view.
        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        if (DTU) {
          // Flush queued updates before removeUnwindEdge mutates the CFG.
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // BB must be one of the handlers; drop every handler edge to it.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI; // Only used by the assert in NDEBUG builds.
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5808
5817
// Given the case values for two switch destinations, determine whether one of
// the destinations is reached by a contiguous (possibly wrapping) range of
// values; returns the range bounds and destinations, or nullopt.
static std::optional<ContiguousCasesResult>
// NOTE(review): the middle of this signature (the name and the leading
// parameters — presumably Condition plus the two case lists) was elided from
// this source view.
                    BasicBlock *Dest, BasicBlock *OtherDest) {
  assert(Cases.size() >= 1);

  // NOTE(review): a line was elided here; Min/Max below assume Cases is
  // sorted in descending order (back() smallest, front() largest) — confirm
  // against the full file.
  const APInt &Min = Cases.back()->getValue();
  const APInt &Max = Cases.front()->getValue();
  APInt Offset = Max - Min;
  size_t ContiguousOffset = Cases.size() - 1;
  // Exactly Cases.size() distinct values spanning [Min, Max] => contiguous.
  if (Offset == ContiguousOffset) {
    return ContiguousCasesResult{
        /*Min=*/Cases.back(),
        /*Max=*/Cases.front(),
        /*Dest=*/Dest,
        /*OtherDest=*/OtherDest,
        /*Cases=*/&Cases,
        /*OtherCases=*/&OtherCases,
    };
  }
  ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false);
  // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
  // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
  // contiguous range for the other destination. N.B. If CR is not a full range,
  // Max+1 is not equal to Min. It's not continuous in arithmetic.
  if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
    assert(Cases.size() >= 2);
    // Find the single "gap" in the descending sequence of case values.
    auto *It =
        std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
          return L->getValue() != R->getValue() + 1;
        });
    if (It == Cases.end())
      return std::nullopt;
    auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
    // Both halves around the gap must themselves be contiguous.
    if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
        Cases.size() - 2) {
      return ContiguousCasesResult{
          /*Min=*/cast<ConstantInt>(
              ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
          /*Max=*/
          // NOTE(review): a `cast<ConstantInt>(` line was elided here from
          // this source view.
              ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
          /*Dest=*/OtherDest,
          /*OtherDest=*/Dest,
          /*Cases=*/&OtherCases,
          /*OtherCases=*/&Cases,
      };
    }
  }
  return std::nullopt;
}
5870
// Replace a switch's dead default destination with a fresh block holding only
// `unreachable`, optionally detaching the original default block first.
// NOTE(review): the first line of this signature (likely
// `static void createUnreachableSwitchDefault(SwitchInst *Switch,`) was
// elided from this source view.
                                           DomTreeUpdater *DTU,
                                           bool RemoveOrigDefaultBlock = true) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(BB);
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  // NOTE(review): a statement using UI (possibly setting its debug location)
  // was elided from this source view.
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    // NOTE(review): the declaration of Updates (a vector of
    // DominatorTree::UpdateType) was elided from this source view.
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only delete the old edge if no case still targets the old default.
    if (RemoveOrigDefaultBlock &&
        !is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5894
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault = !SI->defaultDestUnreachable();

  auto *BB = SI->getParent();
  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;
  // NOTE(review): the declarations of CasesA and CasesB (presumably
  // SmallVectors of ConstantInt*) were elided from this source view.

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  std::optional<ContiguousCasesResult> ContiguousCases;

  // Only one icmp is needed when there is only one case.
  if (!HasDefault && CasesA.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesA[0],
        /*Max=*/CasesA[0],
        /*Dest=*/DestA,
        /*OtherDest=*/DestB,
        /*Cases=*/&CasesA,
        /*OtherCases=*/&CasesB,
    };
  else if (CasesB.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesB[0],
        /*Max=*/CasesB[0],
        /*Dest=*/DestB,
        /*OtherDest=*/DestA,
        /*Cases=*/&CasesB,
        /*OtherCases=*/&CasesA,
    };
  // Correctness: Cases to the default destination cannot be contiguous cases.
  else if (!HasDefault)
    ContiguousCases =
        findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);

  if (!ContiguousCases)
    ContiguousCases =
        findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);

  if (!ContiguousCases)
    return false;

  auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;

  // Start building the compare and branch.

  // NOTE(review): the declaration of Offset (presumably the negation of Min,
  // used to bias the condition to zero) was elided from this source view.
  Constant *NumCases = ConstantInt::get(Offset->getType(),
                                        Max->getValue() - Min->getValue() + 1);
  Instruction *NewBI;
  if (NumCases->isOneValue()) {
    // Single-value range: a plain equality test suffices.
    assert(Max->getValue() == Min->getValue());
    Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
  }
  // If NumCases overflowed, then all possible values jump to the successor.
  else if (NumCases->isNullValue() && !Cases->empty()) {
    NewBI = Builder.CreateBr(Dest);
  } else {
    // General case: (condition + Offset) ult NumCases.
    Value *Sub = SI->getCondition();
    if (!Offset->isNullValue())
      Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
    Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
  }

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI) && isa<CondBrInst>(NewBI)) {
    SmallVector<uint64_t, 8> Weights;
    getBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      // Sum per-destination weights, scaling down to stay within uint32_t.
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == Dest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  for (auto &PHI : make_early_inc_range(Dest->phis())) {
    unsigned PreviousEdges = Cases->size();
    if (Dest == SI->getDefaultDest())
      ++PreviousEdges;
    // Keep exactly one incoming entry from this block for the new edge.
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      PHI.removeIncomingValue(SI->getParent());
  }
  for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
    unsigned PreviousEdges = OtherCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    unsigned E = PreviousEdges - 1;
    // Remove all incoming values from OtherDest if OtherDest is unreachable.
    if (isa<UncondBrInst>(NewBI))
      ++E;
    for (unsigned I = 0; I != E; ++I)
      PHI.removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)
    // NOTE(review): the statement under this condition (likely a call to
    // createUnreachableSwitchDefault) was elided from this source view.

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
6052
6053/// Compute masked bits for the condition of a switch
6054/// and use it to remove dead cases.
// NOTE(review): the extraction dropped source line 6055 here — the function
// signature (presumably `static bool eliminateDeadSwitchCases(SwitchInst *SI,
// DomTreeUpdater *DTU,` — TODO confirm against upstream) is not visible.
6056                                     AssumptionCache *AC,
6057                                     const DataLayout &DL) {
6058  Value *Cond = SI->getCondition();
6059  KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
// NOTE(review): line 6060 dropped — the declaration of KnownValues (the set
// of possible condition values filled by collectPossibleValues) is missing.
6061  bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);
6062
6063  // We can also eliminate cases by determining that their values are outside of
6064  // the limited range of the condition based on how many significant (non-sign)
6065  // bits are in the condition value.
6066  unsigned MaxSignificantBitsInCond =
// NOTE(review): line 6067 dropped — the initializer of MaxSignificantBitsInCond
// is not visible in this chunk.
6068
6069  // Gather dead cases.
// NOTE(review): line 6070 dropped — the declaration of DeadCases is missing.
6071  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
6072  SmallVector<BasicBlock *, 8> UniqueSuccessors;
6073  for (const auto &Case : SI->cases()) {
6074    auto *Successor = Case.getCaseSuccessor();
      // Per-successor case counts are only needed to decide which CFG edges to
      // report as deleted to the DomTreeUpdater below.
6075    if (DTU) {
6076      auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
6077      if (Inserted)
6078        UniqueSuccessors.push_back(Successor);
6079      ++It->second;
6080    }
6081    ConstantInt *CaseC = Case.getCaseValue();
6082    const APInt &CaseVal = CaseC->getValue();
      // A case is dead if its value contradicts the known bits of the
      // condition, exceeds its significant-bit range, or is absent from the
      // collected set of possible values.
6083    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
6084        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
6085        (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
6086      DeadCases.push_back(CaseC);
6087      if (DTU)
6088        --NumPerSuccessorCases[Successor];
6089      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6090                        << " is dead.\n");
6091    } else if (IsKnownValuesValid)
6092      KnownValues.erase(CaseC);
6093  }
6094
6095  // If we can prove that the cases must cover all possible values, the
6096  // default destination becomes dead and we can remove it. If we know some
6097  // of the bits in the value, we can use that to more precisely compute the
6098  // number of possible unique case values.
6099  bool HasDefault = !SI->defaultDestUnreachable();
6100  const unsigned NumUnknownBits =
6101      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
6102  assert(NumUnknownBits <= Known.getBitWidth());
6103  if (HasDefault && DeadCases.empty()) {
6104    if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
// NOTE(review): line 6105 dropped — the call performed before returning is
// not visible in this chunk.
6106      return true;
6107    }
6108
6109    if (NumUnknownBits < 64 /* avoid overflow */) {
6110      uint64_t AllNumCases = 1ULL << NumUnknownBits;
6111      if (SI->getNumCases() == AllNumCases) {
// NOTE(review): line 6112 dropped — the call performed before returning is
// not visible in this chunk.
6113        return true;
6114      }
6115      // When only one case value is missing, replace default with that case.
6116      // Eliminating the default branch will provide more opportunities for
6117      // optimization, such as lookup tables.
6118      if (SI->getNumCases() == AllNumCases - 1) {
6119        assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
6120        IntegerType *CondTy = cast<IntegerType>(Cond->getType());
6121        if (CondTy->getIntegerBitWidth() > 64 ||
6122            !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6123          return false;
6124
        // XOR of all present case values with the full value range leaves
        // exactly the one missing value (every other value cancels out).
6125        uint64_t MissingCaseVal = 0;
6126        for (const auto &Case : SI->cases())
6127          MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6128        auto *MissingCase = cast<ConstantInt>(
6129            ConstantInt::get(Cond->getType(), MissingCaseVal));
// NOTE(review): line 6130 dropped — the declaration of SIW (the profile-aware
// switch wrapper used below) is not visible in this chunk.
6131        SIW.addCase(MissingCase, SI->getDefaultDest(),
6132                    SIW.getSuccessorWeight(0));
// NOTE(review): line 6133 dropped — the call taking the
// /*RemoveOrigDefaultBlock*/ argument below is not visible in this chunk.
6134                                   /*RemoveOrigDefaultBlock*/ false);
6135        SIW.setSuccessorWeight(0, 0);
6136        return true;
6137      }
6138    }
6139  }
6140
6141  if (DeadCases.empty())
6142    return false;
6143
// NOTE(review): line 6144 dropped — the declaration of SIW used below is not
// visible in this chunk.
6145  for (ConstantInt *DeadCase : DeadCases) {
6146    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
6147    assert(CaseI != SI->case_default() &&
6148           "Case was not found. Probably mistake in DeadCases forming.");
6149    // Prune unused values from PHI nodes.
6150    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
6151    SIW.removeCase(CaseI);
6152  }
6153
  // Tell the dominator tree about edges that no longer exist: successors all
  // of whose cases were removed.
6154  if (DTU) {
6155    std::vector<DominatorTree::UpdateType> Updates;
6156    for (auto *Successor : UniqueSuccessors)
6157      if (NumPerSuccessorCases[Successor] == 0)
6158        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
6159    DTU->applyUpdates(Updates);
6160  }
6161
6162  return true;
6163}
6164
6165/// If BB would be eligible for simplification by
6166/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6167/// by an unconditional branch), look at the phi node for BB in the successor
6168/// block and see if the incoming value is equal to CaseValue. If so, return
6169/// the phi node, and set PhiIndex to BB's index in the phi node.
// NOTE(review): the extraction dropped line 6170 here — the first line of the
// function signature (binding the CaseValue parameter) is not visible.
6171                                             BasicBlock *BB, int *PhiIndex) {
6172  if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6173    return nullptr; // BB must be empty to be a candidate for simplification.
6174  if (!BB->getSinglePredecessor())
6175    return nullptr; // BB must be dominated by the switch.
6176
// NOTE(review): line 6177 dropped — the initialization of Branch (from BB's
// terminator) is not visible in this chunk.
6178  if (!Branch)
6179    return nullptr; // Terminator must be unconditional branch.
6180
6181  BasicBlock *Succ = Branch->getSuccessor();
6182
  // Scan the successor's PHIs for one whose incoming value along the BB edge
  // is exactly CaseValue; that PHI can later be rewritten to use the switch
  // condition itself.
6183  for (PHINode &PHI : Succ->phis()) {
6184    int Idx = PHI.getBasicBlockIndex(BB);
6185    assert(Idx >= 0 && "PHI has no entry for predecessor?");
6186
6187    Value *InValue = PHI.getIncomingValue(Idx);
6188    if (InValue != CaseValue)
6189      continue;
6190
6191    *PhiIndex = Idx;
6192    return &PHI;
6193  }
6194
6195  return nullptr;
6196}
6197
6198/// Try to forward the condition of a switch instruction to a phi node
6199/// dominated by the switch, if that would mean that some of the destination
6200/// blocks of the switch can be folded away. Return true if a change is made.
// NOTE(review): the extraction dropped line 6201 here — the function signature
// (taking the SwitchInst *SI used throughout the body) is not visible.
6202  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6203
6204  ForwardingNodesMap ForwardingNodes;
6205  BasicBlock *SwitchBlock = SI->getParent();
6206  bool Changed = false;
6207  for (const auto &Case : SI->cases()) {
6208    ConstantInt *CaseValue = Case.getCaseValue();
6209    BasicBlock *CaseDest = Case.getCaseSuccessor();
6210
6211    // Replace phi operands in successor blocks that are using the constant case
6212    // value rather than the switch condition variable:
6213    // switchbb:
6214    // switch i32 %x, label %default [
6215    //   i32 17, label %succ
6216    //   ...
6217    // succ:
6218    //   %r = phi i32 ... [ 17, %switchbb ] ...
6219    // -->
6220    //   %r = phi i32 ... [ %x, %switchbb ] ...
6221
6222    for (PHINode &Phi : CaseDest->phis()) {
6223      // This only works if there is exactly 1 incoming edge from the switch to
6224      // a phi. If there is >1, that means multiple cases of the switch map to 1
6225      // value in the phi, and that phi value is not the switch condition. Thus,
6226      // this transform would not make sense (the phi would be invalid because
6227      // a phi can't have different incoming values from the same block).
6228      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6229      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6230          count(Phi.blocks(), SwitchBlock) == 1) {
6231        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6232        Changed = true;
6233      }
6234    }
6235
6236    // Collect phi nodes that are indirectly using this switch's case constants.
6237    int PhiIdx;
6238    if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6239      ForwardingNodes[Phi].push_back(PhiIdx);
6240  }
6241
  // Rewrite the collected indirect uses. Only profitable when a PHI has at
  // least two forwardable slots, or already has the condition as an incoming
  // value (so forwarding cannot increase the number of distinct values).
6242  for (auto &ForwardingNode : ForwardingNodes) {
6243    PHINode *Phi = ForwardingNode.first;
6244    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6245    // Check if it helps to fold PHI.
6246    if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6247      continue;
6248
6249    for (int Index : Indexes)
6250      Phi->setIncomingValue(Index, SI->getCondition());
6251    Changed = true;
6252  }
6253
6254  return Changed;
6255}
6256
6257/// Return true if the backend will be able to handle
6258/// initializing an array of constants like C.
// NOTE(review): the extraction dropped line 6259 here — the function signature
// (binding Constant *C and the TargetTransformInfo &TTI) is not visible.
6260  if (C->isThreadDependent())
6261    return false;
6262  if (C->isDLLImportDependent())
6263    return false;
6264
// NOTE(review): lines 6265-6266 dropped — the condition guarding this
// `return false` (a check on the kind of constant) is not visible here.
6267    return false;
6268
6269  // Globals cannot contain scalable types.
6270  if (C->getType()->isScalableTy())
6271    return false;
6272
// NOTE(review): line 6273 dropped — the `if` that binds CE (presumably a
// dyn_cast of C to ConstantExpr — TODO confirm) is not visible here.
6274    // Pointer casts and in-bounds GEPs will not prohibit the backend from
6275    // materializing the array of constants.
6276    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
    // Recurse on the stripped constant; a no-op strip means an unsupported
    // constant expression.
6277    if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6278      return false;
6279  }
6280
  // Finally defer to the target: some constants are legal but too expensive.
6281  if (!TTI.shouldBuildLookupTablesForConstant(C))
6282    return false;
6283
6284  return true;
6285}
6286
6287/// If V is a Constant, return it. Otherwise, try to look up
6288/// its constant value in ConstantPool, returning 0 if it's not there.
6289static Constant *
// NOTE(review): lines 6290-6291 dropped — the parameter list (Value *V and the
// ConstantPool map) is not visible in this chunk.
6292  if (Constant *C = dyn_cast<Constant>(V))
6293    return C;
  // DenseMap-style lookup: returns a null Constant* when V has no entry.
6294  return ConstantPool.lookup(V);
6295}
6296
6297/// Try to fold instruction I into a constant. This works for
6298/// simple instructions such as binary operations where both operands are
6299/// constant or can be replaced by constants from the ConstantPool. Returns the
6300/// resulting constant on success, 0 otherwise.
6301static Constant *
// NOTE(review): lines 6302-6304 dropped — the parameter list and the `if`
// binding Select (presumably a dyn_cast of I to SelectInst — TODO confirm)
// are not visible in this chunk.
    // Select special case: fold based on the (known-constant) condition.
6305    Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6306    if (!A)
6307      return nullptr;
6308    if (A->isAllOnesValue())
6309      return lookupConstant(Select->getTrueValue(), ConstantPool);
6310    if (A->isNullValue())
6311      return lookupConstant(Select->getFalseValue(), ConstantPool);
6312    return nullptr;
6313  }
6314
// NOTE(review): line 6315 dropped — the declaration of COps (the collected
// constant operands) is not visible in this chunk.
6316  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6317    if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6318      COps.push_back(A);
6319    else
6320      return nullptr;
6321  }
6322
  // All operands resolved to constants; let the generic folder finish.
6323  return ConstantFoldInstOperands(I, COps, DL);
6324}
6325
6326/// Try to determine the resulting constant values in phi nodes
6327/// at the common destination basic block, *CommonDest, for one of the case
6328/// destinations CaseDest corresponding to value CaseVal (nullptr for the
6329/// default case), of a switch instruction SI.
6330static bool
// NOTE(review): line 6331 dropped — the first parameter line (SI, CaseVal,
// CaseDest) of the signature is not visible in this chunk.
6332               BasicBlock **CommonDest,
6333               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6334               const DataLayout &DL, const TargetTransformInfo &TTI) {
6335  // The block from which we enter the common destination.
6336  BasicBlock *Pred = SI->getParent();
6337
6338  // If CaseDest is empty except for some side-effect free instructions through
6339  // which we can constant-propagate the CaseVal, continue to its successor.
// NOTE(review): line 6340 dropped — the declaration of ConstantPool (the
// value-to-constant map seeded below) is not visible in this chunk.
6341  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6342  for (Instruction &I : *CaseDest) {
6343    if (I.isTerminator()) {
6344      // If the terminator is a simple branch, continue to the next block.
6345      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6346        return false;
6347      Pred = CaseDest;
6348      CaseDest = I.getSuccessor(0);
6349    } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6350      // Instruction is side-effect free and constant.
6351
6352      // If the instruction has uses outside this block or a phi node slot for
6353      // the block, it is not safe to bypass the instruction since it would then
6354      // no longer dominate all its uses.
6355      for (auto &Use : I.uses()) {
6356        User *User = Use.getUser();
// NOTE(review): line 6357 dropped — the `if` binding the inner `I` (presumably
// a dyn_cast of User to Instruction — TODO confirm) is not visible here.
6358          if (I->getParent() == CaseDest)
6359            continue;
        // A PHI use is fine when the incoming edge comes from CaseDest — the
        // value is replaced by its constant anyway.
6360        if (PHINode *Phi = dyn_cast<PHINode>(User))
6361          if (Phi->getIncomingBlock(Use) == CaseDest)
6362            continue;
6363        return false;
6364      }
6365
6366      ConstantPool.insert(std::make_pair(&I, C));
6367    } else {
      // Non-foldable, non-terminator instruction: stop scanning forward.
6368      break;
6369    }
6370  }
6371
6372  // If we did not have a CommonDest before, use the current one.
6373  if (!*CommonDest)
6374    *CommonDest = CaseDest;
6375  // If the destination isn't the common one, abort.
6376  if (CaseDest != *CommonDest)
6377    return false;
6378
6379  // Get the values for this case from phi nodes in the destination block.
6380  for (PHINode &PHI : (*CommonDest)->phis()) {
6381    int Idx = PHI.getBasicBlockIndex(Pred);
6382    if (Idx == -1)
6383      continue;
6384
6385    Constant *ConstVal =
6386        lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6387    if (!ConstVal)
6388      return false;
6389
6390    // Be conservative about which kinds of constants we support.
6391    if (!validLookupTableConstant(ConstVal, TTI))
6392      return false;
6393
6394    Res.push_back(std::make_pair(&PHI, ConstVal));
6395  }
6396
  // Success only if at least one PHI produced a usable constant.
6397  return Res.size() > 0;
6398}
6399
6400// Helper function used to add CaseVal to the list of cases that generate
6401// Result. Returns the updated number of cases that generate this result.
6402static size_t mapCaseToResult(ConstantInt *CaseVal,
6403 SwitchCaseResultVectorTy &UniqueResults,
6404 Constant *Result) {
6405 for (auto &I : UniqueResults) {
6406 if (I.first == Result) {
6407 I.second.push_back(CaseVal);
6408 return I.second.size();
6409 }
6410 }
6411 UniqueResults.push_back(
6412 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6413 return 1;
6414}
6415
6416// Helper function that initializes a map containing
6417// results for the PHI node of the common destination block for a switch
6418// instruction. Returns false if multiple PHI nodes have been found or if
6419// there is not a common destination block for the switch.
// NOTE(review): the extraction dropped line 6420 here — the first line of the
// signature (binding SI and the PHINode *&PHI out-parameter) is not visible.
6421                                  BasicBlock *&CommonDest,
6422                                  SwitchCaseResultVectorTy &UniqueResults,
6423                                  Constant *&DefaultResult,
6424                                  const DataLayout &DL,
6425                                  const TargetTransformInfo &TTI,
6426                                  uintptr_t MaxUniqueResults) {
6427  for (const auto &I : SI->cases()) {
6428    ConstantInt *CaseVal = I.getCaseValue();
6429
6430    // Resulting value at phi nodes for this case value.
6431    SwitchCaseResultsTy Results;
6432    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6433                        DL, TTI))
6434      return false;
6435
6436    // Only one value per case is permitted.
6437    if (Results.size() > 1)
6438      return false;
6439
6440    // Add the case->result mapping to UniqueResults.
6441    const size_t NumCasesForResult =
6442        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6443
6444    // Early out if there are too many cases for this result.
6445    if (NumCasesForResult > MaxSwitchCasesPerResult)
6446      return false;
6447
6448    // Early out if there are too many unique results.
6449    if (UniqueResults.size() > MaxUniqueResults)
6450      return false;
6451
6452    // Check the PHI consistency.
6453    if (!PHI)
6454      PHI = Results[0].first;
6455    else if (PHI != Results[0].first)
6456      return false;
6457  }
6458  // Find the default result value.
// NOTE(review): line 6459 dropped — the declaration of DefaultResults is not
// visible in this chunk.
6460  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6461                 DL, TTI);
6462  // If the default value is not found abort unless the default destination
6463  // is unreachable.
6464  DefaultResult =
6465      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6466
6467  return DefaultResult || SI->defaultDestUnreachable();
6468}
6469
6470// Helper function that checks if it is possible to transform a switch with only
6471// two cases (or two cases + default) that produces a result into a select.
6472// TODO: Handle switches with more than 2 cases that map to the same result.
6473// The branch weights correspond to the provided Condition (i.e. if Condition is
6474// modified from the original SwitchInst, the caller must adjust the weights)
// NOTE(review): the extraction dropped lines 6508, 6525, 6572, 6598 and 6618
// inside this function — each is the opening line of a branch-weight update
// call whose continuation arguments are still visible below.
6475static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6476                                 Constant *DefaultResult, Value *Condition,
6477                                 IRBuilder<> &Builder, const DataLayout &DL,
6478                                 ArrayRef<uint32_t> BranchWeights) {
6479  // If we are selecting between only two cases transform into a simple
6480  // select or a two-way select if default is possible.
6481  // Example:
6482  // switch (a) {                  %0 = icmp eq i32 %a, 10
6483  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
6484  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
6485  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
6486  // }
6487
6488  const bool HasBranchWeights =
6489      !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6490
6491  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6492      ResultVector[1].second.size() == 1) {
6493    ConstantInt *FirstCase = ResultVector[0].second[0];
6494    ConstantInt *SecondCase = ResultVector[1].second[0];
6495    Value *SelectValue = ResultVector[1].first;
6496    if (DefaultResult) {
6497      Value *ValueCompare =
6498          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6499      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6500                                         DefaultResult, "switch.select");
6501      if (auto *SI = dyn_cast<SelectInst>(SelectValue);
6502          SI && HasBranchWeights) {
6503        // We start with 3 probabilities, where the numerator is the
6504        // corresponding BranchWeights[i], and the denominator is the sum over
6505        // BranchWeights. We want the probability and negative probability of
6506        // Condition == SecondCase.
6507        assert(BranchWeights.size() == 3);
// NOTE(review): line 6508 dropped — the call taking the arguments below is
// not visible in this chunk.
6509            *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6510            /*IsExpected=*/false, /*ElideAllZero=*/true);
6511      }
6512    }
6513    Value *ValueCompare =
6514        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6515    Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6516                                      SelectValue, "switch.select");
6517    if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6518      // We may have had a DefaultResult. Base the position of the first and
6519      // second's branch weights accordingly. Also the proability that Condition
6520      // != FirstCase needs to take that into account.
6521      assert(BranchWeights.size() >= 2);
6522      size_t FirstCasePos = (Condition != nullptr);
6523      size_t SecondCasePos = FirstCasePos + 1;
6524      uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
// NOTE(review): line 6525 dropped — the call taking the arguments below is
// not visible in this chunk.
6526                             {BranchWeights[FirstCasePos],
6527                              DefaultCase + BranchWeights[SecondCasePos]},
6528                             /*IsExpected=*/false, /*ElideAllZero=*/true);
6529    }
6530    return Ret;
6531  }
6532
6533  // Handle the degenerate case where two cases have the same result value.
6534  if (ResultVector.size() == 1 && DefaultResult) {
6535    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6536    unsigned CaseCount = CaseValues.size();
6537    // n bits group cases map to the same result:
6538    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
6539    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
6540    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6541    if (isPowerOf2_32(CaseCount)) {
6542      ConstantInt *MinCaseVal = CaseValues[0];
6543      // If there are bits that are set exclusively by CaseValues, we
6544      // can transform the switch into a select if the conjunction of
6545      // all the values uniquely identify CaseValues.
6546      APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());
6547
6548      // Find the minimum value and compute the and of all the case values.
6549      for (auto *Case : CaseValues) {
6550        if (Case->getValue().slt(MinCaseVal->getValue()))
6551          MinCaseVal = Case;
6552        AndMask &= Case->getValue();
6553      }
6554      KnownBits Known = computeKnownBits(Condition, DL);
6555
6556      if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
6557        // Compute the number of bits that are free to vary.
6558        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6559
6560        // Check if the number of values covered by the mask is equal
6561        // to the number of cases.
6562        if (FreeBits == Log2_32(CaseCount)) {
6563          Value *And = Builder.CreateAnd(Condition, AndMask);
6564          Value *Cmp = Builder.CreateICmpEQ(
6565              And, Constant::getIntegerValue(And->getType(), AndMask));
6566          Value *Ret =
6567              Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6568          if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6569            // We know there's a Default case. We base the resulting branch
6570            // weights off its probability.
6571            assert(BranchWeights.size() >= 2);
// NOTE(review): line 6572 dropped — the call taking the arguments below is
// not visible in this chunk.
6573                *SI,
6574                {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6575                /*IsExpected=*/false, /*ElideAllZero=*/true);
6576          }
6577          return Ret;
6578        }
6579      }
6580
6581      // Mark the bits case number touched.
6582      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6583      for (auto *Case : CaseValues)
6584        BitMask |= (Case->getValue() - MinCaseVal->getValue());
6585
6586      // Check if cases with the same result can cover all number
6587      // in touched bits.
6588      if (BitMask.popcount() == Log2_32(CaseCount)) {
        // Rebase the condition so the case set starts at zero before masking.
6589        if (!MinCaseVal->isNullValue())
6590          Condition = Builder.CreateSub(Condition, MinCaseVal);
6591        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6592        Value *Cmp = Builder.CreateICmpEQ(
6593            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6594        Value *Ret =
6595            Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6596        if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6597          assert(BranchWeights.size() >= 2);
// NOTE(review): line 6598 dropped — the call taking the arguments below is
// not visible in this chunk.
6599              *SI,
6600              {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6601              /*IsExpected=*/false, /*ElideAllZero=*/true);
6602        }
6603        return Ret;
6604      }
6605    }
6606
6607    // Handle the degenerate case where two cases have the same value.
6608    if (CaseValues.size() == 2) {
6609      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6610                                         "switch.selectcmp.case1");
6611      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6612                                         "switch.selectcmp.case2");
6613      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6614      Value *Ret =
6615          Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6616      if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6617        assert(BranchWeights.size() >= 2);
// NOTE(review): line 6618 dropped — the call taking the arguments below is
// not visible in this chunk.
6619            *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6620            /*IsExpected=*/false, /*ElideAllZero=*/true);
6621      }
6622      return Ret;
6623    }
6624  }
6625
  // No supported pattern matched; caller keeps the switch.
6626  return nullptr;
6627}
6628
6629// Helper function to cleanup a switch instruction that has been converted into
6630// a select, fixing up PHI nodes and basic blocks.
// NOTE(review): the extraction dropped line 6631 here — the first line of the
// signature (binding SI and PHI) is not visible in this chunk.
6632                                        Value *SelectValue,
6633                                        IRBuilder<> &Builder,
6634                                        DomTreeUpdater *DTU) {
6635  std::vector<DominatorTree::UpdateType> Updates;
6636
6637  BasicBlock *SelectBB = SI->getParent();
6638  BasicBlock *DestBB = PHI->getParent();
6639
  // The switch block now branches unconditionally to the PHI's block; record
  // the new edge only if it did not already exist.
6640  if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6641    Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6642  Builder.CreateBr(DestBB);
6643
6644  // Remove the switch.
6645
  // Collapse all of the switch's incoming PHI slots into a single entry
  // carrying the select's value.
6646  PHI->removeIncomingValueIf(
6647      [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6648  PHI->addIncoming(SelectValue, SelectBB);
6649
6650  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6651  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6652    BasicBlock *Succ = SI->getSuccessor(i);
6653
6654    if (Succ == DestBB)
6655      continue;
6656    Succ->removePredecessor(SelectBB);
    // Deduplicate: a successor reached by several cases yields one deletion.
6657    if (DTU && RemovedSuccessors.insert(Succ).second)
6658      Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6659  }
6660  SI->eraseFromParent();
6661  if (DTU)
6662    DTU->applyUpdates(Updates);
6663}
6664
6665/// If a switch is only used to initialize one or more phi nodes in a common
6666/// successor block with only two different constant values, try to replace the
6667/// switch with a select. Returns true if the fold was made.
// NOTE(review): the extraction dropped line 6668 here — the first line of the
// signature (binding SI and the IRBuilder) is not visible in this chunk.
6669                              DomTreeUpdater *DTU, const DataLayout &DL,
6670                              const TargetTransformInfo &TTI) {
6671  Value *const Cond = SI->getCondition();
6672  PHINode *PHI = nullptr;
6673  BasicBlock *CommonDest = nullptr;
6674  Constant *DefaultResult;
6675  SwitchCaseResultVectorTy UniqueResults;
6676  // Collect all the cases that will deliver the same value from the switch.
6677  if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6678                             DL, TTI, /*MaxUniqueResults*/ 2))
6679    return false;
6680
6681  assert(PHI != nullptr && "PHI for value select not found");
6682  Builder.SetInsertPoint(SI);
6683  SmallVector<uint32_t, 4> BranchWeights;
// NOTE(review): line 6684 dropped — the condition/scope opener for the weight
// extraction below is not visible in this chunk.
6685    [[maybe_unused]] auto HasWeights =
// NOTE(review): line 6686 dropped — the initializer of HasWeights (the branch
// weight extraction call filling BranchWeights) is not visible here.
6687    assert(!HasWeights == (BranchWeights.empty()));
6688  }
  // One weight per unique result, plus one for the default when present.
6689  assert(BranchWeights.empty() ||
6690         (BranchWeights.size() >=
6691          UniqueResults.size() + (DefaultResult != nullptr)));
6692
6693  Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
6694                                          Builder, DL, BranchWeights);
6695  if (!SelectValue)
6696    return false;
6697
6698  removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6699  return true;
6700}
6701
6702namespace {

6704/// This class finds alternatives for switches to ultimately
6705/// replace the switch.
6706class SwitchReplacement {
6707public:
6708  /// Create a helper for optimizations to use as a switch replacement.
6709  /// Find a better representation for the content of Values,
6710  /// using DefaultValue to fill any holes in the table.
  /// FuncName: presumably used to name any generated table global — TODO
  /// confirm against replaceSwitch/table-emission code (not visible here).
6711  SwitchReplacement(
6712      Module &M, uint64_t TableSize, ConstantInt *Offset,
6713      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6714      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

6716  /// Build instructions with Builder to retrieve values using Index
6717  /// and replace the switch.
6718  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
6719                       Function *Func);

6721  /// Return true if a table with TableSize elements of
6722  /// type ElementType would fit in a target-legal register.
6723  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6724                                 Type *ElementType);

6726  /// Return the default value of the switch.
6727  Constant *getDefaultValue();

6729  /// Return true if the replacement is a lookup table.
6730  bool isLookupTable();

6732  /// Return true if the replacement is a bit map.
6733  bool isBitMap();

6735private:
  // Depending on the switch, there are different alternatives.
  // The constructor picks the cheapest applicable kind, in this order:
  // SingleValueKind, LinearMapKind, BitMapKind, then LookupTableKind.
6737  enum {
6738    // For switches where each case contains the same value, we just have to
6739    // store that single value and return it for each lookup.
6740    SingleValueKind,

6742    // For switches where there is a linear relationship between table index
6743    // and values. We calculate the result with a simple multiplication
6744    // and addition instead of a table lookup.
6745    LinearMapKind,

6747    // For small tables with integer elements, we can pack them into a bitmap
6748    // that fits into a target-legal register. Values are retrieved by
6749    // shift and mask operations.
6750    BitMapKind,

6752    // The table is stored as an array of values. Values are retrieved by load
6753    // instructions from the table.
6754    LookupTableKind
6755  } Kind;

6757  // The default value of the switch.
6758  Constant *DefaultValue;

6760  // The type of the output values.
6761  Type *ValueType;

6763  // For SingleValueKind, this is the single value.
6764  Constant *SingleValue = nullptr;

6766  // For BitMapKind, this is the bitmap.
6767  ConstantInt *BitMap = nullptr;
6768  IntegerType *BitMapElementTy = nullptr;

6770  // For LinearMapKind, these are the constants used to derive the value.
6771  ConstantInt *LinearOffset = nullptr;
6772  ConstantInt *LinearMultiplier = nullptr;
  // True when the linear map may wrap, in which case nsw flags must not be
  // attached to the generated mul/add.
6773  bool LinearMapValWrapped = false;

6775  // For LookupTableKind, this is the table.
6776  Constant *Initializer = nullptr;
6777};

6779} // end anonymous namespace
6780
// Constructor: inspect the case Values and choose the cheapest replacement
// Kind (single value, linear map, bitmap, or array lookup table).
// NOTE(review): the extraction dropped lines 6832 and 6888 inside this body;
// both gaps are flagged below where they occur.
6781SwitchReplacement::SwitchReplacement(
6782    Module &M, uint64_t TableSize, ConstantInt *Offset,
6783    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6784    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
6785    : DefaultValue(DefaultValue) {
6786  assert(Values.size() && "Can't build lookup table without values!");
6787  assert(TableSize >= Values.size() && "Can't fit values in table!");

6789  // If all values in the table are equal, this is that value.
6790  SingleValue = Values.begin()->second;

6792  ValueType = Values.begin()->second->getType();

6794  // Build up the table contents.
6795  SmallVector<Constant *, 64> TableContents(TableSize);
6796  for (const auto &[CaseVal, CaseRes] : Values) {
6797    assert(CaseRes->getType() == ValueType);

    // Table slots are indexed relative to the smallest case value (Offset).
6799    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6800    TableContents[Idx] = CaseRes;

    // Poison results don't disqualify a single-value table; any other
    // differing value does.
6802    if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6803      SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6804  }

6806  // Fill in any holes in the table with the default result.
6807  if (Values.size() < TableSize) {
6808    assert(DefaultValue &&
6809           "Need a default value to fill the lookup table holes.");
6810    assert(DefaultValue->getType() == ValueType);
6811    for (uint64_t I = 0; I < TableSize; ++I) {
6812      if (!TableContents[I])
6813        TableContents[I] = DefaultValue;
6814    }

6816    // If the default value is poison, all the holes are poison.
6817    bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);

6819    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6820      SingleValue = nullptr;
6821  }

6823  // If each element in the table contains the same value, we only need to store
6824  // that single value.
6825  if (SingleValue) {
6826    Kind = SingleValueKind;
6827    return;
6828  }

6830  // Check if we can derive the value with a linear transformation from the
6831  // table index.
// NOTE(review): line 6832 dropped — the condition opening this scope
// (presumably a check that ValueType is an integer type — TODO confirm) is
// not visible; its closing brace is the one at line 6884 below.
6833    bool LinearMappingPossible = true;
6834    APInt PrevVal;
6835    APInt DistToPrev;
6836    // When linear map is monotonic and signed overflow doesn't happen on
6837    // maximum index, we can attach nsw on Add and Mul.
6838    bool NonMonotonic = false;
6839    assert(TableSize >= 2 && "Should be a SingleValue table.");
6840    // Check if there is the same distance between two consecutive values.
6841    for (uint64_t I = 0; I < TableSize; ++I) {
6842      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);

6844      if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6845        // This is an poison, so it's (probably) a lookup table hole.
6846        // To prevent any regressions from before we switched to using poison as
6847        // the default value, holes will fall back to using the first value.
6848        // This can be removed once we add proper handling for poisons in lookup
6849        // tables.
6850        ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6851      }

6853      if (!ConstVal) {
6854        // This is an undef. We could deal with it, but undefs in lookup tables
6855        // are very seldom. It's probably not worth the additional complexity.
6856        LinearMappingPossible = false;
6857        break;
6858      }
6859      const APInt &Val = ConstVal->getValue();
6860      if (I != 0) {
6861        APInt Dist = Val - PrevVal;
6862        if (I == 1) {
6863          DistToPrev = Dist;
6864        } else if (Dist != DistToPrev) {
6865          LinearMappingPossible = false;
6866          break;
6867        }
        // A positive step must strictly increase and a non-positive step must
        // not increase, else the map wrapped somewhere.
6868        NonMonotonic |=
6869            Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6870      }
6871      PrevVal = Val;
6872    }
6873    if (LinearMappingPossible) {
6874      LinearOffset = cast<ConstantInt>(TableContents[0]);
6875      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6876      APInt M = LinearMultiplier->getValue();
6877      bool MayWrap = true;
      // Probe multiplier * (TableSize-1) for signed overflow; MayWrap is set
      // by smul_ov.
6878      if (isIntN(M.getBitWidth(), TableSize - 1))
6879        (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6880      LinearMapValWrapped = NonMonotonic || MayWrap;
6881      Kind = LinearMapKind;
6882      return;
6883    }
6884  }

6886  // If the type is integer and the table fits in a register, build a bitmap.
6887  if (wouldFitInRegister(DL, TableSize, ValueType)) {
// NOTE(review): line 6888 dropped — the declaration of IT (the integer
// element type used below) is not visible in this chunk.
6889    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    // Pack elements high-to-low so element 0 ends up in the low bits.
6890    for (uint64_t I = TableSize; I > 0; --I) {
6891      TableInt <<= IT->getBitWidth();
6892      // Insert values into the bitmap. Undef values are set to zero.
6893      if (!isa<UndefValue>(TableContents[I - 1])) {
6894        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6895        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6896      }
6897    }
6898    BitMap = ConstantInt::get(M.getContext(), TableInt);
6899    BitMapElementTy = IT;
6900    Kind = BitMapKind;
6901    return;
6902  }

6904  // Store the table in an array.
6905  auto *TableTy = ArrayType::get(ValueType, TableSize);
6906  Initializer = ConstantArray::get(TableTy, TableContents);

6908  Kind = LookupTableKind;
6909}
6910
/// Emit IR that yields, for the given table index, the value the original
/// switch produced for this phi, using whichever representation the
/// constructor selected (Kind): a single constant, a linear function of the
/// index, a shifted/masked bitmap in a scalar register, or a load from a
/// constant global array.
Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
                                        const DataLayout &DL, Function *Func) {
  switch (Kind) {
  case SingleValueKind:
    // All table entries are identical; no computation or load is needed.
    return SingleValue;
  case LinearMapKind: {
    ++NumLinearMaps;
    // Derive the result value from the input value:
    //   Result = LinearOffset + Index * LinearMultiplier
    // The multiply/add are skipped when they would be no-ops.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    ++NumBitMaps;
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off: truncating to the element type keeps only the low
    // BitMapElementTy bits, i.e. the selected table entry.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case LookupTableKind: {
    ++NumLookupTables;
    // Materialize the table as a private constant global in this module.
    auto *Table =
        new GlobalVariable(*Func->getParent(), Initializer->getType(),
                           /*isConstant=*/true, GlobalVariable::PrivateLinkage,
                           Initializer, "switch.table." + Func->getName());
    Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of an array items. We will be only loading one
    // value out of it.
    Table->setAlignment(DL.getPrefTypeAlign(ValueType));
    Type *IndexTy = DL.getIndexType(Table->getType());
    auto *ArrayTy = cast<ArrayType>(Table->getValueType());

    if (Index->getType() != IndexTy) {
      unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
      Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
      // If every valid table index fits in OldBitWidth-1 bits, the zext
      // cannot see a "negative" (high-bit-set) input, so mark it nneg.
      if (auto *Zext = dyn_cast<ZExtInst>(Index))
        Zext->setNonNeg(
            isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
    }

    Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
    Value *GEP =
        Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
    return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
  }
  }
  llvm_unreachable("Unknown helper kind!");
}
6984
6985bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
6986 uint64_t TableSize,
6987 Type *ElementType) {
6988 auto *IT = dyn_cast<IntegerType>(ElementType);
6989 if (!IT)
6990 return false;
6991 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6992 // are <= 15, we could try to narrow the type.
6993
6994 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6995 if (TableSize >= UINT_MAX / IT->getBitWidth())
6996 return false;
6997 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6998}
6999
7001 const DataLayout &DL) {
7002 // Allow any legal type.
7003 if (TTI.isTypeLegal(Ty))
7004 return true;
7005
7006 auto *IT = dyn_cast<IntegerType>(Ty);
7007 if (!IT)
7008 return false;
7009
7010 // Also allow power of 2 integer types that have at least 8 bits and fit in
7011 // a register. These types are common in frontend languages and targets
7012 // usually support loads of these types.
7013 // TODO: We could relax this to any integer that fits in a register and rely
7014 // on ABI alignment and padding in the table to allow the load to be widened.
7015 // Or we could widen the constants and truncate the load.
7016 unsigned BitWidth = IT->getBitWidth();
7017 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
7018 DL.fitsInLegalInteger(IT->getBitWidth());
7019}
7020
7021Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7022
7023bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7024
7025bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7026
/// Return true if NumCases cases over a value range of CaseRange is dense
/// enough to be worth turning into a table-like structure.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t MinDensity = 40;

  // Bail out before either product below could overflow a uint64_t.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Dense iff NumCases / CaseRange >= MinDensity percent.
  const uint64_t ScaledCases = NumCases * 100;
  const uint64_t RequiredCases = CaseRange * MinDensity;
  return ScaledCases >= RequiredCases;
}
7038
7040 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7041 uint64_t Range = Diff + 1;
7042 if (Range < Diff)
7043 return false; // Overflow.
7044
7045 return isSwitchDense(Values.size(), Range);
7046}
7047
7048/// Determine whether a lookup table should be built for this switch, based on
7049/// the number of cases, size of the table, and the types of the results.
7050// TODO: We could support larger than legal types by limiting based on the
7051// number of loads required and/or table size. If the constants are small we
7052// could use smaller table entries and extend after the load.
7054 const TargetTransformInfo &TTI,
7055 const DataLayout &DL,
7056 const SmallVector<Type *> &ResultTypes) {
7057 if (SI->getNumCases() > TableSize)
7058 return false; // TableSize overflowed.
7059
7060 bool AllTablesFitInRegister = true;
7061 bool HasIllegalType = false;
7062 for (const auto &Ty : ResultTypes) {
7063 // Saturate this flag to true.
7064 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
7065
7066 // Saturate this flag to false.
7067 AllTablesFitInRegister =
7068 AllTablesFitInRegister &&
7069 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
7070
7071 // If both flags saturate, we're done. NOTE: This *only* works with
7072 // saturating flags, and all flags have to saturate first due to the
7073 // non-deterministic behavior of iterating over a dense map.
7074 if (HasIllegalType && !AllTablesFitInRegister)
7075 break;
7076 }
7077
7078 // If each table would fit in a register, we should build it anyway.
7079 if (AllTablesFitInRegister)
7080 return true;
7081
7082 // Don't build a table that doesn't fit in-register if it has illegal types.
7083 if (HasIllegalType)
7084 return false;
7085
7086 return isSwitchDense(SI->getNumCases(), TableSize);
7087}
7088
7090 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7091 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7092 const DataLayout &DL, const TargetTransformInfo &TTI) {
7093 if (MinCaseVal.isNullValue())
7094 return true;
7095 if (MinCaseVal.isNegative() ||
7096 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7097 !HasDefaultResults)
7098 return false;
7099 return all_of(ResultTypes, [&](const auto &ResultType) {
7100 return SwitchReplacement::wouldFitInRegister(
7101 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
7102 });
7103}
7104
7105/// Try to reuse the switch table index compare. Following pattern:
7106/// \code
7107/// if (idx < tablesize)
7108/// r = table[idx]; // table does not contain default_value
7109/// else
7110/// r = default_value;
7111/// if (r != default_value)
7112/// ...
7113/// \endcode
7114/// Is optimized to:
7115/// \code
7116/// cond = idx < tablesize;
7117/// if (cond)
7118/// r = table[idx];
7119/// else
7120/// r = default_value;
7121/// if (cond)
7122/// ...
7123/// \endcode
7124/// Jump threading will then eliminate the second if(cond).
7126 User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch,
7127 Constant *DefaultValue,
7128 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
7130 if (!CmpInst)
7131 return;
7132
7133 // We require that the compare is in the same block as the phi so that jump
7134 // threading can do its work afterwards.
7135 if (CmpInst->getParent() != PhiBlock)
7136 return;
7137
7139 if (!CmpOp1)
7140 return;
7141
7142 Value *RangeCmp = RangeCheckBranch->getCondition();
7143 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
7144 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
7145
7146 // Check if the compare with the default value is constant true or false.
7147 const DataLayout &DL = PhiBlock->getDataLayout();
7149 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
7150 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
7151 return;
7152
7153 // Check if the compare with the case values is distinct from the default
7154 // compare result.
7155 for (auto ValuePair : Values) {
7157 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
7158 if (!CaseConst || CaseConst == DefaultConst ||
7159 (CaseConst != TrueConst && CaseConst != FalseConst))
7160 return;
7161 }
7162
7163 // Check if the branch instruction dominates the phi node. It's a simple
7164 // dominance check, but sufficient for our needs.
7165 // Although this check is invariant in the calling loops, it's better to do it
7166 // at this late stage. Practically we do it at most once for a switch.
7167 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
7168 for (BasicBlock *Pred : predecessors(PhiBlock)) {
7169 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
7170 return;
7171 }
7172
7173 if (DefaultConst == FalseConst) {
7174 // The compare yields the same result. We can replace it.
7175 CmpInst->replaceAllUsesWith(RangeCmp);
7176 ++NumTableCmpReuses;
7177 } else {
7178 // The compare yields the same result, just inverted. We can replace it.
7179 Value *InvertedTableCmp = BinaryOperator::CreateXor(
7180 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
7181 RangeCheckBranch->getIterator());
7182 CmpInst->replaceAllUsesWith(InvertedTableCmp);
7183 ++NumTableCmpReuses;
7184 }
7185}
7186
7187/// If the switch is only used to initialize one or more phi nodes in a common
7188/// successor block with different constant values, replace the switch with
7189/// lookup tables.
7191 DomTreeUpdater *DTU, const DataLayout &DL,
7192 const TargetTransformInfo &TTI,
7193 bool ConvertSwitchToLookupTable) {
7194 assert(SI->getNumCases() > 1 && "Degenerate switch?");
7195
7196 BasicBlock *BB = SI->getParent();
7197 Function *Fn = BB->getParent();
7198
7199 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
7200 // split off a dense part and build a lookup table for that.
7201
7202 // FIXME: This creates arrays of GEPs to constant strings, which means each
7203 // GEP needs a runtime relocation in PIC code. We should just build one big
7204 // string and lookup indices into that.
7205
7206 // Ignore switches with less than three cases. Lookup tables will not make
7207 // them faster, so we don't analyze them.
7208 if (SI->getNumCases() < 3)
7209 return false;
7210
7211 // Figure out the corresponding result for each case value and phi node in the
7212 // common destination, as well as the min and max case values.
7213 assert(!SI->cases().empty());
7214 SwitchInst::CaseIt CI = SI->case_begin();
7215 ConstantInt *MinCaseVal = CI->getCaseValue();
7216 ConstantInt *MaxCaseVal = CI->getCaseValue();
7217
7218 BasicBlock *CommonDest = nullptr;
7219
7220 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
7222
7224 SmallVector<Type *> ResultTypes;
7226
7227 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7228 ConstantInt *CaseVal = CI->getCaseValue();
7229 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
7230 MinCaseVal = CaseVal;
7231 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
7232 MaxCaseVal = CaseVal;
7233
7234 // Resulting value at phi nodes for this case value.
7236 ResultsTy Results;
7237 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
7238 Results, DL, TTI))
7239 return false;
7240
7241 // Append the result and result types from this case to the list for each
7242 // phi.
7243 for (const auto &I : Results) {
7244 PHINode *PHI = I.first;
7245 Constant *Value = I.second;
7246 auto [It, Inserted] = ResultLists.try_emplace(PHI);
7247 if (Inserted)
7248 PHIs.push_back(PHI);
7249 It->second.push_back(std::make_pair(CaseVal, Value));
7250 ResultTypes.push_back(PHI->getType());
7251 }
7252 }
7253
7254 // If the table has holes, we need a constant result for the default case
7255 // or a bitmask that fits in a register.
7256 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
7257 bool HasDefaultResults =
7258 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
7259 DefaultResultsList, DL, TTI);
7260 for (const auto &I : DefaultResultsList) {
7261 PHINode *PHI = I.first;
7262 Constant *Result = I.second;
7263 DefaultResults[PHI] = Result;
7264 }
7265
7266 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7267 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7268 uint64_t TableSize;
7269 ConstantInt *TableIndexOffset;
7270 if (UseSwitchConditionAsTableIndex) {
7271 TableSize = MaxCaseVal->getLimitedValue() + 1;
7272 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7273 } else {
7274 TableSize =
7275 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7276
7277 TableIndexOffset = MinCaseVal;
7278 }
7279
7280 // If the default destination is unreachable, or if the lookup table covers
7281 // all values of the conditional variable, branch directly to the lookup table
7282 // BB. Otherwise, check that the condition is within the case range.
7283 uint64_t NumResults = ResultLists[PHIs[0]].size();
7284 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7285
7286 bool TableHasHoles = (NumResults < TableSize);
7287
7288 // If the table has holes but the default destination doesn't produce any
7289 // constant results, the lookup table entries corresponding to the holes will
7290 // contain poison.
7291 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7292
7293 // If the default destination doesn't produce a constant result but is still
7294 // reachable, and the lookup table has holes, we need to use a mask to
7295 // determine if the current index should load from the lookup table or jump
7296 // to the default case.
7297 // The mask is unnecessary if the table has holes but the default destination
7298 // is unreachable, as in that case the holes must also be unreachable.
7299 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7300 if (NeedMask) {
7301 // As an extra penalty for the validity test we require more cases.
7302 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7303 return false;
7304 if (!DL.fitsInLegalInteger(TableSize))
7305 return false;
7306 }
7307
7308 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7309 return false;
7310
7311 // Compute the table index value.
7312 Value *TableIndex;
7313 if (UseSwitchConditionAsTableIndex) {
7314 TableIndex = SI->getCondition();
7315 if (HasDefaultResults) {
7316 // Grow the table to cover all possible index values to avoid the range
7317 // check. It will use the default result to fill in the table hole later,
7318 // so make sure it exist.
7319 ConstantRange CR =
7320 computeConstantRange(TableIndex, /* ForSigned */ false);
7321 // Grow the table shouldn't have any size impact by checking
7322 // wouldFitInRegister.
7323 // TODO: Consider growing the table also when it doesn't fit in a register
7324 // if no optsize is specified.
7325 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7326 if (!CR.isUpperWrapped() &&
7327 all_of(ResultTypes, [&](const auto &ResultType) {
7328 return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
7329 ResultType);
7330 })) {
7331 // There may be some case index larger than the UpperBound (unreachable
7332 // case), so make sure the table size does not get smaller.
7333 TableSize = std::max(UpperBound, TableSize);
7334 // The default branch is unreachable after we enlarge the lookup table.
7335 // Adjust DefaultIsReachable to reuse code path.
7336 DefaultIsReachable = false;
7337 }
7338 }
7339 }
7340
7341 // Keep track of the switch replacement for each phi
7343 for (PHINode *PHI : PHIs) {
7344 const auto &ResultList = ResultLists[PHI];
7345
7346 Type *ResultType = ResultList.begin()->second->getType();
7347 // Use any value to fill the lookup table holes.
7348 Constant *DefaultVal =
7349 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7350 StringRef FuncName = Fn->getName();
7351 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7352 ResultList, DefaultVal, DL, FuncName);
7353 PhiToReplacementMap.insert({PHI, Replacement});
7354 }
7355
7356 bool AnyLookupTables = any_of(
7357 PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
7358 bool AnyBitMaps = any_of(PhiToReplacementMap,
7359 [](auto &KV) { return KV.second.isBitMap(); });
7360
7361 // A few conditions prevent the generation of lookup tables:
7362 // 1. The target does not support lookup tables.
7363 // 2. The "no-jump-tables" function attribute is set.
7364 // However, these objections do not apply to other switch replacements, like
7365 // the bitmap, so we only stop here if any of these conditions are met and we
7366 // want to create a LUT. Otherwise, continue with the switch replacement.
7367 if (AnyLookupTables &&
7368 (!TTI.shouldBuildLookupTables() ||
7369 Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
7370 return false;
7371
7372 // In the early optimization pipeline, disable formation of lookup tables,
7373 // bit maps and mask checks, as they may inhibit further optimization.
7374 if (!ConvertSwitchToLookupTable &&
7375 (AnyLookupTables || AnyBitMaps || NeedMask))
7376 return false;
7377
7378 Builder.SetInsertPoint(SI);
7379 // TableIndex is the switch condition - TableIndexOffset if we don't
7380 // use the condition directly
7381 if (!UseSwitchConditionAsTableIndex) {
7382 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7383 // we can try to attach nsw.
7384 bool MayWrap = true;
7385 if (!DefaultIsReachable) {
7386 APInt Res =
7387 MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7388 (void)Res;
7389 }
7390 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7391 "switch.tableidx", /*HasNUW =*/false,
7392 /*HasNSW =*/!MayWrap);
7393 }
7394
7395 std::vector<DominatorTree::UpdateType> Updates;
7396
7397 // Compute the maximum table size representable by the integer type we are
7398 // switching upon.
7399 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7400 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7401 assert(MaxTableSize >= TableSize &&
7402 "It is impossible for a switch to have more entries than the max "
7403 "representable value of its input integer type's size.");
7404
7405 // Create the BB that does the lookups.
7406 Module &Mod = *CommonDest->getParent()->getParent();
7407 BasicBlock *LookupBB = BasicBlock::Create(
7408 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7409
7410 CondBrInst *RangeCheckBranch = nullptr;
7411 CondBrInst *CondBranch = nullptr;
7412
7413 Builder.SetInsertPoint(SI);
7414 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7415 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7416 Builder.CreateBr(LookupBB);
7417 if (DTU)
7418 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7419 // Note: We call removeProdecessor later since we need to be able to get the
7420 // PHI value for the default case in case we're using a bit mask.
7421 } else {
7422 Value *Cmp = Builder.CreateICmpULT(
7423 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7424 RangeCheckBranch =
7425 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7426 CondBranch = RangeCheckBranch;
7427 if (DTU)
7428 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7429 }
7430
7431 // Populate the BB that does the lookups.
7432 Builder.SetInsertPoint(LookupBB);
7433
7434 if (NeedMask) {
7435 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7436 // re-purposed to do the hole check, and we create a new LookupBB.
7437 BasicBlock *MaskBB = LookupBB;
7438 MaskBB->setName("switch.hole_check");
7439 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7440 CommonDest->getParent(), CommonDest);
7441
7442 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7443 // unnecessary illegal types.
7444 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7445 APInt MaskInt(TableSizePowOf2, 0);
7446 APInt One(TableSizePowOf2, 1);
7447 // Build bitmask; fill in a 1 bit for every case.
7448 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7449 for (const auto &Result : ResultList) {
7450 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7451 .getLimitedValue();
7452 MaskInt |= One << Idx;
7453 }
7454 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7455
7456 // Get the TableIndex'th bit of the bitmask.
7457 // If this bit is 0 (meaning hole) jump to the default destination,
7458 // else continue with table lookup.
7459 IntegerType *MapTy = TableMask->getIntegerType();
7460 Value *MaskIndex =
7461 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7462 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7463 Value *LoBit = Builder.CreateTrunc(
7464 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7465 CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7466 if (DTU) {
7467 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7468 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7469 }
7470 Builder.SetInsertPoint(LookupBB);
7471 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7472 }
7473
7474 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7475 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7476 // do not delete PHINodes here.
7477 SI->getDefaultDest()->removePredecessor(BB,
7478 /*KeepOneInputPHIs=*/true);
7479 if (DTU)
7480 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7481 }
7482
7483 for (PHINode *PHI : PHIs) {
7484 const ResultListTy &ResultList = ResultLists[PHI];
7485 auto Replacement = PhiToReplacementMap.at(PHI);
7486 auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
7487 // Do a small peephole optimization: re-use the switch table compare if
7488 // possible.
7489 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7490 BasicBlock *PhiBlock = PHI->getParent();
7491 // Search for compare instructions which use the phi.
7492 for (auto *User : PHI->users()) {
7493 reuseTableCompare(User, PhiBlock, RangeCheckBranch,
7494 Replacement.getDefaultValue(), ResultList);
7495 }
7496 }
7497
7498 PHI->addIncoming(Result, LookupBB);
7499 }
7500
7501 Builder.CreateBr(CommonDest);
7502 if (DTU)
7503 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7504
7505 SmallVector<uint32_t> BranchWeights;
7506 const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
7507 extractBranchWeights(*SI, BranchWeights);
7508 uint64_t ToLookupWeight = 0;
7509 uint64_t ToDefaultWeight = 0;
7510
7511 // Remove the switch.
7512 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7513 for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
7514 BasicBlock *Succ = SI->getSuccessor(I);
7515
7516 if (Succ == SI->getDefaultDest()) {
7517 if (HasBranchWeights)
7518 ToDefaultWeight += BranchWeights[I];
7519 continue;
7520 }
7521 Succ->removePredecessor(BB);
7522 if (DTU && RemovedSuccessors.insert(Succ).second)
7523 Updates.push_back({DominatorTree::Delete, BB, Succ});
7524 if (HasBranchWeights)
7525 ToLookupWeight += BranchWeights[I];
7526 }
7527 SI->eraseFromParent();
7528 if (HasBranchWeights)
7529 setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
7530 /*IsExpected=*/false);
7531 if (DTU)
7532 DTU->applyUpdates(Updates);
7533
7534 if (NeedMask)
7535 ++NumLookupTablesHoles;
7536 return true;
7537}
7538
7539/// Try to transform a switch that has "holes" in it to a contiguous sequence
7540/// of cases.
7541///
7542/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7543/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7544///
7545/// This converts a sparse switch into a dense switch which allows better
7546/// lowering and could also allow transforming into a lookup table.
7548 const DataLayout &DL,
7549 const TargetTransformInfo &TTI) {
7550 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7551 if (CondTy->getIntegerBitWidth() > 64 ||
7552 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7553 return false;
7554 // Only bother with this optimization if there are more than 3 switch cases;
7555 // SDAG will only bother creating jump tables for 4 or more cases.
7556 if (SI->getNumCases() < 4)
7557 return false;
7558
7559 // This transform is agnostic to the signedness of the input or case values. We
7560 // can treat the case values as signed or unsigned. We can optimize more common
7561 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7562 // as signed.
7564 for (const auto &C : SI->cases())
7565 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7566 llvm::sort(Values);
7567
7568 // If the switch is already dense, there's nothing useful to do here.
7569 if (isSwitchDense(Values))
7570 return false;
7571
7572 // First, transform the values such that they start at zero and ascend.
7573 int64_t Base = Values[0];
7574 for (auto &V : Values)
7575 V -= (uint64_t)(Base);
7576
7577 // Now we have signed numbers that have been shifted so that, given enough
7578 // precision, there are no negative values. Since the rest of the transform
7579 // is bitwise only, we switch now to an unsigned representation.
7580
7581 // This transform can be done speculatively because it is so cheap - it
7582 // results in a single rotate operation being inserted.
7583
7584 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7585 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7586 // less than 64.
7587 unsigned Shift = 64;
7588 for (auto &V : Values)
7589 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7590 assert(Shift < 64);
7591 if (Shift > 0)
7592 for (auto &V : Values)
7593 V = (int64_t)((uint64_t)V >> Shift);
7594
7595 if (!isSwitchDense(Values))
7596 // Transform didn't create a dense switch.
7597 return false;
7598
7599 // The obvious transform is to shift the switch condition right and emit a
7600 // check that the condition actually cleanly divided by GCD, i.e.
7601 // C & (1 << Shift - 1) == 0
7602 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7603 //
7604 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7605 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7606 // are nonzero then the switch condition will be very large and will hit the
7607 // default case.
7608
7609 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7610 Builder.SetInsertPoint(SI);
7611 Value *Sub =
7612 Builder.CreateSub(SI->getCondition(), ConstantInt::getSigned(Ty, Base));
7613 Value *Rot = Builder.CreateIntrinsic(
7614 Ty, Intrinsic::fshl,
7615 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7616 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7617
7618 for (auto Case : SI->cases()) {
7619 auto *Orig = Case.getCaseValue();
7620 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7621 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7622 }
7623 return true;
7624}
7625
7626/// Tries to transform the switch when the condition is umin with a constant.
7627/// In that case, the default branch can be replaced by the constant's branch.
7628/// This method also removes dead cases when the simplification cannot replace
7629/// the default branch.
7630///
7631/// For example:
7632/// switch(umin(a, 3)) {
7633/// case 0:
7634/// case 1:
7635/// case 2:
7636/// case 3:
7637/// case 4:
7638/// // ...
7639/// default:
7640/// unreachable
7641/// }
7642///
7643/// Transforms into:
7644///
7645/// switch(a) {
7646/// case 0:
7647/// case 1:
7648/// case 2:
7649/// default:
7650/// // This is case 3
7651/// }
7653 Value *A;
7655
7656 if (!match(SI->getCondition(), m_UMin(m_Value(A), m_ConstantInt(Constant))))
7657 return false;
7658
7661 BasicBlock *BB = SIW->getParent();
7662
7663 // Dead cases are removed even when the simplification fails.
7664 // A case is dead when its value is higher than the Constant.
7665 for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
7666 if (!I->getCaseValue()->getValue().ugt(Constant->getValue())) {
7667 ++I;
7668 continue;
7669 }
7670 BasicBlock *DeadCaseBB = I->getCaseSuccessor();
7671 DeadCaseBB->removePredecessor(BB);
7672 Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB});
7673 I = SIW.removeCase(I);
7674 E = SIW->case_end();
7675 }
7676
7677 auto Case = SI->findCaseValue(Constant);
7678 // If the case value is not found, `findCaseValue` returns the default case.
7679 // In this scenario, since there is no explicit `case 3:`, the simplification
7680 // fails. The simplification also fails when the switch’s default destination
7681 // is reachable.
7682 if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
7683 if (DTU)
7684 DTU->applyUpdates(Updates);
7685 return !Updates.empty();
7686 }
7687
7688 BasicBlock *Unreachable = SI->getDefaultDest();
7689 SIW.replaceDefaultDest(Case);
7690 SIW.removeCase(Case);
7691 SIW->setCondition(A);
7692
7693 Updates.push_back({DominatorTree::Delete, BB, Unreachable});
7694
7695 if (DTU)
7696 DTU->applyUpdates(Updates);
7697
7698 return true;
7699}
7700
/// Tries to transform switch of powers of two to reduce switch range.
/// For example, switch like:
/// switch (C) { case 1: case 2: case 64: case 128: }
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
/// This transformation allows better lowering and may transform the switch
/// instruction into a sequence of bit manipulation and a smaller
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
                                    DomTreeUpdater *DTU,
                                    const DataLayout &DL,
                                    const TargetTransformInfo &TTI) {
  Value *Condition = SI->getCondition();
  LLVMContext &Context = SI->getContext();
  auto *CondTy = cast<IntegerType>(Condition->getType());

  // Bail out on conditions wider than 64 bits (getZExtValue below would not
  // be safe) and on types that are not legal integers for this target.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;

  // Ensure trailing zeroes count intrinsic emission is not too expensive.
  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
                                {Condition, ConstantInt::getTrue(Context)});
  if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
      TTI::TCC_Basic * 2)
    return false;

  // Only bother with this optimization if there are more than 3 switch cases.
  // SDAG will start emitting jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // Check that switch cases are powers of two.
  for (const auto &Case : SI->cases()) {
    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
    if (llvm::has_single_bit(CaseValue))
      Values.push_back(CaseValue);
    else
      return false;
  }

  // isSwitchDense requires case values to be sorted.
  llvm::sort(Values);
  if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
                                        llvm::countr_zero(Values.front()) + 1))
    // Transform is unable to generate dense switch.
    return false;

  Builder.SetInsertPoint(SI);

  if (!SI->defaultDestUnreachable()) {
    // Let non-power-of-two inputs jump to the default case, when the latter is
    // reachable. A popcount == 1 guard in the original block branches around
    // the (soon cttz-indexed) switch for such inputs.
    auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
    auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));

    auto *OrigBB = SI->getParent();
    auto *DefaultCaseBB = SI->getDefaultDest();
    BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
    auto It = OrigBB->getTerminator()->getIterator();
    SmallVector<uint32_t> Weights;
    auto HasWeights =
    auto *BI = CondBrInst::Create(IsPow2, SplitBB, DefaultCaseBB, It);
    if (HasWeights && any_of(Weights, not_equal_to(0))) {
      // IsPow2 covers a subset of the cases in which we'd go to the default
      // label. The other is those powers of 2 that don't appear in the case
      // statement. We don't know the distribution of the values coming in, so
      // the safest is to split 50-50 the original probability to `default`.
      uint64_t OrigDenominator =
      SmallVector<uint64_t> NewWeights(2);
      NewWeights[1] = Weights[0] / 2;
      NewWeights[0] = OrigDenominator - NewWeights[1];
      setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
      // The probability of executing the default block stays constant. It was
      // p_d = Weights[0] / OrigDenominator
      // we rewrite as W/D
      // We want to find the probability of the default branch of the switch
      // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
      // i.e. the original probability is the probability we go to the default
      // branch from the BI branch, or we take the default branch on the SI.
      // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
      // This matches using W/2 for the default branch probability numerator and
      // D-W/2 as the denominator.
      Weights[0] = NewWeights[1];
      uint64_t CasesDenominator = OrigDenominator - Weights[0];
      // Rescale the per-case weights to the new denominator.
      for (auto &W : drop_begin(Weights))
        W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;

      setBranchWeights(*SI, Weights, /*IsExpected=*/false);
    }
    // BI is handling the default case for SI, and so should share its DebugLoc.
    BI->setDebugLoc(SI->getDebugLoc());
    It->eraseFromParent();

    addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
  }

  // Replace each case with its trailing zeros number.
  for (auto &Case : SI->cases()) {
    auto *OrigValue = Case.getCaseValue();
    Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
                                   OrigValue->getValue().countr_zero()));
  }

  // Replace condition with its trailing zeros number.
  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
      Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});

  SI->setCondition(ConditionTrailingZeros);

  return true;
}
7820
/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
/// the same destination.
                                        DomTreeUpdater *DTU) {
  auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
  if (!Cmp || !Cmp->hasOneUse())
    return false;

  bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
  if (!HasWeights)
    Weights.resize(4); // Avoid checking HasWeights everywhere.

  // Normalize to [us]cmp == Res ? Succ : OtherSucc.
  int64_t Res;
  BasicBlock *Succ, *OtherSucc;
  uint32_t SuccWeight = 0, OtherSuccWeight = 0;
  BasicBlock *Unreachable = nullptr;

  if (SI->getNumCases() == 2) {
    // Find which of 1, 0 or -1 is missing (handled by default dest).
    SmallSet<int64_t, 3> Missing;
    Missing.insert(1);
    Missing.insert(0);
    Missing.insert(-1);

    // The default destination handles the missing value; slot 0 of the
    // extracted switch weights is paired with it here.
    Succ = SI->getDefaultDest();
    SuccWeight = Weights[0];
    OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val)
        return false;
      if (!Missing.erase(*Val))
        return false;
      // Both explicit cases must share a single destination.
      if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
        return false;
      OtherSucc = Case.getCaseSuccessor();
      OtherSuccWeight += Weights[Case.getSuccessorIndex()];
    }

    assert(Missing.size() == 1 && "Should have one case left");
    Res = *Missing.begin();
  } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
    // Normalize so that Succ is taken once and OtherSucc twice.
    Unreachable = SI->getDefaultDest();
    Succ = OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      BasicBlock *NewSucc = Case.getCaseSuccessor();
      uint32_t Weight = Weights[Case.getSuccessorIndex()];
      if (!OtherSucc || OtherSucc == NewSucc) {
        OtherSucc = NewSucc;
        OtherSuccWeight += Weight;
      } else if (!Succ) {
        Succ = NewSucc;
        SuccWeight = Weight;
      } else if (Succ == NewSucc) {
        // Succ turned out to be the repeated destination; swap roles.
        std::swap(Succ, OtherSucc);
        std::swap(SuccWeight, OtherSuccWeight);
      } else
        return false;
    }
    // Identify which of -1/0/1 selects the unique successor Succ.
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val || (Val != 1 && Val != 0 && Val != -1))
        return false;
      if (Case.getCaseSuccessor() == Succ) {
        Res = *Val;
        break;
      }
    }
  } else {
    return false;
  }

  // Determine predicate for the missing case.
  switch (Res) {
  case 1:
    Pred = ICmpInst::ICMP_UGT;
    break;
  case 0:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case -1:
    Pred = ICmpInst::ICMP_ULT;
    break;
  }
  if (Cmp->isSigned())
    Pred = ICmpInst::getSignedPredicate(Pred);

  MDNode *NewWeights = nullptr;
  if (HasWeights)
    NewWeights = MDBuilder(SI->getContext())
                     .createBranchWeights(SuccWeight, OtherSuccWeight);

  // Replace the switch with icmp + conditional branch on the cmp operands,
  // then clean up the now-dead switch and cmp intrinsic.
  BasicBlock *BB = SI->getParent();
  Builder.SetInsertPoint(SI->getIterator());
  Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
  Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
                       SI->getMetadata(LLVMContext::MD_unpredictable));
  OtherSucc->removePredecessor(BB);
  if (Unreachable)
    Unreachable->removePredecessor(BB);
  SI->eraseFromParent();
  Cmp->eraseFromParent();
  if (DTU && Unreachable)
    DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
  return true;
}
7933
/// Checking whether two BBs are equal depends on the contents of the
/// BasicBlock and the incoming values of their successor PHINodes.
/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
/// calling this function on each BasicBlock every time isEqual is called,
/// especially since the same BasicBlock may be passed as an argument multiple
/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
/// of the incoming values.

  // One Phi usually has < 8 incoming values.

  // We only merge the identical non-entry BBs with
  // - terminator unconditional br to Succ (pending relaxation),
  // - does not have address taken / weird control.
  // Returns true when BB is eligible to be merged with an identical block.
  static bool canBeMerged(const BasicBlock *BB) {
    assert(BB && "Expected non-null BB");
    // Entry block cannot be eliminated or have predecessors.
    if (BB->isEntryBlock())
      return false;

    // Single successor and must be Succ.
    // FIXME: Relax that the terminator is a BranchInst by checking for equality
    // on other kinds of terminators. We decide to only support unconditional
    // branches for now for compile time reasons.
    auto *BI = dyn_cast<UncondBrInst>(BB->getTerminator());
    if (!BI)
      return false;

    // Avoid blocks that are "address-taken" (blockaddress) or have unusual
    // uses.
    if (BB->hasAddressTaken() || BB->isEHPad())
      return false;

    // TODO: relax this condition to merge equal blocks with >1 instructions?
    // Here, we use a O(1) form of the O(n) comparison of `size() != 1`.
    // front() == back() holds exactly when the block has one instruction.
    if (&BB->front() != &BB->back())
      return false;

    // The BB must have at least one predecessor.
    if (pred_empty(BB))
      return false;

    return true;
  }
};
7984
  // Sentinel key for empty DenseSet/DenseMap buckets.
  static const EqualBBWrapper *getEmptyKey() {
    return static_cast<EqualBBWrapper *>(DenseMapInfo<void *>::getEmptyKey());
  }
    return static_cast<EqualBBWrapper *>(
  }
  static unsigned getHashValue(const EqualBBWrapper *EBW) {
    BasicBlock *BB = EBW->BB;
    assert(BB->size() == 1 && "Expected just a single branch in the BB");

    // Since we assume the BB is just a single UncondBrInst with a single
    // successor, we hash as the BB and the incoming Values of its successor
    // PHIs. Initially, we tried to just use the successor BB as the hash, but
    // including the incoming PHI values leads to better performance.
    // We also tried to build a map from BB -> Succs.IncomingValues ahead of
    // time and passing it in EqualBBWrapper, but this slowed down the average
    // compile time without having any impact on the worst case compile time.
    BasicBlock *Succ = BI->getSuccessor();
    auto PhiValsForBB = map_range(Succ->phis(), [&](PHINode &Phi) {
      return (*EBW->PhiPredIVs)[&Phi][BB];
    });
    return hash_combine(Succ, hash_combine_range(PhiValsForBB));
  }
  static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS) {
    // Sentinel keys only compare equal to themselves; never dereference them.
    if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
      return LHS == RHS;

    BasicBlock *A = LHS->BB;
    BasicBlock *B = RHS->BB;

    // FIXME: we checked that the size of A and B are both 1 in
    // mergeIdenticalUncondBBs to make the Case list smaller to
    // improve performance. If we decide to support BasicBlocks with more
    // than just a single instruction, we need to check that A.size() ==
    // B.size() here, and we need to check more than just the BranchInsts
    // for equality.

    UncondBrInst *ABI = cast<UncondBrInst>(A->getTerminator());
    UncondBrInst *BBI = cast<UncondBrInst>(B->getTerminator());
    if (ABI->getSuccessor() != BBI->getSuccessor())
      return false;

    // Need to check that PHIs in successor have matching values.
    BasicBlock *Succ = ABI->getSuccessor();
    auto IfPhiIVMatch = [&](PHINode &Phi) {
      // Replace O(|Pred|) Phi.getIncomingValueForBlock with this O(1) hashmap
      // query.
      auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
      return PredIVs[A] == PredIVs[B];
    };
    return all_of(Succ->phis(), IfPhiIVMatch);
  }
};
8043
// Merge identical BBs into one of them.
// Returns true iff at least one pair of duplicate blocks was merged.
                              DomTreeUpdater *DTU) {
  // Nothing to merge with fewer than two candidates.
  if (Candidates.size() < 2)
    return false;

  // Build Cases. Skip BBs that are not candidates for simplification. Mark
  // PHINodes which need to be processed into PhiPredIVs. We decide to process
  // an entire PHI at once after the loop, opposed to calling
  // getIncomingValueForBlock inside this loop, since each call to
  // getIncomingValueForBlock is O(|Preds|).
  EqualBBWrapper::Phi2IVsMap PhiPredIVs;
  BBs2Merge.reserve(Candidates.size());

  for (BasicBlock *BB : Candidates) {
    BasicBlock *Succ = BB->getSingleSuccessor();
    assert(Succ && "Expected unconditional BB");
    BBs2Merge.emplace_back(EqualBBWrapper{BB, &PhiPredIVs});
    // Every PHI in the shared successor participates in equality checks.
    Phis.insert_range(make_pointer_range(Succ->phis()));
  }

  // Precompute a data structure to improve performance of isEqual for
  // EqualBBWrapper.
  PhiPredIVs.reserve(Phis.size());
  for (PHINode *Phi : Phis) {
    auto &IVs =
        PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
    // Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
    // O(|Pred|).
    for (auto &IV : Phi->incoming_values())
      IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
  }

  // Group duplicates using DenseSet with custom equality/hashing.
  // Build a set such that if the EqualBBWrapper exists in the set and another
  // EqualBBWrapper isEqual, then the equivalent EqualBBWrapper which is not in
  // the set should be replaced with the one in the set. If the EqualBBWrapper
  // is not in the set, then it should be added to the set so other
  // EqualBBWrapper can check against it in the same manner. We use
  // EqualBBWrapper instead of just BasicBlock because we'd like to pass around
  // information to isEquality, getHashValue, and when doing the replacement
  // with better performance.
  Keep.reserve(BBs2Merge.size());

  Updates.reserve(BBs2Merge.size() * 2);

  bool MadeChange = false;

  // Helper: redirect all edges X -> DeadPred to X -> LivePred.
  auto RedirectIncomingEdges = [&](BasicBlock *Dead, BasicBlock *Live) {
    if (DTU) {
      // All predecessors of DeadPred (except the common predecessor) will be
      // moved to LivePred.
      Updates.reserve(Updates.size() + DeadPreds.size() * 2);
                               predecessors(Live));
      for (BasicBlock *PredOfDead : DeadPreds) {
        // Do not modify those common predecessors of DeadPred and LivePred.
        if (!LivePreds.contains(PredOfDead))
          Updates.push_back({DominatorTree::Insert, PredOfDead, Live});
        Updates.push_back({DominatorTree::Delete, PredOfDead, Dead});
      }
    }
    LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
               Dead->printAsOperand(dbgs()); dbgs() << " with pred ";
               Live->printAsOperand(dbgs()); dbgs() << " for ";
               Live->getSingleSuccessor()->printAsOperand(dbgs());
               dbgs() << "\n");
    // Replace successors in all predecessors of DeadPred.
    for (BasicBlock *PredOfDead : DeadPreds) {
      Instruction *T = PredOfDead->getTerminator();
      T->replaceSuccessorWith(Dead, Live);
    }
  };

  // Try to eliminate duplicate predecessors.
  for (const auto &EBW : BBs2Merge) {
    // EBW is a candidate for simplification. If we find a duplicate BB,
    // replace it.
    const auto &[It, Inserted] = Keep.insert(&EBW);
    if (Inserted)
      continue;

    // Found duplicate: merge P into canonical predecessor It->Pred.
    BasicBlock *KeepBB = (*It)->BB;
    BasicBlock *DeadBB = EBW.BB;

    // Avoid merging a BB with itself.
    if (KeepBB == DeadBB)
      continue;

    // Redirect all edges into DeadPred to KeepPred.
    RedirectIncomingEdges(DeadBB, KeepBB);

    // Now DeadBB should become unreachable; leave DCE to later,
    // but we can try to simplify it if it only branches to Succ.
    // (We won't erase here to keep the routine simple and DT-safe.)
    assert(pred_empty(DeadBB) && "DeadBB should be unreachable.");
    MadeChange = true;
  }

  if (DTU && !Updates.empty())
    DTU->applyUpdates(Updates);

  return MadeChange;
}
8156
// Merge switch arms that are identical single-branch blocks into one arm.
bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
                                                 DomTreeUpdater *DTU) {
  // Collect candidate switch-arms top-down.
  SmallSetVector<BasicBlock *, 16> FilteredArms(
  return mergeIdenticalBBs(FilteredArms.getArrayRef(), DTU);
}
8165
// Merge identical single-branch predecessors of BB into one block.
bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
                                                   DomTreeUpdater *DTU) {
  // Need at least 2 predecessors to do anything.
  if (!BB || !BB->hasNPredecessorsOrMore(2))
    return false;

  // Compilation time consideration: retain the canonical loop, otherwise, we
  // require more time in the later loop canonicalization.
  if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BB))
    return false;

  // Collect candidate predecessors bottom-up.
  SmallSetVector<BasicBlock *, 8> FilteredPreds(
  return mergeIdenticalBBs(FilteredPreds.getArrayRef(), DTU);
}
8183
8184bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
8185 BasicBlock *BB = SI->getParent();
8186
8187 if (isValueEqualityComparison(SI)) {
8188 // If we only have one predecessor, and if it is a branch on this value,
8189 // see if that predecessor totally determines the outcome of this switch.
8190 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8191 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
8192 return requestResimplify();
8193
8194 Value *Cond = SI->getCondition();
8195 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
8196 if (simplifySwitchOnSelect(SI, Select))
8197 return requestResimplify();
8198
8199 // If the block only contains the switch, see if we can fold the block
8200 // away into any preds.
8201 if (SI == &*BB->begin())
8202 if (foldValueComparisonIntoPredecessors(SI, Builder))
8203 return requestResimplify();
8204 }
8205
8206 // Try to transform the switch into an icmp and a branch.
8207 // The conversion from switch to comparison may lose information on
8208 // impossible switch values, so disable it early in the pipeline.
8209 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8210 return requestResimplify();
8211
8212 // Remove unreachable cases.
8213 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
8214 return requestResimplify();
8215
8216 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8217 return requestResimplify();
8218
8219 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8220 return requestResimplify();
8221
8222 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8223 return requestResimplify();
8224
8225 // The conversion of switches to arithmetic or lookup table is disabled in
8226 // the early optimization pipeline, as it may lose information or make the
8227 // resulting code harder to analyze.
8228 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8229 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8230 Options.ConvertSwitchToLookupTable))
8231 return requestResimplify();
8232
8233 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8234 return requestResimplify();
8235
8236 if (reduceSwitchRange(SI, Builder, DL, TTI))
8237 return requestResimplify();
8238
8239 if (HoistCommon &&
8240 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
8241 return requestResimplify();
8242
8243 // We can merge identical switch arms early to enhance more aggressive
8244 // optimization on switch.
8245 if (simplifyDuplicateSwitchArms(SI, DTU))
8246 return requestResimplify();
8247
8248 if (simplifySwitchWhenUMin(SI, DTU))
8249 return requestResimplify();
8250
8251 return false;
8252}
8253
// Simplify an indirectbr: deduplicate destinations, strip destinations whose
// address is not taken, and degenerate to unreachable / a direct branch when
// 0 or 1 destinations remain. Branch weights of merged duplicates are summed.
bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
  BasicBlock *BB = IBI->getParent();
  bool Changed = false;
  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(*IBI, BranchWeights);

  // Accumulate per-destination weights up front so duplicates removed below
  // keep their combined weight.
  DenseMap<const BasicBlock *, uint64_t> TargetWeight;
  if (HasBranchWeights)
    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
      TargetWeight[IBI->getDestination(I)] += BranchWeights[I];

  // Eliminate redundant destinations.
  SmallPtrSet<Value *, 8> Succs;
  SmallSetVector<BasicBlock *, 8> RemovedSuccs;
  for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
    BasicBlock *Dest = IBI->getDestination(I);
    // A destination is removable when its address is never taken (it can
    // never actually be jumped to) or it already appears earlier in the list.
    if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
      if (!Dest->hasAddressTaken())
        RemovedSuccs.insert(Dest);
      Dest->removePredecessor(BB);
      IBI->removeDestination(I);
      // Re-examine the slot we just shifted into.
      --I;
      --E;
      Changed = true;
    }
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }

  if (IBI->getNumDestinations() == 0) {
    // If the indirectbr has no successors, change it to unreachable.
    new UnreachableInst(IBI->getContext(), IBI->getIterator());
    return true;
  }

  if (IBI->getNumDestinations() == 1) {
    // If the indirectbr has one successor, change it to a direct branch.
    return true;
  }
  if (HasBranchWeights) {
    // Reapply the accumulated weights to the surviving destinations.
    SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
      NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
    setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
  }
  if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
    if (simplifyIndirectBrOnSelect(IBI, SI))
      return requestResimplify();
  }
  return Changed;
}
8315
/// Given an block with only a single landing pad and a unconditional branch
/// try to find another basic block which this one can be merged with. This
/// handles cases where we have multiple invokes with unique landing pads, but
/// a shared handler.
///
/// We specifically choose to not worry about merging non-empty blocks
/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
/// practice, the optimizer produces empty landing pad blocks quite frequently
/// when dealing with exception dense code. (see: instcombine, gvn, if-else
/// sinking in this file)
///
/// This is primarily a code size optimization. We need to avoid performing
/// any transform which might inhibit optimization (such as our ability to
/// specialize a particular handler via tail commoning). We do this by not
/// merging any blocks which require us to introduce a phi. Since the same
/// values are flowing through both blocks, we don't lose any ability to
/// specialize. If anything, we make such specialization more likely.
///
/// TODO - This transformation could remove entries from a phi in the target
/// block when the inputs in the phi are the same for the two blocks being
/// merged. In some cases, this could result in removal of the PHI entirely.
                                 BasicBlock *BB, DomTreeUpdater *DTU) {
  auto Succ = BB->getUniqueSuccessor();
  assert(Succ);
  // If there's a phi in the successor block, we'd likely have to introduce
  // a phi into the merged landing pad block.
  if (isa<PHINode>(*Succ->begin()))
    return false;

  // Scan the other predecessors of Succ for a block identical to BB: a
  // matching landing pad followed by a matching unconditional branch.
  for (BasicBlock *OtherPred : predecessors(Succ)) {
    if (BB == OtherPred)
      continue;
    BasicBlock::iterator I = OtherPred->begin();
    if (!LPad2 || !LPad2->isIdenticalTo(LPad))
      continue;
    ++I;
    if (!BI2 || !BI2->isIdenticalTo(BI))
      continue;

    std::vector<DominatorTree::UpdateType> Updates;

    // We've found an identical block. Update our predecessors to take that
    // path instead and make ourselves dead.
    for (BasicBlock *Pred : UniquePreds) {
      // Landing pad predecessors are invokes that unwind to BB.
      InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
             "unexpected successor");
      II->setUnwindDest(OtherPred);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
        Updates.push_back({DominatorTree::Delete, Pred, BB});
      }
    }

    for (BasicBlock *Succ : UniqueSuccs) {
      Succ->removePredecessor(BB);
      if (DTU)
        Updates.push_back({DominatorTree::Delete, BB, Succ});
    }

    // BB is now dead: replace its terminator with unreachable.
    IRBuilder<> Builder(BI);
    Builder.CreateUnreachable();
    BI->eraseFromParent();
    if (DTU)
      DTU->applyUpdates(Updates);
    return true;
  }
  return false;
}
8390
// Simplify a block ending in an unconditional branch: fold away empty blocks,
// blocks containing only an equality icmp (plus optional select), and empty
// landing pad blocks that duplicate another pad.
bool SimplifyCFGOpt::simplifyUncondBranch(UncondBrInst *BI,
                                          IRBuilder<> &Builder) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *Succ = BI->getSuccessor(0);

  // If the Terminator is the only non-phi instruction, simplify the block.
  // If LoopHeader is provided, check if the block or its successor is a loop
  // header. (This is for early invocations before loop simplify and
  // vectorization to keep canonical loop forms for nested loops. These blocks
  // can be eliminated when the pass is invoked later in the back-end.)
  // Note that if BB has only one predecessor then we do not introduce new
  // backedge, so we can eliminate BB.
  bool NeedCanonicalLoop =
      Options.NeedCanonicalLoop &&
      (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
       (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
    return true;

  // If the only instruction in the block is a seteq/setne comparison against a
  // constant, try to simplify the block.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
    if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
      ++I;
      if (I->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
        return true;
      // Also handle icmp followed by a select feeding the terminator.
      if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
                                                      Builder))
        return true;
    }
  }

  // See if we can merge an empty landing pad block with another which is
  // equivalent.
  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
    ++I;
    if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
      return true;
  }

  return false;
}
8437
8439 BasicBlock *PredPred = nullptr;
8440 for (auto *P : predecessors(BB)) {
8441 BasicBlock *PPred = P->getSinglePredecessor();
8442 if (!PPred || (PredPred && PredPred != PPred))
8443 return nullptr;
8444 PredPred = PPred;
8445 }
8446 return PredPred;
8447}
8448
/// Fold the following pattern:
/// bb0:
///   br i1 %cond1, label %bb1, label %bb2
/// bb1:
///   br i1 %cond2, label %bb3, label %bb4
/// bb2:
///   br i1 %cond2, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// into
/// bb0:
///   %cond = xor i1 %cond1, %cond2
///   br i1 %cond, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// NOTE: %cond2 always dominates the terminator of bb0.
  BasicBlock *BB = BI->getParent();
  BasicBlock *BB1 = BI->getSuccessor(0);
  BasicBlock *BB2 = BI->getSuccessor(1);
  // A successor qualifies when it contains only a conditional branch whose
  // two targets are distinct from it, from BB, and are PHI-free.
  auto IsSimpleSuccessor = [BB](BasicBlock *Succ, CondBrInst *&SuccBI) {
    if (Succ == BB)
      return false;
    // The successor must contain nothing but its terminator.
    if (&Succ->front() != Succ->getTerminator())
      return false;
    SuccBI = dyn_cast<CondBrInst>(Succ->getTerminator());
    if (!SuccBI)
      return false;
    BasicBlock *Succ1 = SuccBI->getSuccessor(0);
    BasicBlock *Succ2 = SuccBI->getSuccessor(1);
    return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
           !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
  };
  CondBrInst *BB1BI, *BB2BI;
  if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
    return false;

  // Both inner branches must test the same condition and target the same two
  // blocks in swapped order.
  if (BB1BI->getCondition() != BB2BI->getCondition() ||
      BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
      BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
    return false;

  BasicBlock *BB3 = BB1BI->getSuccessor(0);
  BasicBlock *BB4 = BB1BI->getSuccessor(1);
  IRBuilder<> Builder(BI);
  // cond1 XOR cond2 is true exactly on the paths that reached bb4.
  BI->setCondition(
      Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
  BB1->removePredecessor(BB);
  BI->setSuccessor(0, BB4);
  BB2->removePredecessor(BB);
  BI->setSuccessor(1, BB3);
  if (DTU) {
    Updates.push_back({DominatorTree::Delete, BB, BB1});
    Updates.push_back({DominatorTree::Insert, BB, BB4});
    Updates.push_back({DominatorTree::Delete, BB, BB2});
    Updates.push_back({DominatorTree::Insert, BB, BB3});

    DTU->applyUpdates(Updates);
  }
  // Combine branch weights: if any of the three branches carried profile
  // data, synthesize weights for the merged branch (missing ones default 1).
  bool HasWeight = false;
  uint64_t BBTWeight, BBFWeight;
  if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
    HasWeight = true;
  else
    BBTWeight = BBFWeight = 1;
  uint64_t BB1TWeight, BB1FWeight;
  if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
    HasWeight = true;
  else
    BB1TWeight = BB1FWeight = 1;
  uint64_t BB2TWeight, BB2FWeight;
  if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
    HasWeight = true;
  else
    BB2TWeight = BB2FWeight = 1;
  if (HasWeight) {
    // Weight of BB4 (taken): via bb1 when cond2 is false, via bb2 when cond2
    // is true; BB3 (not taken) is the complementary sum.
    uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
                           BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
    setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);
  }
  return true;
}
8537
8538bool SimplifyCFGOpt::simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder) {
8539 assert(
8541 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8542 "Tautological conditional branch should have been eliminated already.");
8543
8544 BasicBlock *BB = BI->getParent();
8545 if (!Options.SimplifyCondBranch ||
8546 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8547 return false;
8548
8549 // Conditional branch
8550 if (isValueEqualityComparison(BI)) {
8551 // If we only have one predecessor, and if it is a branch on this value,
8552 // see if that predecessor totally determines the outcome of this
8553 // switch.
8554 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8555 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8556 return requestResimplify();
8557
8558 // This block must be empty, except for the setcond inst, if it exists.
8559 // Ignore pseudo intrinsics.
8560 for (auto &I : *BB) {
8561 if (isa<PseudoProbeInst>(I) ||
8562 &I == cast<Instruction>(BI->getCondition()))
8563 continue;
8564 if (&I == BI)
8565 if (foldValueComparisonIntoPredecessors(BI, Builder))
8566 return requestResimplify();
8567 break;
8568 }
8569 }
8570
8571 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8572 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8573 return true;
8574
8575 // If this basic block has dominating predecessor blocks and the dominating
8576 // blocks' conditions imply BI's condition, we know the direction of BI.
8577 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8578 if (Imp) {
8579 // Turn this into a branch on constant.
8580 auto *OldCond = BI->getCondition();
8581 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8582 : ConstantInt::getFalse(BB->getContext());
8583 BI->setCondition(TorF);
8585 return requestResimplify();
8586 }
8587
8588 // If this basic block is ONLY a compare and a branch, and if a predecessor
8589 // branches to us and one of our successors, fold the comparison into the
8590 // predecessor and use logical operations to pick the right destination.
8591 if (Options.SpeculateBlocks &&
8592 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8593 Options.BonusInstThreshold))
8594 return requestResimplify();
8595
8596 // We have a conditional branch to two blocks that are only reachable
8597 // from BI. We know that the condbr dominates the two blocks, so see if
8598 // there is any identical code in the "then" and "else" blocks. If so, we
8599 // can hoist it up to the branching block.
8600 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8601 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8602 if (HoistCommon &&
8603 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8604 return requestResimplify();
8605
8606 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8607 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8608 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8609 auto CanSpeculateConditionalLoadsStores = [&]() {
8610 for (auto *Succ : successors(BB)) {
8611 for (Instruction &I : *Succ) {
8612 if (I.isTerminator()) {
8613 if (I.getNumSuccessors() > 1)
8614 return false;
8615 continue;
8616 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8617 SpeculatedConditionalLoadsStores.size() ==
8619 return false;
8620 }
8621 SpeculatedConditionalLoadsStores.push_back(&I);
8622 }
8623 }
8624 return !SpeculatedConditionalLoadsStores.empty();
8625 };
8626
8627 if (CanSpeculateConditionalLoadsStores()) {
8628 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8629 std::nullopt, nullptr);
8630 return requestResimplify();
8631 }
8632 }
8633 } else {
8634 // If Successor #1 has multiple preds, we may be able to conditionally
8635 // execute Successor #0 if it branches to Successor #1.
8636 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8637 if (Succ0TI->getNumSuccessors() == 1 &&
8638 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8639 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8640 return requestResimplify();
8641 }
8642 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8643 // If Successor #0 has multiple preds, we may be able to conditionally
8644 // execute Successor #1 if it branches to Successor #0.
8645 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8646 if (Succ1TI->getNumSuccessors() == 1 &&
8647 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8648 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8649 return requestResimplify();
8650 }
8651
8652 // If this is a branch on something for which we know the constant value in
8653 // predecessors (e.g. a phi node in the current block), thread control
8654 // through this block.
8655 if (foldCondBranchOnValueKnownInPredecessor(BI))
8656 return requestResimplify();
8657
8658 // Scan predecessor blocks for conditional branches.
8659 for (BasicBlock *Pred : predecessors(BB))
8660 if (CondBrInst *PBI = dyn_cast<CondBrInst>(Pred->getTerminator()))
8661 if (PBI != BI)
8662 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8663 return requestResimplify();
8664
8665 // Look for diamond patterns.
8666 if (MergeCondStores)
8667 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8668 if (CondBrInst *PBI = dyn_cast<CondBrInst>(PrevBB->getTerminator()))
8669 if (PBI != BI)
8670 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8671 return requestResimplify();
8672
8673 // Look for nested conditional branches.
8674 if (mergeNestedCondBranch(BI, DTU))
8675 return requestResimplify();
8676
8677 return false;
8678}
8679
8680/// Check if passing a value to an instruction will cause undefined behavior.
8681static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8682 assert(V->getType() == I->getType() && "Mismatched types");
8684 if (!C)
8685 return false;
8686
8687 if (I->use_empty())
8688 return false;
8689
8690 if (C->isNullValue() || isa<UndefValue>(C)) {
8691 // Only look at the first use we can handle, avoid hurting compile time with
8692 // long uselists
8693 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8694 auto *Use = cast<Instruction>(U.getUser());
8695 // Change this list when we want to add new instructions.
8696 switch (Use->getOpcode()) {
8697 default:
8698 return false;
8699 case Instruction::GetElementPtr:
8700 case Instruction::Ret:
8701 case Instruction::BitCast:
8702 case Instruction::Load:
8703 case Instruction::Store:
8704 case Instruction::Call:
8705 case Instruction::CallBr:
8706 case Instruction::Invoke:
8707 case Instruction::UDiv:
8708 case Instruction::URem:
8709 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8710 // implemented to avoid code complexity as it is unclear how useful such
8711 // logic is.
8712 case Instruction::SDiv:
8713 case Instruction::SRem:
8714 return true;
8715 }
8716 });
8717 if (FindUse == I->use_end())
8718 return false;
8719 auto &Use = *FindUse;
8720 auto *User = cast<Instruction>(Use.getUser());
8721 // Bail out if User is not in the same BB as I or User == I or User comes
8722 // before I in the block. The latter two can be the case if User is a
8723 // PHI node.
8724 if (User->getParent() != I->getParent() || User == I ||
8725 User->comesBefore(I))
8726 return false;
8727
8728 // Now make sure that there are no instructions in between that can alter
8729 // control flow (eg. calls)
8730 auto InstrRange =
8731 make_range(std::next(I->getIterator()), User->getIterator());
8732 if (any_of(InstrRange, [](Instruction &I) {
8734 }))
8735 return false;
8736
8737 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8739 if (GEP->getPointerOperand() == I) {
8740 // The type of GEP may differ from the type of base pointer.
8741 // Bail out on vector GEPs, as they are not handled by other checks.
8742 if (GEP->getType()->isVectorTy())
8743 return false;
8744 // The current base address is null, there are four cases to consider:
8745 // getelementptr (TY, null, 0) -> null
8746 // getelementptr (TY, null, not zero) -> may be modified
8747 // getelementptr inbounds (TY, null, 0) -> null
8748 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8749 // undefined?
8750 if (!GEP->hasAllZeroIndices() &&
8751 (!GEP->isInBounds() ||
8752 NullPointerIsDefined(GEP->getFunction(),
8753 GEP->getPointerAddressSpace())))
8754 PtrValueMayBeModified = true;
8755 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8756 }
8757
8758 // Look through return.
8759 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8760 bool HasNoUndefAttr =
8761 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8762 // Return undefined to a noundef return value is undefined.
8763 if (isa<UndefValue>(C) && HasNoUndefAttr)
8764 return true;
8765 // Return null to a nonnull+noundef return value is undefined.
8766 if (C->isNullValue() && HasNoUndefAttr &&
8767 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8768 return !PtrValueMayBeModified;
8769 }
8770 }
8771
8772 // Load from null is undefined.
8773 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8774 if (!LI->isVolatile())
8775 return !NullPointerIsDefined(LI->getFunction(),
8776 LI->getPointerAddressSpace());
8777
8778 // Store to null is undefined.
8780 if (!SI->isVolatile())
8781 return (!NullPointerIsDefined(SI->getFunction(),
8782 SI->getPointerAddressSpace())) &&
8783 SI->getPointerOperand() == I;
8784
8785 // llvm.assume(false/undef) always triggers immediate UB.
8786 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8787 // Ignore assume operand bundles.
8788 if (I == Assume->getArgOperand(0))
8789 return true;
8790 }
8791
8792 if (auto *CB = dyn_cast<CallBase>(User)) {
8793 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8794 return false;
8795 // A call to null is undefined.
8796 if (CB->getCalledOperand() == I)
8797 return true;
8798
8799 if (CB->isArgOperand(&Use)) {
8800 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8801 // Passing null to a nonnnull+noundef argument is undefined.
8803 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8804 return !PtrValueMayBeModified;
8805 // Passing undef to a noundef argument is undefined.
8806 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8807 return true;
8808 }
8809 }
8810 // Div/Rem by zero is immediate UB
8811 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8812 return true;
8813 }
8814 return false;
8815}
8816
8817/// If BB has an incoming value that will always trigger undefined behavior
8818/// (eg. null pointer dereference), remove the branch leading here.
8820 DomTreeUpdater *DTU,
8821 AssumptionCache *AC) {
8822 for (PHINode &PHI : BB->phis())
8823 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8824 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8825 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8826 Instruction *T = Predecessor->getTerminator();
8827 IRBuilder<> Builder(T);
8828 if (isa<UncondBrInst>(T)) {
8829 BB->removePredecessor(Predecessor);
8830 // Turn unconditional branches into unreachables.
8831 Builder.CreateUnreachable();
8832 T->eraseFromParent();
8833 if (DTU)
8834 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8835 return true;
8836 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(T)) {
8837 BB->removePredecessor(Predecessor);
8838 // Preserve guarding condition in assume, because it might not be
8839 // inferrable from any dominating condition.
8840 Value *Cond = BI->getCondition();
8841 CallInst *Assumption;
8842 if (BI->getSuccessor(0) == BB)
8843 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8844 else
8845 Assumption = Builder.CreateAssumption(Cond);
8846 if (AC)
8847 AC->registerAssumption(cast<AssumeInst>(Assumption));
8848 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8849 : BI->getSuccessor(0));
8850 BI->eraseFromParent();
8851 if (DTU)
8852 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8853 return true;
8854 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8855 // Redirect all branches leading to UB into
8856 // a newly created unreachable block.
8857 BasicBlock *Unreachable = BasicBlock::Create(
8858 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8859 Builder.SetInsertPoint(Unreachable);
8860 // The new block contains only one instruction: Unreachable
8861 Builder.CreateUnreachable();
8862 for (const auto &Case : SI->cases())
8863 if (Case.getCaseSuccessor() == BB) {
8864 BB->removePredecessor(Predecessor);
8865 Case.setSuccessor(Unreachable);
8866 }
8867 if (SI->getDefaultDest() == BB) {
8868 BB->removePredecessor(Predecessor);
8869 SI->setDefaultDest(Unreachable);
8870 }
8871
8872 if (DTU)
8873 DTU->applyUpdates(
8874 { { DominatorTree::Insert, Predecessor, Unreachable },
8875 { DominatorTree::Delete, Predecessor, BB } });
8876 return true;
8877 }
8878 }
8879
8880 return false;
8881}
8882
8883bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8884 bool Changed = false;
8885
8886 assert(BB && BB->getParent() && "Block not embedded in function!");
8887 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8888
8889 // Remove basic blocks that have no predecessors (except the entry block)...
8890 // or that just have themself as a predecessor. These are unreachable.
8891 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8892 BB->getSinglePredecessor() == BB) {
8893 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8894 DeleteDeadBlock(BB, DTU);
8895 return true;
8896 }
8897
8898 // Check to see if we can constant propagate this terminator instruction
8899 // away...
8900 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8901 /*TLI=*/nullptr, DTU);
8902
8903 // Check for and eliminate duplicate PHI nodes in this block.
8905
8906 // Check for and remove branches that will always cause undefined behavior.
8908 return requestResimplify();
8909
8910 // Merge basic blocks into their predecessor if there is only one distinct
8911 // pred, and if there is only one distinct successor of the predecessor, and
8912 // if there are no PHI nodes.
8913 if (MergeBlockIntoPredecessor(BB, DTU))
8914 return true;
8915
8916 if (SinkCommon && Options.SinkCommonInsts) {
8917 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8918 mergeCompatibleInvokes(BB, DTU)) {
8919 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8920 // so we may now how duplicate PHI's.
8921 // Let's rerun EliminateDuplicatePHINodes() first,
8922 // before foldTwoEntryPHINode() potentially converts them into select's,
8923 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8924 return true;
8925 }
8926 // Merge identical predecessors of this block.
8927 if (simplifyDuplicatePredecessors(BB, DTU))
8928 return true;
8929 }
8930
8931 if (Options.SpeculateBlocks &&
8932 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8933 // If there is a trivial two-entry PHI node in this basic block, and we can
8934 // eliminate it, do so now.
8935 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8936 if (PN->getNumIncomingValues() == 2)
8937 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8938 Options.SpeculateUnpredictables))
8939 return true;
8940 }
8941
8942 IRBuilder<> Builder(BB);
8944 Builder.SetInsertPoint(Terminator);
8945 switch (Terminator->getOpcode()) {
8946 case Instruction::UncondBr:
8947 Changed |= simplifyUncondBranch(cast<UncondBrInst>(Terminator), Builder);
8948 break;
8949 case Instruction::CondBr:
8950 Changed |= simplifyCondBranch(cast<CondBrInst>(Terminator), Builder);
8951 break;
8952 case Instruction::Resume:
8953 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8954 break;
8955 case Instruction::CleanupRet:
8956 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8957 break;
8958 case Instruction::Switch:
8959 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8960 break;
8961 case Instruction::Unreachable:
8962 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8963 break;
8964 case Instruction::IndirectBr:
8965 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8966 break;
8967 }
8968
8969 return Changed;
8970}
8971
8972bool SimplifyCFGOpt::run(BasicBlock *BB) {
8973 bool Changed = false;
8974
8975 // Repeated simplify BB as long as resimplification is requested.
8976 do {
8977 Resimplify = false;
8978
8979 // Perform one round of simplifcation. Resimplify flag will be set if
8980 // another iteration is requested.
8981 Changed |= simplifyOnce(BB);
8982 } while (Resimplify);
8983
8984 return Changed;
8985}
8986
8989 ArrayRef<WeakVH> LoopHeaders) {
8990 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8991 Options)
8992 .run(BB);
8993}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
Hexagon Common GEP
static bool IsIndirectCall(const MachineInstr *MI)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool isProfitableToSpeculate(const CondBrInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(CondBrInst *BI, CondBrInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static void hoistConditionalLoadsStores(CondBrInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(CondBrInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool extractPredSuccWeights(CondBrInst *PBI, CondBrInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool performBranchToCommonDestFolding(CondBrInst *BI, CondBrInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static bool mergeConditionalStores(CondBrInst *PBI, CondBrInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool mergeNestedCondBranch(CondBrInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static bool tryWidenCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static bool mergeIdenticalBBs(ArrayRef< BasicBlock * > Candidates, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static bool tryToMergeLandingPad(LandingPadInst *LPad, UncondBrInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool SimplifyCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU)
Tries to transform the switch when the condition is umin with a constant.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1043
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1685
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1173
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1546
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:2000
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1589
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1981
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
back - Get the last element.
Definition ArrayRef.h:151
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:687
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:484
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:482
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:659
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:1100
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:986
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void setCondition(Value *V)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:932
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1291
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isNegative() const
Definition Constants.h:214
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:198
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
A constant pointer value that points to null.
Definition Constants.h:701
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:123
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:255
static DebugLoc getTemporary()
Definition DebugLoc.h:160
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:179
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:166
static DebugLoc getDropped()
Definition DebugLoc.h:163
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:114
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
const BasicBlock & getEntryBlock() const
Definition Function.h:809
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2347
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2095
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1223
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:502
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2650
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1539
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1975
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1217
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1835
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1246
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2331
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1877
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1890
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1429
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2189
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2063
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2272
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2441
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1599
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1463
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does no...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1080
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:124
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
LLVM_ABI void replaceDefaultDest(SwitchInst::CaseIt I)
Replace the default destination by given case.
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
CaseIt case_end()
Returns a read/write iterator that points one past the last in the SwitchInst.
BasicBlock * getSuccessor(unsigned idx) const
void setCondition(Value *V)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:310
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Unconditional Branch instruction.
void setSuccessor(BasicBlock *NewSucc)
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i=0) const
'undef' values are things that do not have specified contents.
Definition Constants.h:1606
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
LLVM_ABI void set(Value *Val)
Definition Value.h:883
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:808
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Represents an op.with.overflow intrinsic.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_Value()
Match an arbitrary value and ignore it.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:203
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
Definition STLExtras.h:2180
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
bool succ_empty(const Instruction *I)
Definition CFG.h:153
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
@ Dead
Unused definition.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1702
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2134
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1791
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:366
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2200
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
constexpr detail::StaticCastFunc< To > StaticCastTo
Function objects corresponding to the Cast types defined above.
Definition Casting.h:882
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI CondBrInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1155
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
LLVM_ABI void InvertBranch(CondBrInst *PBI, IRBuilderBase &Builder)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI bool collectPossibleValues(const Value *V, SmallPtrSetImpl< const Constant * > &Constants, unsigned MaxCount, bool AllowUndefOrPoison=true)
Enumerates all possible immediate values of V and inserts them into the set Constants.
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2863
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:552
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3110
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3392
@ Sub
Subtraction of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3899
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1717
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
LLVM_ABI bool foldBranchToCommonDest(CondBrInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< T1, 2 > &B1, const SmallVector< T2, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1596
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2146
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:375
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1524
@ Keep
No function return thunk.
Definition CodeGen.h:162
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two BBs are equal depends on the contents of the BasicBlock and the incoming values ...
SmallDenseMap< BasicBlock *, Value *, 8 > BB2ValueMap
Phi2IVsMap * PhiPredIVs
DenseMap< PHINode *, BB2ValueMap > Phi2IVsMap
static bool canBeMerged(const BasicBlock *BB)
BasicBlock * BB
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const EqualBBWrapper * getEmptyKey()
static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS)
static unsigned getHashValue(const EqualBBWrapper *EBW)
static const EqualBBWrapper * getTombstoneKey()
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:312
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276