// Extracted from a doxygen source listing of LLVM (22.0.0git),
// lib/Transforms/Utils/SimplifyCFG.cpp.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cmath>
84#include <cstddef>
85#include <cstdint>
86#include <iterator>
87#include <map>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
113 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
149 cl::desc("Sink common instructions down to the end block"));
150
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
172 "max-speculation-depth", cl::Hidden, cl::init(10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
201 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
205 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
206 cl::desc("Limit number of blocks a define in a threaded block is allowed "
207 "to be live in"));
208
210
211} // end namespace llvm
212
213STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
214STATISTIC(NumLinearMaps,
215 "Number of switch instructions turned into linear mapping");
216STATISTIC(NumLookupTables,
217 "Number of switch instructions turned into lookup tables");
219 NumLookupTablesHoles,
220 "Number of switch instructions turned into lookup tables (holes checked)");
221STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
222STATISTIC(NumFoldValueComparisonIntoPredecessors,
223 "Number of value comparisons folded into predecessor basic blocks");
224STATISTIC(NumFoldBranchToCommonDest,
225 "Number of branches folded into predecessor basic block");
227 NumHoistCommonCode,
228 "Number of common instruction 'blocks' hoisted up to the begin block");
229STATISTIC(NumHoistCommonInstrs,
230 "Number of common instructions hoisted up to the begin block");
231STATISTIC(NumSinkCommonCode,
232 "Number of common instruction 'blocks' sunk down to the end block");
233STATISTIC(NumSinkCommonInstrs,
234 "Number of common instructions sunk down to the end block");
235STATISTIC(NumSpeculations, "Number of speculative executed instructions");
236STATISTIC(NumInvokes,
237 "Number of invokes with empty resume blocks simplified into calls");
238STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
239STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241namespace {
242
243// The first field contains the value that the switch produces when a certain
244// case group is selected, and the second field is a vector containing the
245// cases composing the case group.
246using SwitchCaseResultVectorTy =
248
249// The first field contains the phi node that generates a result of the switch
250// and the second field contains the value generated for a certain case in the
251// switch for that PHI.
252using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
253
254/// ValueEqualityComparisonCase - Represents a case of a switch.
255struct ValueEqualityComparisonCase {
257 BasicBlock *Dest;
258
259 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
260 : Value(Value), Dest(Dest) {}
261
262 bool operator<(ValueEqualityComparisonCase RHS) const {
263 // Comparing pointers is ok as we only rely on the order for uniquing.
264 return Value < RHS.Value;
265 }
266
267 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
268};
269
/// Driver object for the CFG peephole simplifications in this file: each
/// simplify*/fold*/hoist* member attacks one terminator shape, and `run`
/// iterates `simplifyOnce` until no member requests another pass.
class SimplifyCFGOpt {
  const TargetTransformInfo &TTI; // Cost model consulted for speculation.
  DomTreeUpdater *DTU;            // May be null; never a PostDomTree (asserted).
  const DataLayout &DL;
  ArrayRef<WeakVH> LoopHeaders;   // Blocks the caller wants preserved as headers.
  const SimplifyCFGOptions &Options;
  bool Resimplify;                // Set by requestResimplify() to rerun simplifyOnce.

  // Recognize terminators of the form "switch/branch on (x == C)" and decode
  // their case lists; used by the value-comparison folding below.
  Value *isValueEqualityComparison(Instruction *TI);
  BasicBlock *getValueEqualityComparisonCases(
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
  bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
                                                     BasicBlock *Pred,
                                                     IRBuilder<> &Builder);
  bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
                                                    Instruction *PTI,
                                                    IRBuilder<> &Builder);
  bool foldValueComparisonIntoPredecessors(Instruction *TI,
                                           IRBuilder<> &Builder);

  // One simplify* entry point per terminator kind.
  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
  bool simplifySingleResume(ResumeInst *RI);
  bool simplifyCommonResume(ResumeInst *RI);
  bool simplifyCleanupReturn(CleanupReturnInst *RI);
  bool simplifyUnreachable(UnreachableInst *UI);
  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
  bool simplifyIndirectBr(IndirectBrInst *IBI);
  bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
  bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);

  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                             IRBuilder<> &Builder);
  bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
                                                   SelectInst *Select,
                                                   IRBuilder<> &Builder);
  // Hoisting/sinking of instructions common to multiple successors.
  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
  bool hoistSuccIdenticalTerminatorToSwitchOrIf(
      Instruction *TI, Instruction *I1,
      SmallVectorImpl<Instruction *> &OtherSuccTIs,
      ArrayRef<BasicBlock *> UniqueSuccessors);
  bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
  bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
                                  uint32_t TrueWeight, uint32_t FalseWeight);
  bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
                                 const DataLayout &DL);
  bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
  bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
  bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);

public:
  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
                 const SimplifyCFGOptions &Opts)
      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
    assert((!DTU || !DTU->hasPostDomTree()) &&
           "SimplifyCFG is not yet capable of maintaining validity of a "
           "PostDomTree, so don't ask for it.");
  }

  bool simplifyOnce(BasicBlock *BB);
  bool run(BasicBlock *BB);

  // Helper to set Resimplify and return change indication.
  bool requestResimplify() {
    Resimplify = true;
    return true;
  }
};
342
343// we synthesize a || b as select a, true, b
344// we synthesize a && b as select a, b, false
345// this function determines if SI is playing one of those roles.
346[[maybe_unused]] bool
347isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
348 return ((isa<ConstantInt>(SI->getTrueValue()) &&
349 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
350 (isa<ConstantInt>(SI->getFalseValue()) &&
351 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
352}
353
354} // end anonymous namespace
355
356/// Return true if all the PHI nodes in the basic block \p BB
357/// receive compatible (identical) incoming values when coming from
358/// all of the predecessor blocks that are specified in \p IncomingBlocks.
359///
360/// Note that if the values aren't exactly identical, but \p EquivalenceSet
361/// is provided, and *both* of the values are present in the set,
362/// then they are considered equal.
364 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
365 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
366 assert(IncomingBlocks.size() == 2 &&
367 "Only for a pair of incoming blocks at the time!");
368
369 // FIXME: it is okay if one of the incoming values is an `undef` value,
370 // iff the other incoming value is guaranteed to be a non-poison value.
371 // FIXME: it is okay if one of the incoming values is a `poison` value.
372 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
373 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
374 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
375 if (IV0 == IV1)
376 return true;
377 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
378 EquivalenceSet->contains(IV1))
379 return true;
380 return false;
381 });
382}
383
384/// Return true if it is safe to merge these two
385/// terminator instructions together.
386static bool
388 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
389 if (SI1 == SI2)
390 return false; // Can't merge with self!
391
392 // It is not safe to merge these two switch instructions if they have a common
393 // successor, and if that successor has a PHI node, and if *that* PHI node has
394 // conflicting incoming values from the two switch blocks.
395 BasicBlock *SI1BB = SI1->getParent();
396 BasicBlock *SI2BB = SI2->getParent();
397
399 bool Fail = false;
400 for (BasicBlock *Succ : successors(SI2BB)) {
401 if (!SI1Succs.count(Succ))
402 continue;
403 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
404 continue;
405 Fail = true;
406 if (FailBlocks)
407 FailBlocks->insert(Succ);
408 else
409 break;
410 }
411
412 return !Fail;
413}
414
415/// Update PHI nodes in Succ to indicate that there will now be entries in it
416/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
417/// will be the same as those coming in from ExistPred, an existing predecessor
418/// of Succ.
419static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
420 BasicBlock *ExistPred,
421 MemorySSAUpdater *MSSAU = nullptr) {
422 for (PHINode &PN : Succ->phis())
423 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
424 if (MSSAU)
425 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
426 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
427}
428
429/// Compute an abstract "cost" of speculating the given instruction,
430/// which is assumed to be safe to speculate. TCC_Free means cheap,
431/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
432/// expensive.
434 const TargetTransformInfo &TTI) {
435 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
436}
437
438/// If we have a merge point of an "if condition" as accepted above,
439/// return true if the specified value dominates the block. We don't handle
440/// the true generality of domination here, just a special case which works
441/// well enough for us.
442///
443/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
444/// see if V (which must be an instruction) and its recursive operands
445/// that do not dominate BB have a combined cost lower than Budget and
446/// are non-trapping. If both are true, the instruction is inserted into the
447/// set and true is returned.
448///
449/// The cost for most non-trapping instructions is defined as 1 except for
450/// Select whose cost is 2.
451///
452/// After this function returns, Cost is increased by the cost of
453/// V plus its non-dominating operands. If that cost is greater than
454/// Budget, false is returned and Cost is undefined.
456 Value *V, BasicBlock *BB, Instruction *InsertPt,
457 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
459 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
460 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
461 // so limit the recursion depth.
462 // TODO: While this recursion limit does prevent pathological behavior, it
463 // would be better to track visited instructions to avoid cycles.
465 return false;
466
468 if (!I) {
469 // Non-instructions dominate all instructions and can be executed
470 // unconditionally.
471 return true;
472 }
473 BasicBlock *PBB = I->getParent();
474
475 // We don't want to allow weird loops that might have the "if condition" in
476 // the bottom of this block.
477 if (PBB == BB)
478 return false;
479
480 // If this instruction is defined in a block that contains an unconditional
481 // branch to BB, then it must be in the 'conditional' part of the "if
482 // statement". If not, it definitely dominates the region.
484 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
485 return true;
486
487 // If we have seen this instruction before, don't count it again.
488 if (AggressiveInsts.count(I))
489 return true;
490
491 // Okay, it looks like the instruction IS in the "condition". Check to
492 // see if it's a cheap instruction to unconditionally compute, and if it
493 // only uses stuff defined outside of the condition. If so, hoist it out.
494 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
495 return false;
496
497 // Overflow arithmetic instruction plus extract value are usually generated
498 // when a division is being replaced. But, in this case, the zero check may
499 // still be kept in the code. In that case it would be worth to hoist these
500 // two instruction out of the basic block. Let's treat this pattern as one
501 // single cheap instruction here!
502 WithOverflowInst *OverflowInst;
503 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
504 ZeroCostInstructions.insert(OverflowInst);
505 Cost += 1;
506 } else if (!ZeroCostInstructions.contains(I))
507 Cost += computeSpeculationCost(I, TTI);
508
509 // Allow exactly one instruction to be speculated regardless of its cost
510 // (as long as it is safe to do so).
511 // This is intended to flatten the CFG even if the instruction is a division
512 // or other expensive operation. The speculation of an expensive instruction
513 // is expected to be undone in CodeGenPrepare if the speculation has not
514 // enabled further IR optimizations.
515 if (Cost > Budget &&
516 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
517 !Cost.isValid()))
518 return false;
519
520 // Okay, we can only really hoist these out if their operands do
521 // not take us over the cost threshold.
522 for (Use &Op : I->operands())
523 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
524 TTI, AC, ZeroCostInstructions, Depth + 1))
525 return false;
526 // Okay, it's safe to do this! Remember this instruction.
527 AggressiveInsts.insert(I);
528 return true;
529}
530
531/// Extract ConstantInt from value, looking through IntToPtr
532/// and PointerNullValue. Return NULL if value is not a constant int.
534 // Normal constant int.
536 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
537 return CI;
538
539 // It is not safe to look through inttoptr or ptrtoint when using unstable
540 // pointer types.
541 if (DL.hasUnstableRepresentation(V->getType()))
542 return nullptr;
543
544 // This is some kind of pointer constant. Turn it into a pointer-sized
545 // ConstantInt if possible.
546 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
547
548 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
550 return ConstantInt::get(IntPtrTy, 0);
551
552 // IntToPtr const int, we can look through this if the semantics of
553 // inttoptr for this address space are a simple (truncating) bitcast.
555 if (CE->getOpcode() == Instruction::IntToPtr)
556 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
557 // The constant is very likely to have the right type already.
558 if (CI->getType() == IntPtrTy)
559 return CI;
560 else
561 return cast<ConstantInt>(
562 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
563 }
564 return nullptr;
565}
566
567namespace {
568
569/// Given a chain of or (||) or and (&&) comparison of a value against a
570/// constant, this will try to recover the information required for a switch
571/// structure.
572/// It will depth-first traverse the chain of comparison, seeking for patterns
573/// like %a == 12 or %a < 4 and combine them to produce a set of integer
574/// representing the different cases for the switch.
575/// Note that if the chain is composed of '||' it will build the set of elements
576/// that matches the comparisons (i.e. any of this value validate the chain)
577/// while for a chain of '&&' it will build the set elements that make the test
578/// fail.
579struct ConstantComparesGatherer {
580 const DataLayout &DL;
581
582 /// Value found for the switch comparison
583 Value *CompValue = nullptr;
584
585 /// Extra clause to be checked before the switch
586 Value *Extra = nullptr;
587
588 /// Set of integers to match in switch
590
591 /// Number of comparisons matched in the and/or chain
592 unsigned UsedICmps = 0;
593
594 /// If the elements in Vals matches the comparisons
595 bool IsEq = false;
596
597 // Used to check if the first matched CompValue shall be the Extra check.
598 bool IgnoreFirstMatch = false;
599 bool MultipleMatches = false;
600
601 /// Construct and compute the result for the comparison instruction Cond
602 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
603 gather(Cond);
604 if (CompValue || !MultipleMatches)
605 return;
606 Extra = nullptr;
607 Vals.clear();
608 UsedICmps = 0;
609 IgnoreFirstMatch = true;
610 gather(Cond);
611 }
612
613 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
614 ConstantComparesGatherer &
615 operator=(const ConstantComparesGatherer &) = delete;
616
617private:
618 /// Try to set the current value used for the comparison, it succeeds only if
619 /// it wasn't set before or if the new value is the same as the old one
620 bool setValueOnce(Value *NewVal) {
621 if (IgnoreFirstMatch) {
622 IgnoreFirstMatch = false;
623 return false;
624 }
625 if (CompValue && CompValue != NewVal) {
626 MultipleMatches = true;
627 return false;
628 }
629 CompValue = NewVal;
630 return true;
631 }
632
633 /// Try to match Instruction "I" as a comparison against a constant and
634 /// populates the array Vals with the set of values that match (or do not
635 /// match depending on isEQ).
636 /// Return false on failure. On success, the Value the comparison matched
637 /// against is placed in CompValue.
638 /// If CompValue is already set, the function is expected to fail if a match
639 /// is found but the value compared to is different.
640 bool matchInstruction(Instruction *I, bool isEQ) {
641 if (match(I, m_Not(m_Instruction(I))))
642 isEQ = !isEQ;
643
644 Value *Val;
645 if (match(I, m_NUWTrunc(m_Value(Val)))) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(Val))
648 return false;
649 UsedICmps++;
650 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
651 return true;
652 }
653 // If this is an icmp against a constant, handle this as one of the cases.
654 ICmpInst *ICI;
655 ConstantInt *C;
656 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
657 (C = getConstantInt(I->getOperand(1), DL)))) {
658 return false;
659 }
660
661 Value *RHSVal;
662 const APInt *RHSC;
663
664 // Pattern match a special case
665 // (x & ~2^z) == y --> x == y || x == y|2^z
666 // This undoes a transformation done by instcombine to fuse 2 compares.
667 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
668 // It's a little bit hard to see why the following transformations are
669 // correct. Here is a CVC3 program to verify them for 64-bit values:
670
671 /*
672 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
673 x : BITVECTOR(64);
674 y : BITVECTOR(64);
675 z : BITVECTOR(64);
676 mask : BITVECTOR(64) = BVSHL(ONE, z);
677 QUERY( (y & ~mask = y) =>
678 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
679 );
680 QUERY( (y | mask = y) =>
681 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
682 );
683 */
684
685 // Please note that each pattern must be a dual implication (<--> or
686 // iff). One directional implication can create spurious matches. If the
687 // implication is only one-way, an unsatisfiable condition on the left
688 // side can imply a satisfiable condition on the right side. Dual
689 // implication ensures that satisfiable conditions are transformed to
690 // other satisfiable conditions and unsatisfiable conditions are
691 // transformed to other unsatisfiable conditions.
692
693 // Here is a concrete example of a unsatisfiable condition on the left
694 // implying a satisfiable condition on the right:
695 //
696 // mask = (1 << z)
697 // (x & ~mask) == y --> (x == y || x == (y | mask))
698 //
699 // Substituting y = 3, z = 0 yields:
700 // (x & -2) == 3 --> (x == 3 || x == 2)
701
702 // Pattern match a special case:
703 /*
704 QUERY( (y & ~mask = y) =>
705 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
706 );
707 */
708 if (match(ICI->getOperand(0),
709 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
710 APInt Mask = ~*RHSC;
711 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
712 // If we already have a value for the switch, it has to match!
713 if (!setValueOnce(RHSVal))
714 return false;
715
716 Vals.push_back(C);
717 Vals.push_back(
718 ConstantInt::get(C->getContext(),
719 C->getValue() | Mask));
720 UsedICmps++;
721 return true;
722 }
723 }
724
725 // Pattern match a special case:
726 /*
727 QUERY( (y | mask = y) =>
728 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
729 );
730 */
731 if (match(ICI->getOperand(0),
732 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
733 APInt Mask = *RHSC;
734 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
735 // If we already have a value for the switch, it has to match!
736 if (!setValueOnce(RHSVal))
737 return false;
738
739 Vals.push_back(C);
740 Vals.push_back(ConstantInt::get(C->getContext(),
741 C->getValue() & ~Mask));
742 UsedICmps++;
743 return true;
744 }
745 }
746
747 // If we already have a value for the switch, it has to match!
748 if (!setValueOnce(ICI->getOperand(0)))
749 return false;
750
751 UsedICmps++;
752 Vals.push_back(C);
753 return true;
754 }
755
756 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
757 ConstantRange Span =
759
760 // Shift the range if the compare is fed by an add. This is the range
761 // compare idiom as emitted by instcombine.
762 Value *CandidateVal = I->getOperand(0);
763 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
764 Span = Span.subtract(*RHSC);
765 CandidateVal = RHSVal;
766 }
767
768 // If this is an and/!= check, then we are looking to build the set of
769 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
770 // x != 0 && x != 1.
771 if (!isEQ)
772 Span = Span.inverse();
773
774 // If there are a ton of values, we don't want to make a ginormous switch.
775 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
776 return false;
777 }
778
779 // If we already have a value for the switch, it has to match!
780 if (!setValueOnce(CandidateVal))
781 return false;
782
783 // Add all values from the range to the set
784 APInt Tmp = Span.getLower();
785 do
786 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
787 while (++Tmp != Span.getUpper());
788
789 UsedICmps++;
790 return true;
791 }
792
793 /// Given a potentially 'or'd or 'and'd together collection of icmp
794 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
795 /// the value being compared, and stick the list constants into the Vals
796 /// vector.
797 /// One "Extra" case is allowed to differ from the other.
798 void gather(Value *V) {
799 Value *Op0, *Op1;
800 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
801 IsEq = true;
802 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
803 IsEq = false;
804 else
805 return;
806 // Keep a stack (SmallVector for efficiency) for depth-first traversal
807 SmallVector<Value *, 8> DFT{Op0, Op1};
808 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
809
810 while (!DFT.empty()) {
811 V = DFT.pop_back_val();
812
813 if (Instruction *I = dyn_cast<Instruction>(V)) {
814 // If it is a || (or && depending on isEQ), process the operands.
815 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
816 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
817 if (Visited.insert(Op1).second)
818 DFT.push_back(Op1);
819 if (Visited.insert(Op0).second)
820 DFT.push_back(Op0);
821
822 continue;
823 }
824
825 // Try to match the current instruction
826 if (matchInstruction(I, IsEq))
827 // Match succeed, continue the loop
828 continue;
829 }
830
831 // One element of the sequence of || (or &&) could not be match as a
832 // comparison against the same value as the others.
833 // We allow only one "Extra" case to be checked before the switch
834 if (!Extra) {
835 Extra = V;
836 continue;
837 }
838 // Failed to parse a proper sequence, abort now
839 CompValue = nullptr;
840 break;
841 }
842 }
843};
844
845} // end anonymous namespace
846
848 MemorySSAUpdater *MSSAU = nullptr) {
849 Instruction *Cond = nullptr;
851 Cond = dyn_cast<Instruction>(SI->getCondition());
852 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
853 if (BI->isConditional())
854 Cond = dyn_cast<Instruction>(BI->getCondition());
855 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
856 Cond = dyn_cast<Instruction>(IBI->getAddress());
857 }
858
859 TI->eraseFromParent();
860 if (Cond)
862}
863
864/// Return true if the specified terminator checks
865/// to see if a value is equal to constant integer value.
866Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
867 Value *CV = nullptr;
868 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
869 // Do not permit merging of large switch instructions into their
870 // predecessors unless there is only one predecessor.
871 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
872 CV = SI->getCondition();
873 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
874 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
875 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
876 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
877 CV = ICI->getOperand(0);
878 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
879 if (Trunc->hasNoUnsignedWrap())
880 CV = Trunc->getOperand(0);
881 }
882 }
883
884 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
885 if (CV) {
886 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
887 Value *Ptr = PTII->getPointerOperand();
888 if (DL.hasUnstableRepresentation(Ptr->getType()))
889 return CV;
890 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
891 CV = Ptr;
892 }
893 }
894 return CV;
895}
896
897/// Given a value comparison instruction,
898/// decode all of the 'cases' that it represents and return the 'default' block.
899BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
900 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
901 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
902 Cases.reserve(SI->getNumCases());
903 for (auto Case : SI->cases())
904 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
905 Case.getCaseSuccessor()));
906 return SI->getDefaultDest();
907 }
908
909 BranchInst *BI = cast<BranchInst>(TI);
910 Value *Cond = BI->getCondition();
911 ICmpInst::Predicate Pred;
912 ConstantInt *C;
913 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
914 Pred = ICI->getPredicate();
915 C = getConstantInt(ICI->getOperand(1), DL);
916 } else {
917 Pred = ICmpInst::ICMP_NE;
918 auto *Trunc = cast<TruncInst>(Cond);
919 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
920 }
921 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
922 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
923 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
924}
925
/// Given a vector of bb/value pairs, remove any entries
/// in the list that match the specified block.
/// (llvm::erase compares each ValueEqualityComparisonCase against BB —
/// presumably by its destination block; verify against the case struct.)
static void
    std::vector<ValueEqualityComparisonCase> &Cases) {
  llvm::erase(Cases, BB);
}
933
934/// Return true if there are any keys in C1 that exist in C2 as well.
935static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
936 std::vector<ValueEqualityComparisonCase> &C2) {
937 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
938
939 // Make V1 be smaller than V2.
940 if (V1->size() > V2->size())
941 std::swap(V1, V2);
942
943 if (V1->empty())
944 return false;
945 if (V1->size() == 1) {
946 // Just scan V2.
947 ConstantInt *TheVal = (*V1)[0].Value;
948 for (const ValueEqualityComparisonCase &VECC : *V2)
949 if (TheVal == VECC.Value)
950 return true;
951 }
952
953 // Otherwise, just sort both lists and compare element by element.
954 array_pod_sort(V1->begin(), V1->end());
955 array_pod_sort(V2->begin(), V2->end());
956 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
957 while (i1 != e1 && i2 != e2) {
958 if ((*V1)[i1].Value == (*V2)[i2].Value)
959 return true;
960 if ((*V1)[i1].Value < (*V2)[i2].Value)
961 ++i1;
962 else
963 ++i2;
964 }
965 return false;
966}
967
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
  eliminateBlockCases(PredDef, PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
  eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!valuesOverlap(PredCases, ThisCases))
      return false;

    if (isa<BranchInst>(TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");


      if (DTU)
        DTU->applyUpdates(
            {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    // Wrap the switch so that removing cases keeps any branch-weight profile
    // metadata in sync.
    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (const ValueEqualityComparisonCase &Case : PredCases)
      DeadCases.insert(Case.Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    // Walk the cases backwards so that removeCase doesn't disturb the cases
    // still to be visited.
    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(i->getCaseValue())) {
        Successor->removePredecessor(PredDef);
        SI.removeCase(i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    // Only delete the Pred->successor CFG edge when the successor lost *all*
    // of its cases.
    if (DTU) {
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back({DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (const auto &[Value, Dest] : PredCases)
    if (Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (const auto &[Value, Dest] : ThisCases)
    if (Value == TIV) {
      TheRealDest = Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges. CheckEdge is cleared on the first
  // occurrence of TheRealDest so that any *repeated* edge to TheRealDest still
  // has its extra PHI entry removed.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Succ);
      Succ->removePredecessor(TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1121
1122namespace {
1123
1124/// This class implements a stable ordering of constant
1125/// integers that does not depend on their address. This is important for
1126/// applications that sort ConstantInt's to ensure uniqueness.
1127struct ConstantIntOrdering {
1128 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1129 return LHS->getValue().ult(RHS->getValue());
1130 }
1131};
1132
1133} // end anonymous namespace
1134
    ConstantInt *const *P2) {
  const ConstantInt *LHS = *P1;
  const ConstantInt *RHS = *P2;
  // Pointer equality is sufficient: ConstantInts are uniqued.
  if (LHS == RHS)
    return 0;
  // Note the inverted result: when LHS is the smaller value we report
  // "greater" (1), so qsort-style callers get a descending order.
  return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
}
1143
/// Get the branch weights of a given terminator into \p Weights; the default
/// destination's weight is placed at the front of the vector. If TI is a
/// conditional branch on an equality compare, the metadata order has to be
/// swapped to achieve that (see below).
    SmallVectorImpl<uint64_t> &Weights) {
  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
  assert(MD && "Invalid branch-weight metadata");
  extractFromBranchWeightMD64(MD, Weights);

  // If TI is a conditional eq, the default case is the false case,
  // and the corresponding branch-weight data is at index 2. We swap the
  // default weight to be the first entry.
  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    // A conditional branch carries exactly two weights: (true, false).
    assert(Weights.size() == 2);
    auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICI)
      return;

    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      std::swap(Weights.front(), Weights.back());
  }
}
1166
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc::getDropped());
    } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
      // Same source location as the branch: keep it, but record the atom
      // group in VMap so it can be remapped (Key Instructions bookkeeping).
      mapAtomInstance(DL, VMap);
    }

    RemapInstruction(NewBonusInst, VMap,

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(PredBlock, PTI->getIterator());
    auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
    RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,

    NewBonusInst->takeName(&BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(BonusInst.uses())) {
      auto *UI = cast<Instruction>(U.getUser());
      auto *PN = dyn_cast<PHINode>(UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }

  // Key Instructions: We may have propagated atom info into the pred. If the
  // pred's terminator already has atom info do nothing as merging would drop
  // one atom group anyway. If it doesn't, propagate the remapped atom group
  // from BB's terminator.
  if (auto &PredDL = PTI->getDebugLoc()) {
    auto &DL = BB->getTerminator()->getDebugLoc();
    if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
        PredDL.isSameSourceLocation(DL)) {
      PTI->setDebugLoc(DL);
      RemapSourceAtom(PTI, VMap);
    }
  }
}
1245
/// Fold the value-equality comparison \p TI into the comparison \p PTI in its
/// predecessor block, which tests the same value \p CV. PTI is replaced by a
/// merged switch; PHI entries, branch weights and (when DTU is set) the
/// dominator tree are kept up to date. Always returns true.
bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();


  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  SmallMapVector<BasicBlock *, int, 8> NewSuccessors;

  // Update the branch weight metadata along the way
  SmallVector<uint64_t, 8> Weights;
  bool PredHasWeights = hasBranchWeightMD(*PTI);
  bool SuccHasWeights = hasBranchWeightMD(*TI);

  if (PredHasWeights) {
    getBranchWeights(PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(1 + PredCases.size(), 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    getBranchWeights(TI, SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(1 + BBCases.size(), 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        // (swap-with-back + pop_back is an O(1) unordered erase.)
        std::swap(PredCases[i], PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        std::swap(PredCases[i], PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (const ValueEqualityComparisonCase &Case : BBCases)
      if (PTIHandled.count(Case.Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(WeightsForHandled[Case.Value]);
        PredCases.push_back(Case);
        ++NewSuccessors[Case.Dest];
        PTIHandled.erase(Case.Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(WeightsForHandled[I]);
      PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {llvm::from_range, successors(Pred)};
    Updates.reserve(Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    // Add one PHI entry per new edge into this successor.
    for (auto I : seq(NewSuccessor.second)) {
      (void)I;
      addPredecessorToBlock(NewSuccessor.first, Pred, BB);
    }
    if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
      Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CV->getType()) &&
           "Should not end up here with unstable pointers");
    CV =
        Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(V.Value, V.Dest);

  if (PredHasWeights || SuccHasWeights)
    setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);


  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
        BranchInst::Create(InfLoopBlock, InfLoopBlock);
        if (DTU)
          Updates.push_back(
              {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(i, InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back({DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1459
1460/// The specified terminator is a value equality comparison instruction
1461/// (either a switch or a branch on "X == c").
1462/// See if any of the predecessors of the terminator block are value comparisons
1463/// on the same value. If so, and if safe to do so, fold them together.
1464bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1465 IRBuilder<> &Builder) {
1466 BasicBlock *BB = TI->getParent();
1467 Value *CV = isValueEqualityComparison(TI); // CondVal
1468 assert(CV && "Not a comparison?");
1469
1470 bool Changed = false;
1471
1472 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1473 while (!Preds.empty()) {
1474 BasicBlock *Pred = Preds.pop_back_val();
1475 Instruction *PTI = Pred->getTerminator();
1476
1477 // Don't try to fold into itself.
1478 if (Pred == BB)
1479 continue;
1480
1481 // See if the predecessor is a comparison with the same value.
1482 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1483 if (PCV != CV)
1484 continue;
1485
1486 SmallSetVector<BasicBlock *, 4> FailBlocks;
1487 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1488 for (auto *Succ : FailBlocks) {
1489 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1490 return false;
1491 }
1492 }
1493
1494 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1495 Changed = true;
1496 }
1497 return Changed;
1498}
1499
// If we would need to insert a select that uses the value of this invoke
// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
// need to do this), we can't hoist the invoke, as there is nowhere to put the
// select in this case.
// Returns true when hoisting is safe: no PHI in a successor of BB1 merges
// differing incoming values where one side is I1 or I2 itself.
    Instruction *I1, Instruction *I2) {
  for (BasicBlock *Succ : successors(BB1)) {
    for (const PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
      // A PHI that disagrees between the two blocks and references one of
      // the instructions being hoisted would require selecting on the
      // invoke's result — there is no legal insertion point for that select.
      if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
        return false;
      }
    }
  }
  return true;
}
1517
// Get interesting characteristics of instructions that
// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
// instructions can be reordered across.
// The returned bitmask is consumed by isSafeToHoistInstr below.

  unsigned Flags = 0;
  if (I->mayReadFromMemory())
    Flags |= SkipReadMem;
  // We can't arbitrarily move around allocas, e.g. moving allocas (especially
  // inalloca) across stacksave/stackrestore boundaries.
  if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
    Flags |= SkipSideEffect;
    Flags |= SkipImplicitControlFlow;
  return Flags;
}
1539
// Returns true if it is safe to reorder an instruction across preceding
// instructions in a basic block. \p Flags is the bitmask accumulated by
// skippedInstrFlags for the instructions being crossed.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
  // Don't reorder a store over a load.
  if ((Flags & SkipReadMem) && I->mayWriteToMemory())
    return false;

  // If we have seen an instruction with side effects, it's unsafe to reorder an
  // instruction which reads memory or itself has side effects.
  if ((Flags & SkipSideEffect) &&
      (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
    return false;

  // Reordering across an instruction which does not necessarily transfer
  // control to the next instruction is speculation.
    return false;

  // Hoisting of llvm.deoptimize is only legal together with the next return
  // instruction, which this pass is not always able to do.
  if (auto *CB = dyn_cast<CallBase>(I))
    if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
      return false;

  // It's also unsafe/illegal to hoist an instruction above its instruction
  // operands
  BasicBlock *BB = I->getParent();
  for (Value *Op : I->operands()) {
    if (auto *J = dyn_cast<Instruction>(Op))
      if (J->getParent() == BB)
        return false;
  }

  return true;
}
1575
1576static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1577
/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
/// instructions \p I1 and \p I2 can and should be hoisted.
    const TargetTransformInfo &TTI) {
  // If we're going to hoist a call, make sure that the two instructions
  // we're commoning/hoisting are both marked with musttail, or neither of
  // them is marked as such. Otherwise, we might end up in a situation where
  // we hoist from a block where the terminator is a `ret` to a block where
  // the terminator is a `br`, and `musttail` calls expect to be followed by
  // a return.
  auto *C1 = dyn_cast<CallInst>(I1);
  auto *C2 = dyn_cast<CallInst>(I2);
  if (C1 && C2)
    if (C1->isMustTailCall() != C2->isMustTailCall())
      return false;

  // Defer to the target's profitability hook.
  if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
    return false;

  // If any of the two call sites has nomerge or convergent attribute, stop
  // hoisting.
  if (const auto *CB1 = dyn_cast<CallBase>(I1))
    if (CB1->cannotMerge() || CB1->isConvergent())
      return false;
  if (const auto *CB2 = dyn_cast<CallBase>(I2))
    if (CB2->cannotMerge() || CB2->isConvergent())
      return false;

  return true;
}
1608
/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisted in
/// hoistCommonCodeFromSuccessors. e.g. The input:
/// I1 DVRs: { x, z },
/// OtherInsts: { I2 DVRs: { x, y, z } }
/// would result in hoisting only DbgVariableRecord x.
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators.
  Itrs.reserve(OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current are identical.
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
          return Itrs[0].first->isIdenticalToWhenDefined(*I);
        });
  };

  // Collect the iterators.
  Itrs.push_back(
      {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  for (Instruction *Other : OtherInsts) {
    // If any instruction carries no DbgRecords there is nothing common to
    // hoist.
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
  // the lock-step DbgRecord are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Itrs, atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord.
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
      }
    }
  }
}
1665
    const Instruction *I2) {
  // Fast path: literally identical instructions (with attribute intersection
  // allowed for calls).
  if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
    return true;

  // Compares also match when the predicate is swapped together with the
  // operands, e.g. (icmp slt a, b) vs (icmp sgt b, a).
  if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
    if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
      return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
             Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
             Cmp1->getOperand(1) == Cmp2->getOperand(0);

  // Commutative operations match with their first two operands swapped; any
  // remaining operands must agree exactly.
  if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
    return I1->getOperand(0) == I2->getOperand(1) &&
           I1->getOperand(1) == I2->getOperand(0) &&
           equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
  }

  return false;
}
1685
1686/// If the target supports conditional faulting,
1687/// we look for the following pattern:
1688/// \code
1689/// BB:
1690/// ...
1691/// %cond = icmp ult %x, %y
1692/// br i1 %cond, label %TrueBB, label %FalseBB
1693/// FalseBB:
1694/// store i32 1, ptr %q, align 4
1695/// ...
1696/// TrueBB:
1697/// %maskedloadstore = load i32, ptr %b, align 4
1698/// store i32 %maskedloadstore, ptr %p, align 4
1699/// ...
1700/// \endcode
1701///
1702/// and transform it into:
1703///
1704/// \code
1705/// BB:
1706/// ...
1707/// %cond = icmp ult %x, %y
1708/// %maskedloadstore = cload i32, ptr %b, %cond
1709/// cstore i32 %maskedloadstore, ptr %p, %cond
1710/// cstore i32 1, ptr %q, ~%cond
1711/// br i1 %cond, label %TrueBB, label %FalseBB
1712/// FalseBB:
1713/// ...
1714/// TrueBB:
1715/// ...
1716/// \endcode
1717///
1718/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1719/// e.g.
1720///
1721/// \code
1722/// %vcond = bitcast i1 %cond to <1 x i1>
1723/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1724/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1725/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1726/// call void @llvm.masked.store.v1i32.p0
1727/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1728/// %cond.not = xor i1 %cond, true
/// %vcond.not = bitcast i1 %cond.not to <1 x i1>
1730/// call void @llvm.masked.store.v1i32.p0
/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
1732/// \endcode
1733///
1734/// So we need to turn hoisted load/store into cload/cstore.
1735///
1736/// \param BI The branch instruction.
1737/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1738/// will be speculated.
1739/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
    BranchInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert, Instruction *Sel) {
  auto &Context = BI->getParent()->getContext();
  // Scalar accesses are rewritten as single-element vector operations, so the
  // mask type is always <1 x i1>.
  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
  auto *Cond = BI->getOperand(0);
  // Construct the condition if needed.
  BasicBlock *BB = BI->getParent();
  Value *Mask = nullptr;
  Value *MaskFalse = nullptr;
  Value *MaskTrue = nullptr;
  if (Invert.has_value()) {
    // Triangle CFG: only one successor is speculated, so a single mask
    // suffices. When Invert is true, FalseBB is the speculated block and the
    // condition must be negated first.
    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
    Mask = Builder.CreateBitCast(
        *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
        VCondTy);
  } else {
    // Diamond CFG: instructions come from both successors, so build one mask
    // per arm here and select the right one per instruction in the loop below.
    IRBuilder<> Builder(BI);
    MaskFalse = Builder.CreateBitCast(
        Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
    MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
  }
  // Strip any chain of bitcasts so the re-bitcasts below start from the
  // underlying value.
  auto PeekThroughBitcasts = [](Value *V) {
    while (auto *BitCast = dyn_cast<BitCastInst>(V))
      V = BitCast->getOperand(0);
    return V;
  };
  for (auto *I : SpeculatedConditionalLoadsStores) {
    IRBuilder<> Builder(Invert.has_value() ? I : BI);
    if (!Invert.has_value())
      Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
    // We currently assume conditional faulting load/store is supported for
    // scalar types only when creating new instructions. This can be easily
    // extended for vector types in the future.
    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
    auto *Op0 = I->getOperand(0);
    CallInst *MaskedLoadStore = nullptr;
    if (auto *LI = dyn_cast<LoadInst>(I)) {
      // Handle Load.
      auto *Ty = I->getType();
      PHINode *PN = nullptr;
      Value *PassThru = nullptr;
      if (Invert.has_value())
        for (User *U : I->users()) {
          if ((PN = dyn_cast<PHINode>(U))) {
            // The PHI's incoming value from BB supplies the result when the
            // load is masked off; feed it to the intrinsic as the passthru.
            PassThru = Builder.CreateBitCast(
                PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                FixedVectorType::get(Ty, 1));
          } else if (auto *Ins = cast<Instruction>(U);
                     Sel && Ins->getParent() == BB) {
            // This happens when store or/and a speculative instruction between
            // load and store were hoisted to the BB. Make sure the masked load
            // inserted before its use.
            // We assume there's one of such use.
            Builder.SetInsertPoint(Ins);
          }
        }
      MaskedLoadStore = Builder.CreateMaskedLoad(
          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
      // Bitcast the <1 x Ty> result back to the scalar type expected by users.
      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
      if (PN)
        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
      I->replaceAllUsesWith(NewLoadStore);
    } else {
      // Handle Store.
      auto *StoredVal = Builder.CreateBitCast(
          PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
      MaskedLoadStore = Builder.CreateMaskedStore(
          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
    }
    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
    //
    // !nonnull, !align : Not support pointer type, no need to keep.
    // !range: Load type is changed from scalar to vector, but the metadata on
    //         vector specifies a per-element range, so the semantics stay the
    //         same. Keep it.
    // !annotation: Not impact semantics. Keep it.
    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
    // FIXME: DIAssignID is not supported for masked store yet.
    // (Verifier::visitDIAssignIDMetadata)
    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
      return Node->getMetadataID() == Metadata::DIAssignIDKind;
    });
    MaskedLoadStore->copyMetadata(*I);
    I->eraseFromParent();
  }
}
1832
    const TargetTransformInfo &TTI) {
  // Not handle volatile or atomic.
  // Each candidate is additionally gated by its corresponding command-line
  // option (HoistLoadsWithCondFaulting / HoistStoresWithCondFaulting).
  bool IsStore = false;
  if (auto *L = dyn_cast<LoadInst>(I)) {
    if (!L->isSimple() || !HoistLoadsWithCondFaulting)
      return false;
  } else if (auto *S = dyn_cast<StoreInst>(I)) {
    if (!S->isSimple() || !HoistStoresWithCondFaulting)
      return false;
    IsStore = true;
  } else
    // Anything other than a load or store can never be a candidate.
    return false;

  // llvm.masked.load/store use i32 for alignment while load/store use i64.
  // That's why we have the alignment limitation.
  // FIXME: Update the prototype of the intrinsics?
  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
}
1853
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
/// given, only perform hoisting in case all successors blocks contain matching
/// instructions only. In that case, all instructions can be hoisted and the
/// original branch will be replaced and selects for PHIs are added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                   bool AllInstsEqOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  BasicBlock *BB = TI->getParent();
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by its address.
  SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
  for (auto *Succ : UniqueSuccessors) {
    if (Succ->hasAddressTaken())
      return false;
    // Use getUniquePredecessor instead of getSinglePredecessor to support
    // multi-cases successors in switch.
    if (Succ->getUniquePredecessor())
      continue;
    // If Succ has >1 predecessors, continue to check if the Succ contains only
    // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
    // can relax the condition based on the assumption that the program would
    // never enter Succ and trigger such an UB.
    if (isa<UnreachableInst>(*Succ->begin()))
      continue;
    return false;
  }
  // The second of pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : UniqueSuccessors) {
    BasicBlock::iterator SuccItr = Succ->begin();
    // A PHI at the top of a successor would need rewriting; bail out.
    if (isa<PHINode>(*SuccItr))
      return false;
    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
  }

  if (AllInstsEqOnly) {
    // Check if all instructions in the successor blocks match. This allows
    // hoisting all instructions and removing the blocks we are hoisting from,
    // so does not add any new instructions.

    // Check if sizes and terminators of all successors match.
    unsigned Size0 = UniqueSuccessors[0]->size();
    Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
    bool AllSame =
        all_of(drop_begin(UniqueSuccessors), [Term0, Size0](BasicBlock *Succ) {
          return Succ->getTerminator()->isIdenticalTo(Term0) &&
                 Succ->size() == Size0;
        });
    if (!AllSame)
      return false;
    // NOTE(review): AllSame is necessarily true after the early return above;
    // this guard is redundant and could be dropped.
    if (AllSame) {
      // Walk all successors backwards in lockstep and require every
      // instruction tuple to be pairwise identical (up to commutativity).
      LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
      while (LRI.isValid()) {
        Instruction *I0 = (*LRI)[0];
        if (any_of(*LRI, [I0](Instruction *I) {
              return !areIdenticalUpToCommutativity(I0, I);
            })) {
          return false;
        }
        --LRI;
      }
    }
    // Now we know that all instructions in all successors can be hoisted. Let
    // the loop below handle the hoisting.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    // Pair.first is a BasicBlock::iterator; it converts to Instruction* for
    // the isa<> check.
    erase_if(SuccIterPairs,
             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    // Check whether the current instruction tuple is identical across all
    // successors, and whether any member is a terminator.
    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    SmallVector<Instruction *, 8> OtherInsts;
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(&*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(
                 TI, I1, OtherInsts, UniqueSuccessors.getArrayRef()) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      // For a normal instruction, we just move one to right before the
      // branch, then replace all uses of the other with the first. Finally,
      // we remove the now redundant second instruction.
      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
      // and leave any that were not hoisted behind (by calling moveBefore
      // rather than moveBeforePreserving).
      I1->moveBefore(TI->getIterator());
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter++;
        assert(I2 != I1);
        if (!I2->use_empty())
          I2->replaceAllUsesWith(I1);
        // Merge IR flags conservatively (e.g. nsw/nuw/fast-math).
        I1->andIRFlags(I2);
        if (auto *CB = dyn_cast<CallBase>(I1)) {
          bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
          assert(Success && "We should not be trying to hoist callbases "
                            "with non-intersectable attributes");
          // For NDEBUG Compile.
          (void)Success;
        }

        combineMetadataForCSE(I1, I2, true);
        // I1 and I2 are being combined into a single instruction. Its debug
        // location is the merged locations of the original instructions.
        I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
        I2->eraseFromParent();
      }
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
2052
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs,
    ArrayRef<BasicBlock *> UniqueSuccessors) {

  // BI is non-null only when TI is a conditional branch (the "if" shape).
  auto *BI = dyn_cast<BranchInst>(TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(I1))
    return false;

  // Pre-check PHIs in the successors of BB1: if any incoming values disagree
  // and cannot be merged via a select, give up before mutating anything.
  for (BasicBlock *Succ : successors(BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow then converting it to a select.
        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(TIParent, TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    I1->replaceAllUsesWith(NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(NT);
    NT->takeName(I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  Locs.push_back(I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    // Cache selects by (BB1V, BB2V) pair so each distinct disagreement gets
    // exactly one select.
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
              BI->getCondition(), BB1V, BB2V,
              isa<FPMathOperator>(PN) ? &PN : nullptr,
              BB1V->getName() + "." + BB2V->getName(), BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, SI);
      }
    }
  }


  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB1)) {
    addPredecessorToBlock(Succ, TIParent, BB1);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU) {
    // TI might be a switch with multi-cases destination, so we need to care for
    // the duplication of successors.
    for (BasicBlock *Succ : UniqueSuccessors)
      Updates.push_back({DominatorTree::Delete, TIParent, Succ});
  }

  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
2182
// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
// into variables.
// Heuristic for whether replacing operand OpIdx of I with a variable (a value
// merged through a PHI) is cheap enough to allow sinking.
                                             int OpIdx) {
  // Divide/Remainder by constant is typically much cheaper than by variable.
  if (I->isIntDivRem())
    return OpIdx != 1;
  // Be conservative for intrinsic calls — presumably because some intrinsic
  // operands are required to be immediates; TODO confirm.
  return !isa<IntrinsicInst>(I);
}
2192
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
// instruction instead. For every value that would be required to be provided by
// PHI node (because an operand varies in each input block), add to PHIOperands.
    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // the same number of uses, and we check later that the uses are consistent.
  std::optional<unsigned> NumUses;
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    // All candidates must have the same number of uses; the first candidate
    // seeds NumUses.
    if (!NumUses)
      NumUses = I->getNumUses();
    else if (NumUses != I->getNumUses())
      return false;
  }

  // All candidates must perform the same operation as the first one.
  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // Uses must be consistent: If I0 is used in a phi node in the sink target,
  // then the other phi operands must match the instructions from Insts. This
  // also has to hold true for any phi nodes that would be created as a result
  // of sinking. Both of these cases are represented by PhiOperands.
  for (const Use &U : I0->uses()) {
    auto It = PHIOperands.find(&U);
    if (It == PHIOperands.end())
      // There may be uses in other blocks when sinking into a loop header.
      return false;
    if (!equal(Insts, It->second))
      return false;
  }

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  // Any operand that differs across the candidates must be mergeable through
  // a PHI; record those operands in PHIOperands.
  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(OI);
    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(OI) == I0->getOperand(OI);
    };
    if (!all_of(Insts, SameAsI0)) {
        // We can't create a PHI from this GEP.
        return false;
      auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
      for (auto *I : Insts)
        Ops.push_back(I->getOperand(OI));
    }
  }
  return true;
}
2298
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
  // All blocks end in an unconditional branch to the same successor.
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  // Collect the last non-terminator instruction of each block.
  for (auto *BB : Blocks) {
    Instruction *I = BB->getTerminator();
    I = I->getPrevNode();
    Insts.push_back(I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      // Operand agrees everywhere; reuse it directly.
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);

  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
      if (auto *CB = dyn_cast<CallBase>(I0)) {
        bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
        assert(Success && "We should not be trying to sink callbases "
                          "with non-intersectable attributes");
        // For NDEBUG Compile.
        (void)Success;
      }
    }

  for (User *U : make_early_inc_range(I0->users())) {
    // canSinkLastInstruction checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(U);
    PN->replaceAllUsesWith(I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(I0);
    I->eraseFromParent();
  }
}
2393
2394/// Check whether BB's predecessors end with unconditional branches. If it is
2395/// true, sink any common code from the predecessors to BB.
2397 DomTreeUpdater *DTU) {
2398 // We support two situations:
2399 // (1) all incoming arcs are unconditional
2400 // (2) there are non-unconditional incoming arcs
2401 //
2402 // (2) is very common in switch defaults and
2403 // else-if patterns;
2404 //
2405 // if (a) f(1);
2406 // else if (b) f(2);
2407 //
2408 // produces:
2409 //
2410 // [if]
2411 // / \
2412 // [f(1)] [if]
2413 // | | \
2414 // | | |
2415 // | [f(2)]|
2416 // \ | /
2417 // [ end ]
2418 //
2419 // [end] has two unconditional predecessor arcs and one conditional. The
2420 // conditional refers to the implicit empty 'else' arc. This conditional
2421 // arc can also be caused by an empty default block in a switch.
2422 //
2423 // In this case, we attempt to sink code from all *unconditional* arcs.
2424 // If we can sink instructions from these arcs (determined during the scan
2425 // phase below) we insert a common successor for all unconditional arcs and
2426 // connect that to [end], to enable sinking:
2427 //
2428 // [if]
2429 // / \
2430 // [x(1)] [if]
2431 // | | \
2432 // | | \
2433 // | [x(2)] |
2434 // \ / |
2435 // [sink.split] |
2436 // \ /
2437 // [ end ]
2438 //
2439 SmallVector<BasicBlock*,4> UnconditionalPreds;
2440 bool HaveNonUnconditionalPredecessors = false;
2441 for (auto *PredBB : predecessors(BB)) {
2442 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2443 if (PredBr && PredBr->isUnconditional())
2444 UnconditionalPreds.push_back(PredBB);
2445 else
2446 HaveNonUnconditionalPredecessors = true;
2447 }
2448 if (UnconditionalPreds.size() < 2)
2449 return false;
2450
2451 // We take a two-step approach to tail sinking. First we scan from the end of
2452 // each block upwards in lockstep. If the n'th instruction from the end of each
2453 // block can be sunk, those instructions are added to ValuesToSink and we
2454 // carry on. If we can sink an instruction but need to PHI-merge some operands
2455 // (because they're not identical in each instruction) we add these to
2456 // PHIOperands.
2457 // We prepopulate PHIOperands with the phis that already exist in BB.
2459 for (PHINode &PN : BB->phis()) {
2461 for (const Use &U : PN.incoming_values())
2462 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2463 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2464 for (BasicBlock *Pred : UnconditionalPreds)
2465 Ops.push_back(*IncomingVals[Pred]);
2466 }
2467
2468 int ScanIdx = 0;
2469 SmallPtrSet<Value*,4> InstructionsToSink;
2470 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2471 while (LRI.isValid() &&
2472 canSinkInstructions(*LRI, PHIOperands)) {
2473 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2474 << "\n");
2475 InstructionsToSink.insert_range(*LRI);
2476 ++ScanIdx;
2477 --LRI;
2478 }
2479
2480 // If no instructions can be sunk, early-return.
2481 if (ScanIdx == 0)
2482 return false;
2483
2484 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2485
2486 if (!followedByDeoptOrUnreachable) {
2487 // Check whether this is the pointer operand of a load/store.
2488 auto IsMemOperand = [](Use &U) {
2489 auto *I = cast<Instruction>(U.getUser());
2490 if (isa<LoadInst>(I))
2491 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2492 if (isa<StoreInst>(I))
2493 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2494 return false;
2495 };
2496
2497 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2498 // actually sink before encountering instruction that is unprofitable to
2499 // sink?
2500 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2501 unsigned NumPHIInsts = 0;
2502 for (Use &U : (*LRI)[0]->operands()) {
2503 auto It = PHIOperands.find(&U);
2504 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2505 return InstructionsToSink.contains(V);
2506 })) {
2507 ++NumPHIInsts;
2508 // Do not separate a load/store from the gep producing the address.
2509 // The gep can likely be folded into the load/store as an addressing
2510 // mode. Additionally, a load of a gep is easier to analyze than a
2511 // load of a phi.
2512 if (IsMemOperand(U) &&
2513 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2514 return false;
2515 // FIXME: this check is overly optimistic. We may end up not sinking
2516 // said instruction, due to the very same profitability check.
2517 // See @creating_too_many_phis in sink-common-code.ll.
2518 }
2519 }
2520 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2521 return NumPHIInsts <= 1;
2522 };
2523
2524 // We've determined that we are going to sink last ScanIdx instructions,
2525 // and recorded them in InstructionsToSink. Now, some instructions may be
2526 // unprofitable to sink. But that determination depends on the instructions
2527 // that we are going to sink.
2528
2529 // First, forward scan: find the first instruction unprofitable to sink,
2530 // recording all the ones that are profitable to sink.
2531 // FIXME: would it be better, after we detect that not all are profitable.
2532 // to either record the profitable ones, or erase the unprofitable ones?
2533 // Maybe we need to choose (at runtime) the one that will touch least
2534 // instrs?
2535 LRI.reset();
2536 int Idx = 0;
2537 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2538 while (Idx < ScanIdx) {
2539 if (!ProfitableToSinkInstruction(LRI)) {
2540 // Too many PHIs would be created.
2541 LLVM_DEBUG(
2542 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2543 break;
2544 }
2545 InstructionsProfitableToSink.insert_range(*LRI);
2546 --LRI;
2547 ++Idx;
2548 }
2549
2550 // If no instructions can be sunk, early-return.
2551 if (Idx == 0)
2552 return false;
2553
2554 // Did we determine that (only) some instructions are unprofitable to sink?
2555 if (Idx < ScanIdx) {
2556 // Okay, some instructions are unprofitable.
2557 ScanIdx = Idx;
2558 InstructionsToSink = InstructionsProfitableToSink;
2559
2560 // But, that may make other instructions unprofitable, too.
2561 // So, do a backward scan, do any earlier instructions become
2562 // unprofitable?
2563 assert(
2564 !ProfitableToSinkInstruction(LRI) &&
2565 "We already know that the last instruction is unprofitable to sink");
2566 ++LRI;
2567 --Idx;
2568 while (Idx >= 0) {
2569 // If we detect that an instruction becomes unprofitable to sink,
2570 // all earlier instructions won't be sunk either,
2571 // so preemptively keep InstructionsProfitableToSink in sync.
2572 // FIXME: is this the most performant approach?
2573 for (auto *I : *LRI)
2574 InstructionsProfitableToSink.erase(I);
2575 if (!ProfitableToSinkInstruction(LRI)) {
2576 // Everything starting with this instruction won't be sunk.
2577 ScanIdx = Idx;
2578 InstructionsToSink = InstructionsProfitableToSink;
2579 }
2580 ++LRI;
2581 --Idx;
2582 }
2583 }
2584
2585 // If no instructions can be sunk, early-return.
2586 if (ScanIdx == 0)
2587 return false;
2588 }
2589
2590 bool Changed = false;
2591
2592 if (HaveNonUnconditionalPredecessors) {
2593 if (!followedByDeoptOrUnreachable) {
2594 // It is always legal to sink common instructions from unconditional
2595 // predecessors. However, if not all predecessors are unconditional,
2596 // this transformation might be pessimizing. So as a rule of thumb,
2597 // don't do it unless we'd sink at least one non-speculatable instruction.
2598 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2599 LRI.reset();
2600 int Idx = 0;
2601 bool Profitable = false;
2602 while (Idx < ScanIdx) {
2603 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2604 Profitable = true;
2605 break;
2606 }
2607 --LRI;
2608 ++Idx;
2609 }
2610 if (!Profitable)
2611 return false;
2612 }
2613
2614 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2615 // We have a conditional edge and we're going to sink some instructions.
2616 // Insert a new block postdominating all blocks we're going to sink from.
2617 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2618 // Edges couldn't be split.
2619 return false;
2620 Changed = true;
2621 }
2622
2623 // Now that we've analyzed all potential sinking candidates, perform the
2624 // actual sink. We iteratively sink the last non-terminator of the source
2625 // blocks into their common successor unless doing so would require too
2626 // many PHI instructions to be generated (currently only one PHI is allowed
2627 // per sunk instruction).
2628 //
2629 // We can use InstructionsToSink to discount values needing PHI-merging that will
2630 // actually be sunk in a later iteration. This allows us to be more
2631 // aggressive in what we sink. This does allow a false positive where we
2632 // sink presuming a later value will also be sunk, but stop half way through
2633 // and never actually sink it which means we produce more PHIs than intended.
2634 // This is unlikely in practice though.
2635 int SinkIdx = 0;
2636 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2637 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2638 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2639 << "\n");
2640
2641 // Because we've sunk every instruction in turn, the current instruction to
2642 // sink is always at index 0.
2643 LRI.reset();
2644
2645 sinkLastInstruction(UnconditionalPreds);
2646 NumSinkCommonInstrs++;
2647 Changed = true;
2648 }
2649 if (SinkIdx != 0)
2650 ++NumSinkCommonCode;
2651 return Changed;
2652}
2653
2654namespace {
2655
2656struct CompatibleSets {
2657 using SetTy = SmallVector<InvokeInst *, 2>;
2658
2660
2661 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2662
2663 SetTy &getCompatibleSet(InvokeInst *II);
2664
2665 void insert(InvokeInst *II);
2666};
2667
2668CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2669 // Perform a linear scan over all the existing sets, see if the new `invoke`
2670 // is compatible with any particular set. Since we know that all the `invokes`
2671 // within a set are compatible, only check the first `invoke` in each set.
2672 // WARNING: at worst, this has quadratic complexity.
2673 for (CompatibleSets::SetTy &Set : Sets) {
2674 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2675 return Set;
2676 }
2677
2678 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2679 return Sets.emplace_back();
2680}
2681
2682void CompatibleSets::insert(InvokeInst *II) {
2683 getCompatibleSet(II).emplace_back(II);
2684}
2685
2686bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2687 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2688
2689 // Can we theoretically merge these `invoke`s?
2690 auto IsIllegalToMerge = [](InvokeInst *II) {
2691 return II->cannotMerge() || II->isInlineAsm();
2692 };
2693 if (any_of(Invokes, IsIllegalToMerge))
2694 return false;
2695
2696 // Either both `invoke`s must be direct,
2697 // or both `invoke`s must be indirect.
2698 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2699 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2700 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2701 if (HaveIndirectCalls) {
2702 if (!AllCallsAreIndirect)
2703 return false;
2704 } else {
2705 // All callees must be identical.
2706 Value *Callee = nullptr;
2707 for (InvokeInst *II : Invokes) {
2708 Value *CurrCallee = II->getCalledOperand();
2709 assert(CurrCallee && "There is always a called operand.");
2710 if (!Callee)
2711 Callee = CurrCallee;
2712 else if (Callee != CurrCallee)
2713 return false;
2714 }
2715 }
2716
2717 // Either both `invoke`s must not have a normal destination,
2718 // or both `invoke`s must have a normal destination,
2719 auto HasNormalDest = [](InvokeInst *II) {
2720 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2721 };
2722 if (any_of(Invokes, HasNormalDest)) {
2723 // Do not merge `invoke` that does not have a normal destination with one
2724 // that does have a normal destination, even though doing so would be legal.
2725 if (!all_of(Invokes, HasNormalDest))
2726 return false;
2727
2728 // All normal destinations must be identical.
2729 BasicBlock *NormalBB = nullptr;
2730 for (InvokeInst *II : Invokes) {
2731 BasicBlock *CurrNormalBB = II->getNormalDest();
2732 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2733 if (!NormalBB)
2734 NormalBB = CurrNormalBB;
2735 else if (NormalBB != CurrNormalBB)
2736 return false;
2737 }
2738
2739 // In the normal destination, the incoming values for these two `invoke`s
2740 // must be compatible.
2741 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2743 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2744 &EquivalenceSet))
2745 return false;
2746 }
2747
2748#ifndef NDEBUG
2749 // All unwind destinations must be identical.
2750 // We know that because we have started from said unwind destination.
2751 BasicBlock *UnwindBB = nullptr;
2752 for (InvokeInst *II : Invokes) {
2753 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2754 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2755 if (!UnwindBB)
2756 UnwindBB = CurrUnwindBB;
2757 else
2758 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2759 }
2760#endif
2761
2762 // In the unwind destination, the incoming values for these two `invoke`s
2763 // must be compatible.
2765 Invokes.front()->getUnwindDest(),
2766 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2767 return false;
2768
2769 // Ignoring arguments, these `invoke`s must be identical,
2770 // including operand bundles.
2771 const InvokeInst *II0 = Invokes.front();
2772 for (auto *II : Invokes.drop_front())
2773 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2774 return false;
2775
2776 // Can we theoretically form the data operands for the merged `invoke`?
2777 auto IsIllegalToMergeArguments = [](auto Ops) {
2778 Use &U0 = std::get<0>(Ops);
2779 Use &U1 = std::get<1>(Ops);
2780 if (U0 == U1)
2781 return false;
2783 U0.getOperandNo());
2784 };
2785 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2786 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2787 IsIllegalToMergeArguments))
2788 return false;
2789
2790 return true;
2791}
2792
2793} // namespace
2794
2795// Merge all invokes in the provided set, all of which are compatible
2796// as per the `CompatibleSets::shouldBelongToSameSet()`.
2798 DomTreeUpdater *DTU) {
2799 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2800
2802 if (DTU)
2803 Updates.reserve(2 + 3 * Invokes.size());
2804
2805 bool HasNormalDest =
2806 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2807
2808 // Clone one of the invokes into a new basic block.
2809 // Since they are all compatible, it doesn't matter which invoke is cloned.
2810 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2811 InvokeInst *II0 = Invokes.front();
2812 BasicBlock *II0BB = II0->getParent();
2813 BasicBlock *InsertBeforeBlock =
2814 II0->getParent()->getIterator()->getNextNode();
2815 Function *Func = II0BB->getParent();
2816 LLVMContext &Ctx = II0->getContext();
2817
2818 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2819 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2820
2821 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2822 // NOTE: all invokes have the same attributes, so no handling needed.
2823 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2824
2825 if (!HasNormalDest) {
2826 // This set does not have a normal destination,
2827 // so just form a new block with unreachable terminator.
2828 BasicBlock *MergedNormalDest = BasicBlock::Create(
2829 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2830 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2831 UI->setDebugLoc(DebugLoc::getTemporary());
2832 MergedInvoke->setNormalDest(MergedNormalDest);
2833 }
2834
2835 // The unwind destination, however, remainds identical for all invokes here.
2836
2837 return MergedInvoke;
2838 }();
2839
2840 if (DTU) {
2841 // Predecessor blocks that contained these invokes will now branch to
2842 // the new block that contains the merged invoke, ...
2843 for (InvokeInst *II : Invokes)
2844 Updates.push_back(
2845 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2846
2847 // ... which has the new `unreachable` block as normal destination,
2848 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2849 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2850 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2851 SuccBBOfMergedInvoke});
2852
2853 // Since predecessor blocks now unconditionally branch to a new block,
2854 // they no longer branch to their original successors.
2855 for (InvokeInst *II : Invokes)
2856 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2857 Updates.push_back(
2858 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2859 }
2860
2861 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2862
2863 // Form the merged operands for the merged invoke.
2864 for (Use &U : MergedInvoke->operands()) {
2865 // Only PHI together the indirect callees and data operands.
2866 if (MergedInvoke->isCallee(&U)) {
2867 if (!IsIndirectCall)
2868 continue;
2869 } else if (!MergedInvoke->isDataOperand(&U))
2870 continue;
2871
2872 // Don't create trivial PHI's with all-identical incoming values.
2873 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2874 return II->getOperand(U.getOperandNo()) != U.get();
2875 });
2876 if (!NeedPHI)
2877 continue;
2878
2879 // Form a PHI out of all the data ops under this index.
2881 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2882 for (InvokeInst *II : Invokes)
2883 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2884
2885 U.set(PN);
2886 }
2887
2888 // We've ensured that each PHI node has compatible (identical) incoming values
2889 // when coming from each of the `invoke`s in the current merge set,
2890 // so update the PHI nodes accordingly.
2891 for (BasicBlock *Succ : successors(MergedInvoke))
2892 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2893 /*ExistPred=*/Invokes.front()->getParent());
2894
2895 // And finally, replace the original `invoke`s with an unconditional branch
2896 // to the block with the merged `invoke`. Also, give that merged `invoke`
2897 // the merged debugloc of all the original `invoke`s.
2898 DILocation *MergedDebugLoc = nullptr;
2899 for (InvokeInst *II : Invokes) {
2900 // Compute the debug location common to all the original `invoke`s.
2901 if (!MergedDebugLoc)
2902 MergedDebugLoc = II->getDebugLoc();
2903 else
2904 MergedDebugLoc =
2905 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2906
2907 // And replace the old `invoke` with an unconditionally branch
2908 // to the block with the merged `invoke`.
2909 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2910 OrigSuccBB->removePredecessor(II->getParent());
2911 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2912 // The unconditional branch is part of the replacement for the original
2913 // invoke, so should use its DebugLoc.
2914 BI->setDebugLoc(II->getDebugLoc());
2915 bool Success = MergedInvoke->tryIntersectAttributes(II);
2916 assert(Success && "Merged invokes with incompatible attributes");
2917 // For NDEBUG Compile
2918 (void)Success;
2919 II->replaceAllUsesWith(MergedInvoke);
2920 II->eraseFromParent();
2921 ++NumInvokesMerged;
2922 }
2923 MergedInvoke->setDebugLoc(MergedDebugLoc);
2924 ++NumInvokeSetsFormed;
2925
2926 if (DTU)
2927 DTU->applyUpdates(Updates);
2928}
2929
2930/// If this block is a `landingpad` exception handling block, categorize all
2931/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2932/// being "mergeable" together, and then merge invokes in each set together.
2933///
2934/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2935/// [...] [...]
2936/// | |
2937/// [invoke0] [invoke1]
2938/// / \ / \
2939/// [cont0] [landingpad] [cont1]
2940/// to:
2941/// [...] [...]
2942/// \ /
2943/// [invoke]
2944/// / \
2945/// [cont] [landingpad]
2946///
2947/// But of course we can only do that if the invokes share the `landingpad`,
2948/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2949/// and the invoked functions are "compatible".
2952 return false;
2953
2954 bool Changed = false;
2955
2956 // FIXME: generalize to all exception handling blocks?
2957 if (!BB->isLandingPad())
2958 return Changed;
2959
2960 CompatibleSets Grouper;
2961
2962 // Record all the predecessors of this `landingpad`. As per verifier,
2963 // the only allowed predecessor is the unwind edge of an `invoke`.
2964 // We want to group "compatible" `invokes` into the same set to be merged.
2965 for (BasicBlock *PredBB : predecessors(BB))
2966 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2967
2968 // And now, merge `invoke`s that were grouped togeter.
2969 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2970 if (Invokes.size() < 2)
2971 continue;
2972 Changed = true;
2973 mergeCompatibleInvokesImpl(Invokes, DTU);
2974 }
2975
2976 return Changed;
2977}
2978
2979namespace {
2980/// Track ephemeral values, which should be ignored for cost-modelling
2981/// purposes. Requires walking instructions in reverse order.
2982class EphemeralValueTracker {
2983 SmallPtrSet<const Instruction *, 32> EphValues;
2984
2985 bool isEphemeral(const Instruction *I) {
2986 if (isa<AssumeInst>(I))
2987 return true;
2988 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2989 all_of(I->users(), [&](const User *U) {
2990 return EphValues.count(cast<Instruction>(U));
2991 });
2992 }
2993
2994public:
2995 bool track(const Instruction *I) {
2996 if (isEphemeral(I)) {
2997 EphValues.insert(I);
2998 return true;
2999 }
3000 return false;
3001 }
3002
3003 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3004};
3005} // namespace
3006
3007/// Determine if we can hoist sink a sole store instruction out of a
3008/// conditional block.
3009///
3010/// We are looking for code like the following:
3011/// BrBB:
3012/// store i32 %add, i32* %arrayidx2
3013/// ... // No other stores or function calls (we could be calling a memory
3014/// ... // function).
3015/// %cmp = icmp ult %x, %y
3016/// br i1 %cmp, label %EndBB, label %ThenBB
3017/// ThenBB:
3018/// store i32 %add5, i32* %arrayidx2
3019/// br label EndBB
3020/// EndBB:
3021/// ...
3022/// We are going to transform this into:
3023/// BrBB:
3024/// store i32 %add, i32* %arrayidx2
3025/// ... //
3026/// %cmp = icmp ult %x, %y
3027/// %add.add5 = select i1 %cmp, i32 %add, %add5
3028/// store i32 %add.add5, i32* %arrayidx2
3029/// ...
3030///
3031/// \return The pointer to the value of the previous store if the store can be
3032/// hoisted into the predecessor block. 0 otherwise.
3034 BasicBlock *StoreBB, BasicBlock *EndBB) {
3035 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3036 if (!StoreToHoist)
3037 return nullptr;
3038
3039 // Volatile or atomic.
3040 if (!StoreToHoist->isSimple())
3041 return nullptr;
3042
3043 Value *StorePtr = StoreToHoist->getPointerOperand();
3044 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3045
3046 // Look for a store to the same pointer in BrBB.
3047 unsigned MaxNumInstToLookAt = 9;
3048 // Skip pseudo probe intrinsic calls which are not really killing any memory
3049 // accesses.
3050 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3051 if (!MaxNumInstToLookAt)
3052 break;
3053 --MaxNumInstToLookAt;
3054
3055 // Could be calling an instruction that affects memory like free().
3056 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3057 return nullptr;
3058
3059 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3060 // Found the previous store to same location and type. Make sure it is
3061 // simple, to avoid introducing a spurious non-atomic write after an
3062 // atomic write.
3063 if (SI->getPointerOperand() == StorePtr &&
3064 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3065 SI->getAlign() >= StoreToHoist->getAlign())
3066 // Found the previous store, return its value operand.
3067 return SI->getValueOperand();
3068 return nullptr; // Unknown store.
3069 }
3070
3071 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3072 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3073 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3074 Value *Obj = getUnderlyingObject(StorePtr);
3075 bool ExplicitlyDereferenceableOnly;
3076 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3078 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3080 (!ExplicitlyDereferenceableOnly ||
3081 isDereferenceablePointer(StorePtr, StoreTy,
3082 LI->getDataLayout()))) {
3083 // Found a previous load, return it.
3084 return LI;
3085 }
3086 }
3087 // The load didn't work out, but we may still find a store.
3088 }
3089 }
3090
3091 return nullptr;
3092}
3093
3094/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3095/// converted to selects.
3097 BasicBlock *EndBB,
3098 unsigned &SpeculatedInstructions,
3099 InstructionCost &Cost,
3100 const TargetTransformInfo &TTI) {
3102 BB->getParent()->hasMinSize()
3105
3106 bool HaveRewritablePHIs = false;
3107 for (PHINode &PN : EndBB->phis()) {
3108 Value *OrigV = PN.getIncomingValueForBlock(BB);
3109 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3110
3111 // FIXME: Try to remove some of the duplication with
3112 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3113 if (ThenV == OrigV)
3114 continue;
3115
3116 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3117 CmpInst::makeCmpResultType(PN.getType()),
3119
3120 // Don't convert to selects if we could remove undefined behavior instead.
3121 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3123 return false;
3124
3125 HaveRewritablePHIs = true;
3126 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3127 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3128 if (!OrigCE && !ThenCE)
3129 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3130
3131 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3132 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3133 InstructionCost MaxCost =
3135 if (OrigCost + ThenCost > MaxCost)
3136 return false;
3137
3138 // Account for the cost of an unfolded ConstantExpr which could end up
3139 // getting expanded into Instructions.
3140 // FIXME: This doesn't account for how many operations are combined in the
3141 // constant expression.
3142 ++SpeculatedInstructions;
3143 if (SpeculatedInstructions > 1)
3144 return false;
3145 }
3146
3147 return HaveRewritablePHIs;
3148}
3149
3151 std::optional<bool> Invert,
3152 const TargetTransformInfo &TTI) {
3153 // If the branch is non-unpredictable, and is predicted to *not* branch to
3154 // the `then` block, then avoid speculating it.
3155 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3156 return true;
3157
3158 uint64_t TWeight, FWeight;
3159 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3160 return true;
3161
3162 if (!Invert.has_value())
3163 return false;
3164
3165 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3166 BranchProbability BIEndProb =
3167 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3168 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3169 return BIEndProb < Likely;
3170}
3171
3172/// Speculate a conditional basic block flattening the CFG.
3173///
3174/// Note that this is a very risky transform currently. Speculating
3175/// instructions like this is most often not desirable. Instead, there is an MI
3176/// pass which can do it with full awareness of the resource constraints.
3177/// However, some cases are "obvious" and we should do directly. An example of
3178/// this is speculating a single, reasonably cheap instruction.
3179///
3180/// There is only one distinct advantage to flattening the CFG at the IR level:
3181/// it makes very common but simplistic optimizations such as are common in
3182/// instcombine and the DAG combiner more powerful by removing CFG edges and
3183/// modeling their effects with easier to reason about SSA value graphs.
3184///
3185///
3186/// An illustration of this transform is turning this IR:
3187/// \code
3188/// BB:
3189/// %cmp = icmp ult %x, %y
3190/// br i1 %cmp, label %EndBB, label %ThenBB
3191/// ThenBB:
3192/// %sub = sub %x, %y
3193/// br label BB2
3194/// EndBB:
3195/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3196/// ...
3197/// \endcode
3198///
3199/// Into this IR:
3200/// \code
3201/// BB:
3202/// %cmp = icmp ult %x, %y
3203/// %sub = sub %x, %y
3204/// %cond = select i1 %cmp, 0, %sub
3205/// ...
3206/// \endcode
3207///
3208/// \returns true if the conditional block is removed.
3209bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3210 BasicBlock *ThenBB) {
3211 if (!Options.SpeculateBlocks)
3212 return false;
3213
3214 // Be conservative for now. FP select instruction can often be expensive.
3215 Value *BrCond = BI->getCondition();
3216 if (isa<FCmpInst>(BrCond))
3217 return false;
3218
3219 BasicBlock *BB = BI->getParent();
3220 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3221 InstructionCost Budget =
3223
3224 // If ThenBB is actually on the false edge of the conditional branch, remember
3225 // to swap the select operands later.
3226 bool Invert = false;
3227 if (ThenBB != BI->getSuccessor(0)) {
3228 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3229 Invert = true;
3230 }
3231 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3232
3233 if (!isProfitableToSpeculate(BI, Invert, TTI))
3234 return false;
3235
3236 // Keep a count of how many times instructions are used within ThenBB when
3237 // they are candidates for sinking into ThenBB. Specifically:
3238 // - They are defined in BB, and
3239 // - They have no side effects, and
3240 // - All of their uses are in ThenBB.
3241 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3242
3243 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3244
3245 unsigned SpeculatedInstructions = 0;
3246 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3247 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3248 Value *SpeculatedStoreValue = nullptr;
3249 StoreInst *SpeculatedStore = nullptr;
3250 EphemeralValueTracker EphTracker;
3251 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3252 // Skip pseudo probes. The consequence is we lose track of the branch
3253 // probability for ThenBB, which is fine since the optimization here takes
3254 // place regardless of the branch probability.
3255 if (isa<PseudoProbeInst>(I)) {
3256 // The probe should be deleted so that it will not be over-counted when
3257 // the samples collected on the non-conditional path are counted towards
3258 // the conditional path. We leave it for the counts inference algorithm to
3259 // figure out a proper count for an unknown probe.
3260 SpeculatedPseudoProbes.push_back(&I);
3261 continue;
3262 }
3263
3264 // Ignore ephemeral values, they will be dropped by the transform.
3265 if (EphTracker.track(&I))
3266 continue;
3267
3268 // Only speculatively execute a single instruction (not counting the
3269 // terminator) for now.
3270 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3272 SpeculatedConditionalLoadsStores.size() <
3274 // Not count load/store into cost if target supports conditional faulting
3275 // b/c it's cheap to speculate it.
3276 if (IsSafeCheapLoadStore)
3277 SpeculatedConditionalLoadsStores.push_back(&I);
3278 else
3279 ++SpeculatedInstructions;
3280
3281 if (SpeculatedInstructions > 1)
3282 return false;
3283
3284 // Don't hoist the instruction if it's unsafe or expensive.
3285 if (!IsSafeCheapLoadStore &&
3287 !(HoistCondStores && !SpeculatedStoreValue &&
3288 (SpeculatedStoreValue =
3289 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3290 return false;
3291 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3294 return false;
3295
3296 // Store the store speculation candidate.
3297 if (!SpeculatedStore && SpeculatedStoreValue)
3298 SpeculatedStore = cast<StoreInst>(&I);
3299
3300 // Do not hoist the instruction if any of its operands are defined but not
3301 // used in BB. The transformation will prevent the operand from
3302 // being sunk into the use block.
3303 for (Use &Op : I.operands()) {
3305 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3306 continue; // Not a candidate for sinking.
3307
3308 ++SinkCandidateUseCounts[OpI];
3309 }
3310 }
3311
3312 // Consider any sink candidates which are only used in ThenBB as costs for
3313 // speculation. Note, while we iterate over a DenseMap here, we are summing
3314 // and so iteration order isn't significant.
3315 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3316 if (Inst->hasNUses(Count)) {
3317 ++SpeculatedInstructions;
3318 if (SpeculatedInstructions > 1)
3319 return false;
3320 }
3321
3322 // Check that we can insert the selects and that it's not too expensive to do
3323 // so.
3324 bool Convert =
3325 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3327 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3328 SpeculatedInstructions, Cost, TTI);
3329 if (!Convert || Cost > Budget)
3330 return false;
3331
3332 // If we get here, we can hoist the instruction and if-convert.
3333 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3334
3335 Instruction *Sel = nullptr;
3336 // Insert a select of the value of the speculated store.
3337 if (SpeculatedStoreValue) {
3338 IRBuilder<NoFolder> Builder(BI);
3339 Value *OrigV = SpeculatedStore->getValueOperand();
3340 Value *TrueV = SpeculatedStore->getValueOperand();
3341 Value *FalseV = SpeculatedStoreValue;
3342 if (Invert)
3343 std::swap(TrueV, FalseV);
3344 Value *S = Builder.CreateSelect(
3345 BrCond, TrueV, FalseV, "spec.store.select", BI);
3346 Sel = cast<Instruction>(S);
3347 SpeculatedStore->setOperand(0, S);
3348 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3349 SpeculatedStore->getDebugLoc());
3350 // The value stored is still conditional, but the store itself is now
3351 // unconditonally executed, so we must be sure that any linked dbg.assign
3352 // intrinsics are tracking the new stored value (the result of the
3353 // select). If we don't, and the store were to be removed by another pass
3354 // (e.g. DSE), then we'd eventually end up emitting a location describing
3355 // the conditional value, unconditionally.
3356 //
3357 // === Before this transformation ===
3358 // pred:
3359 // store %one, %x.dest, !DIAssignID !1
3360 // dbg.assign %one, "x", ..., !1, ...
3361 // br %cond if.then
3362 //
3363 // if.then:
3364 // store %two, %x.dest, !DIAssignID !2
3365 // dbg.assign %two, "x", ..., !2, ...
3366 //
3367 // === After this transformation ===
3368 // pred:
3369 // store %one, %x.dest, !DIAssignID !1
3370 // dbg.assign %one, "x", ..., !1
3371 /// ...
3372 // %merge = select %cond, %two, %one
3373 // store %merge, %x.dest, !DIAssignID !2
3374 // dbg.assign %merge, "x", ..., !2
3375 for (DbgVariableRecord *DbgAssign :
3376 at::getDVRAssignmentMarkers(SpeculatedStore))
3377 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3378 DbgAssign->replaceVariableLocationOp(OrigV, S);
3379 }
3380
3381 // Metadata can be dependent on the condition we are hoisting above.
3382 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3383 // to avoid making it appear as if the condition is a constant, which would
3384 // be misleading while debugging.
3385 // Similarly strip attributes that maybe dependent on condition we are
3386 // hoisting above.
3387 for (auto &I : make_early_inc_range(*ThenBB)) {
3388 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3389 I.dropLocation();
3390 }
3391 I.dropUBImplyingAttrsAndMetadata();
3392
3393 // Drop ephemeral values.
3394 if (EphTracker.contains(&I)) {
3395 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3396 I.eraseFromParent();
3397 }
3398 }
3399
3400 // Hoist the instructions.
3401 // Drop DbgVariableRecords attached to these instructions.
3402 for (auto &It : *ThenBB)
3403 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3404 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3405 // equivalent).
3406 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3407 !DVR || !DVR->isDbgAssign())
3408 It.dropOneDbgRecord(&DR);
3409 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3410 std::prev(ThenBB->end()));
3411
3412 if (!SpeculatedConditionalLoadsStores.empty())
3413 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3414 Sel);
3415
3416 // Insert selects and rewrite the PHI operands.
3417 IRBuilder<NoFolder> Builder(BI);
3418 for (PHINode &PN : EndBB->phis()) {
3419 unsigned OrigI = PN.getBasicBlockIndex(BB);
3420 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3421 Value *OrigV = PN.getIncomingValue(OrigI);
3422 Value *ThenV = PN.getIncomingValue(ThenI);
3423
3424 // Skip PHIs which are trivial.
3425 if (OrigV == ThenV)
3426 continue;
3427
3428 // Create a select whose true value is the speculatively executed value and
3429 // false value is the pre-existing value. Swap them if the branch
3430 // destinations were inverted.
3431 Value *TrueV = ThenV, *FalseV = OrigV;
3432 if (Invert)
3433 std::swap(TrueV, FalseV);
3434 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3435 PN.setIncomingValue(OrigI, V);
3436 PN.setIncomingValue(ThenI, V);
3437 }
3438
3439 // Remove speculated pseudo probes.
3440 for (Instruction *I : SpeculatedPseudoProbes)
3441 I->eraseFromParent();
3442
3443 ++NumSpeculations;
3444 return true;
3445}
3446
3448
3449// Return false if number of blocks searched is too much.
3450static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3451 BlocksSet &ReachesNonLocalUses) {
3452 if (BB == DefBB)
3453 return true;
3454 if (!ReachesNonLocalUses.insert(BB).second)
3455 return true;
3456
3457 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3458 return false;
3459 for (BasicBlock *Pred : predecessors(BB))
3460 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3461 return false;
3462 return true;
3463}
3464
3465/// Return true if we can thread a branch across this block.
3467 BlocksSet &NonLocalUseBlocks) {
3468 int Size = 0;
3469 EphemeralValueTracker EphTracker;
3470
3471 // Walk the loop in reverse so that we can identify ephemeral values properly
3472 // (values only feeding assumes).
3473 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3474 // Can't fold blocks that contain noduplicate or convergent calls.
3475 if (CallInst *CI = dyn_cast<CallInst>(&I))
3476 if (CI->cannotDuplicate() || CI->isConvergent())
3477 return false;
3478
3479 // Ignore ephemeral values which are deleted during codegen.
3480 // We will delete Phis while threading, so Phis should not be accounted in
3481 // block's size.
3482 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3483 if (Size++ > MaxSmallBlockSize)
3484 return false; // Don't clone large BB's.
3485 }
3486
3487 // Record blocks with non-local uses of values defined in the current basic
3488 // block.
3489 for (User *U : I.users()) {
3491 BasicBlock *UsedInBB = UI->getParent();
3492 if (UsedInBB == BB) {
3493 if (isa<PHINode>(UI))
3494 return false;
3495 } else
3496 NonLocalUseBlocks.insert(UsedInBB);
3497 }
3498
3499 // Looks ok, continue checking.
3500 }
3501
3502 return true;
3503}
3504
3506 BasicBlock *To) {
3507 // Don't look past the block defining the value, we might get the value from
3508 // a previous loop iteration.
3509 auto *I = dyn_cast<Instruction>(V);
3510 if (I && I->getParent() == To)
3511 return nullptr;
3512
3513 // We know the value if the From block branches on it.
3514 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3515 if (BI && BI->isConditional() && BI->getCondition() == V &&
3516 BI->getSuccessor(0) != BI->getSuccessor(1))
3517 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3519
3520 return nullptr;
3521}
3522
3523 /// If we have a conditional branch on something for which we know the constant
3524 /// value in predecessors (e.g. a phi node in the current block), thread edges
3525 /// from the predecessor to their ultimate destination.
/// Returns false when no threading was possible, true when a change was made,
/// and std::nullopt to signal "changed — call again to look for more".
// NOTE(review): several hyperlinked lines were dropped by the doc extraction
// (the signature's function-name line and the declarations of KnownValues and
// PN, the RemapInstruction flags argument, and the Updates vector); restore
// from upstream before compiling.
3526 static std::optional<bool>
3528 const DataLayout &DL,
3529 AssumptionCache *AC) {
3531 BasicBlock *BB = BI->getParent();
3532 Value *Cond = BI->getCondition();
// Gather, per known constant condition value, the set of predecessor blocks
// on whose incoming edge the condition has that value: either from a PHI of
// this block, or from each predecessor's own conditional branch on Cond.
3534 if (PN && PN->getParent() == BB) {
3535 // Degenerate case of a single entry PHI.
3536 if (PN->getNumIncomingValues() == 1) {
3538 return true;
3539 }
3540
3541 for (Use &U : PN->incoming_values())
3542 if (auto *CB = dyn_cast<ConstantInt>(U))
3543 KnownValues[CB].insert(PN->getIncomingBlock(U));
3544 } else {
3545 for (BasicBlock *Pred : predecessors(BB)) {
3546 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3547 KnownValues[CB].insert(Pred);
3548 }
3549 }
3550
3551 if (KnownValues.empty())
3552 return false;
3553
3554 // Now we know that this block has multiple preds and two succs.
3555 // Check that the block is small enough and record which non-local blocks use
3556 // values defined in the block.
3557
3558 BlocksSet NonLocalUseBlocks;
3559 BlocksSet ReachesNonLocalUseBlocks;
3560 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3561 return false;
3562
3563 // Jump-threading can only be done to destinations where no values defined
3564 // in BB are live.
3565
3566 // Quickly check if both destinations have uses. If so, jump-threading cannot
3567 // be done.
3568 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3569 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3570 return false;
3571
3572 // Search backward from NonLocalUseBlocks to find which blocks
3573 // reach non-local uses.
3574 for (BasicBlock *UseBB : NonLocalUseBlocks)
3575 // Give up if too many blocks are searched.
3576 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3577 return false;
3578
// For each known condition value, redirect the corresponding predecessors
// straight to the successor that value selects.
3579 for (const auto &Pair : KnownValues) {
3580 ConstantInt *CB = Pair.first;
3581 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3582 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3583
3584 // Okay, we now know that all edges from PredBB should be revectored to
3585 // branch to RealDest.
3586 if (RealDest == BB)
3587 continue; // Skip self loops.
3588
3589 // Skip if the predecessor's terminator is an indirect branch.
3590 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3591 return isa<IndirectBrInst>(PredBB->getTerminator());
3592 }))
3593 continue;
3594
3595 // Only revector to RealDest if no values defined in BB are live.
3596 if (ReachesNonLocalUseBlocks.contains(RealDest))
3597 continue;
3598
3599 LLVM_DEBUG({
3600 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3601 << " has value " << *Pair.first << " in predecessors:\n";
3602 for (const BasicBlock *PredBB : Pair.second)
3603 dbgs() << " " << PredBB->getName() << "\n";
3604 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3605 });
3606
3607 // Split the predecessors we are threading into a new edge block. We'll
3608 // clone the instructions into this block, and then redirect it to RealDest.
3609 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3610
3611 // TODO: These just exist to reduce test diff, we can drop them if we like.
3612 EdgeBB->setName(RealDest->getName() + ".critedge");
3613 EdgeBB->moveBefore(RealDest);
3614
3615 // Update PHI nodes.
3616 addPredecessorToBlock(RealDest, EdgeBB, BB);
3617
3618 // BB may have instructions that are being threaded over. Clone these
3619 // instructions into EdgeBB. We know that there will be no uses of the
3620 // cloned instructions outside of EdgeBB.
3621 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3622 ValueToValueMapTy TranslateMap; // Track translated values.
// The condition itself is known to be CB along this edge; cloned users of
// Cond are remapped to the constant so they can constant-fold below.
3623 TranslateMap[Cond] = CB;
3624
3625 // RemoveDIs: track instructions that we optimise away while folding, so
3626 // that we can copy DbgVariableRecords from them later.
3627 BasicBlock::iterator SrcDbgCursor = BB->begin();
3628 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3629 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3630 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3631 continue;
3632 }
3633 // Clone the instruction.
3634 Instruction *N = BBI->clone();
3635 // Insert the new instruction into its new home.
3636 N->insertInto(EdgeBB, InsertPt);
3637
3638 if (BBI->hasName())
3639 N->setName(BBI->getName() + ".c");
3640
3641 // Update operands due to translation.
3642 // Key Instructions: Remap all the atom groups.
3643 if (const DebugLoc &DL = BBI->getDebugLoc())
3644 mapAtomInstance(DL, TranslateMap);
3645 RemapInstruction(N, TranslateMap,
3647
3648 // Check for trivial simplification.
3649 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3650 if (!BBI->use_empty())
3651 TranslateMap[&*BBI] = V;
3652 if (!N->mayHaveSideEffects()) {
3653 N->eraseFromParent(); // Instruction folded away, don't need actual
3654 // inst
3655 N = nullptr;
3656 }
3657 } else {
3658 if (!BBI->use_empty())
3659 TranslateMap[&*BBI] = N;
3660 }
3661 if (N) {
3662 // Copy all debug-info attached to instructions from the last we
3663 // successfully clone, up to this instruction (they might have been
3664 // folded away).
3665 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3666 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3667 SrcDbgCursor = std::next(BBI);
3668 // Clone debug-info on this instruction too.
3669 N->cloneDebugInfoFrom(&*BBI);
3670
3671 // Register the new instruction with the assumption cache if necessary.
3672 if (auto *Assume = dyn_cast<AssumeInst>(N))
3673 if (AC)
3674 AC->registerAssumption(Assume);
3675 }
3676 }
3677
3678 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3679 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3680 InsertPt->cloneDebugInfoFrom(BI);
3681
// Retarget EdgeBB's unconditional branch from BB to RealDest.
3682 BB->removePredecessor(EdgeBB);
3683 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3684 EdgeBI->setSuccessor(0, RealDest);
3685 EdgeBI->setDebugLoc(BI->getDebugLoc());
3686
3687 if (DTU) {
3689 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3690 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3691 DTU->applyUpdates(Updates);
3692 }
3693
3694 // For simplicity, we created a separate basic block for the edge. Merge
3695 // it back into the predecessor if possible. This not only avoids
3696 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3697 // bypass the check for trivial cycles above.
3698 MergeBlockIntoPredecessor(EdgeBB, DTU);
3699
3700 // Signal repeat, simplifying any other constants.
3701 return std::nullopt;
3702 }
3703
3704 return false;
3705}
3706
3707bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3708 // Note: If BB is a loop header then there is a risk that threading introduces
3709 // a non-canonical loop by moving a back edge. So we avoid this optimization
3710 // for loop headers if NeedCanonicalLoop is set.
3711 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3712 return false;
3713
3714 std::optional<bool> Result;
3715 bool EverChanged = false;
3716 do {
3717 // Note that None means "we changed things, but recurse further."
3718 Result =
3720 EverChanged |= Result == std::nullopt || *Result;
3721 } while (Result == std::nullopt);
3722 return EverChanged;
3723}
3724
3725 /// Given a BB that starts with the specified two-entry PHI node,
3726 /// see if we can eliminate it.
/// Turns the "diamond"/"triangle" of an if-statement into selects in the
/// dominating block when all PHIs can be promoted profitably. Returns true if
/// any change was made (including PHI simplifications done along the way).
// NOTE(review): the extraction dropped several hyperlinked lines (the
// signature's first line, the IfBlocks declaration and copy_if call, the
// Budget initializer, the m_BinOp/m_Select matcher line, and the Updates
// vector declaration); restore from upstream before compiling.
3729 const DataLayout &DL,
3730 bool SpeculateUnpredictables) {
3731 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3732 // statement", which has a very simple dominance structure. Basically, we
3733 // are trying to find the condition that is being branched on, which
3734 // subsequently causes this merge to happen. We really want control
3735 // dependence information for this check, but simplifycfg can't keep it up
3736 // to date, and this catches most of the cases we care about anyway.
3737 BasicBlock *BB = PN->getParent();
3738
3739 BasicBlock *IfTrue, *IfFalse;
3740 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3741 if (!DomBI)
3742 return false;
3743 Value *IfCond = DomBI->getCondition();
3744 // Don't bother if the branch will be constant folded trivially.
3745 if (isa<ConstantInt>(IfCond))
3746 return false;
3747
3748 BasicBlock *DomBlock = DomBI->getParent();
3751 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3752 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3753 });
3754 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3755 "Will have either one or two blocks to speculate.");
3756
3757 // If the branch is non-unpredictable, see if we either predictably jump to
3758 // the merge bb (if we have only a single 'then' block), or if we predictably
3759 // jump to one specific 'then' block (if we have two of them).
3760 // It isn't beneficial to speculatively execute the code
3761 // from the block that we know is predictably not entered.
3762 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3763 if (!IsUnpredictable) {
3764 uint64_t TWeight, FWeight;
3765 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3766 (TWeight + FWeight) != 0) {
3767 BranchProbability BITrueProb =
3768 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3769 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3770 BranchProbability BIFalseProb = BITrueProb.getCompl();
3771 if (IfBlocks.size() == 1) {
3772 BranchProbability BIBBProb =
3773 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3774 if (BIBBProb >= Likely)
3775 return false;
3776 } else {
3777 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3778 return false;
3779 }
3780 }
3781 }
3782
3783 // Don't try to fold an unreachable block. For example, the phi node itself
3784 // can't be the candidate if-condition for a select that we want to form.
3785 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3786 if (IfCondPhiInst->getParent() == BB)
3787 return false;
3788
3789 // Okay, we found that we can merge this two-entry phi node into a select.
3790 // Doing so would require us to fold *all* two entry phi nodes in this block.
3791 // At some point this becomes non-profitable (particularly if the target
3792 // doesn't support cmov's). Only do this transformation if there are two or
3793 // fewer PHI nodes in this block.
3794 unsigned NumPhis = 0;
3795 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3796 if (NumPhis > 2)
3797 return false;
3798
3799 // Loop over the PHI's seeing if we can promote them all to select
3800 // instructions. While we are at it, keep track of the instructions
3801 // that need to be moved to the dominating block.
3802 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3803 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3804 InstructionCost Cost = 0;
3805 InstructionCost Budget =
// Unpredictable branches are worth speculating more aggressively: add the
// mispredict penalty to the speculation budget.
3807 if (SpeculateUnpredictables && IsUnpredictable)
3808 Budget += TTI.getBranchMispredictPenalty();
3809
3810 bool Changed = false;
3811 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3812 PHINode *PN = cast<PHINode>(II++);
3813 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3814 PN->replaceAllUsesWith(V);
3815 PN->eraseFromParent();
3816 Changed = true;
3817 continue;
3818 }
3819
3820 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3821 AggressiveInsts, Cost, Budget, TTI, AC,
3822 ZeroCostInstructions) ||
3823 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3824 AggressiveInsts, Cost, Budget, TTI, AC,
3825 ZeroCostInstructions))
3826 return Changed;
3827 }
3828
3829 // If we folded the first phi, PN dangles at this point. Refresh it. If
3830 // we ran out of PHIs then we simplified them all.
3831 PN = dyn_cast<PHINode>(BB->begin());
3832 if (!PN)
3833 return true;
3834
3835 // Return true if at least one of these is a 'not', and another is either
3836 // a 'not' too, or a constant.
3837 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3838 if (!match(V0, m_Not(m_Value())))
3839 std::swap(V0, V1);
3840 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3841 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3842 };
3843
3844 // Don't fold i1 branches on PHIs which contain binary operators or
3845 // (possibly inverted) select form of or/ands, unless one of
3846 // the incoming values is an 'not' and another one is freely invertible.
3847 // These can often be turned into switches and other things.
3848 auto IsBinOpOrAnd = [](Value *V) {
3849 return match(
3851 };
3852 if (PN->getType()->isIntegerTy(1) &&
3853 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3854 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3855 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3856 PN->getIncomingValue(1)))
3857 return Changed;
3858
3859 // If all PHI nodes are promotable, check to make sure that all instructions
3860 // in the predecessor blocks can be promoted as well. If not, we won't be able
3861 // to get rid of the control flow, so it's not worth promoting to select
3862 // instructions.
3863 for (BasicBlock *IfBlock : IfBlocks)
3864 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3865 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3866 // This is not an aggressive instruction that we can promote.
3867 // Because of this, we won't be able to get rid of the control flow, so
3868 // the xform is not worth it.
3869 return Changed;
3870 }
3871
3872 // If either of the blocks has it's address taken, we can't do this fold.
3873 if (any_of(IfBlocks,
3874 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3875 return Changed;
3876
3877 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3878 if (IsUnpredictable) dbgs() << " (unpredictable)";
3879 dbgs() << " T: " << IfTrue->getName()
3880 << " F: " << IfFalse->getName() << "\n");
3881
3882 // If we can still promote the PHI nodes after this gauntlet of tests,
3883 // do all of the PHI's now.
3884
3885 // Move all 'aggressive' instructions, which are defined in the
3886 // conditional parts of the if's up to the dominating block.
3887 for (BasicBlock *IfBlock : IfBlocks)
3888 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3889
3890 IRBuilder<NoFolder> Builder(DomBI);
3891 // Propagate fast-math-flags from phi nodes to replacement selects.
3892 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3893 // Change the PHI node into a select instruction.
3894 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3895 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3896
3897 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3898 isa<FPMathOperator>(PN) ? PN : nullptr,
3899 "", DomBI);
3900 PN->replaceAllUsesWith(Sel);
3901 Sel->takeName(PN);
3902 PN->eraseFromParent();
3903 }
3904
3905 // At this point, all IfBlocks are empty, so our if statement
3906 // has been flattened. Change DomBlock to jump directly to our new block to
3907 // avoid other simplifycfg's kicking in on the diamond.
3908 Builder.CreateBr(BB);
3909
3911 if (DTU) {
3912 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3913 for (auto *Successor : successors(DomBlock))
3914 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3915 }
3916
3917 DomBI->eraseFromParent();
3918 if (DTU)
3919 DTU->applyUpdates(Updates);
3920
3921 return true;
3922}
3923
3926 Value *RHS, const Twine &Name = "") {
3927 // Try to relax logical op to binary op.
3928 if (impliesPoison(RHS, LHS))
3929 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3930 if (Opc == Instruction::And)
3931 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3932 if (Opc == Instruction::Or)
3933 return Builder.CreateLogicalOr(LHS, RHS, Name);
3934 llvm_unreachable("Invalid logical opcode");
3935}
3936
3937/// Return true if either PBI or BI has branch weight available, and store
3938/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3939/// not have branch weight, use 1:1 as its weight.
3941 uint64_t &PredTrueWeight,
3942 uint64_t &PredFalseWeight,
3943 uint64_t &SuccTrueWeight,
3944 uint64_t &SuccFalseWeight) {
3945 bool PredHasWeights =
3946 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3947 bool SuccHasWeights =
3948 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3949 if (PredHasWeights || SuccHasWeights) {
3950 if (!PredHasWeights)
3951 PredTrueWeight = PredFalseWeight = 1;
3952 if (!SuccHasWeights)
3953 SuccTrueWeight = SuccFalseWeight = 1;
3954 return true;
3955 } else {
3956 return false;
3957 }
3958}
3959
3960/// Determine if the two branches share a common destination and deduce a glue
3961/// that joins the branches' conditions to arrive at the common destination if
3962/// that would be profitable.
3963static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3965 const TargetTransformInfo *TTI) {
3966 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3967 "Both blocks must end with a conditional branches.");
3969 "PredBB must be a predecessor of BB.");
3970
3971 // We have the potential to fold the conditions together, but if the
3972 // predecessor branch is predictable, we may not want to merge them.
3973 uint64_t PTWeight, PFWeight;
3974 BranchProbability PBITrueProb, Likely;
3975 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3976 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3977 (PTWeight + PFWeight) != 0) {
3978 PBITrueProb =
3979 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3980 Likely = TTI->getPredictableBranchThreshold();
3981 }
3982
3983 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3984 // Speculate the 2nd condition unless the 1st is probably true.
3985 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3986 return {{BI->getSuccessor(0), Instruction::Or, false}};
3987 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3988 // Speculate the 2nd condition unless the 1st is probably false.
3989 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3990 return {{BI->getSuccessor(1), Instruction::And, false}};
3991 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3992 // Speculate the 2nd condition unless the 1st is probably true.
3993 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3994 return {{BI->getSuccessor(1), Instruction::And, true}};
3995 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3996 // Speculate the 2nd condition unless the 1st is probably false.
3997 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3998 return {{BI->getSuccessor(0), Instruction::Or, true}};
3999 }
4000 return std::nullopt;
4001}
4002
// Fold BI's block into its predecessor PBI's block: combine the two branch
// conditions with And/Or, clone BB's bonus instructions into PredBlock, and
// rewire PredBlock directly to the unique successor. Always returns true.
// NOTE(review): the extraction dropped several hyperlinked lines (the
// signature's first line, the shouldFoldCondBranchesToCommonDestination call,
// the cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses call, and the
// remap-flags arguments); restore from upstream before compiling.
4004 DomTreeUpdater *DTU,
4005 MemorySSAUpdater *MSSAU,
4006 const TargetTransformInfo *TTI) {
4007 BasicBlock *BB = BI->getParent();
4008 BasicBlock *PredBlock = PBI->getParent();
4009
4010 // Determine if the two branches share a common destination.
4011 BasicBlock *CommonSucc;
4013 bool InvertPredCond;
4014 std::tie(CommonSucc, Opc, InvertPredCond) =
4016
4017 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4018
4019 IRBuilder<> Builder(PBI);
4020 // The builder is used to create instructions to eliminate the branch in BB.
4021 // If BB's terminator has !annotation metadata, add it to the new
4022 // instructions.
4023 Builder.CollectMetadataToCopy(BB->getTerminator(),
4024 {LLVMContext::MD_annotation});
4025
4026 // If we need to invert the condition in the pred block to match, do so now.
4027 if (InvertPredCond) {
4028 InvertBranch(PBI, Builder);
4029 }
4030
4031 BasicBlock *UniqueSucc =
4032 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4033
4034 // Before cloning instructions, notify the successor basic block that it
4035 // is about to have a new predecessor. This will update PHI nodes,
4036 // which will allow us to update live-out uses of bonus instructions.
4037 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4038
4039 // Try to update branch weights.
4040 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4041 SmallVector<uint64_t, 2> MDWeights;
4042 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4043 SuccTrueWeight, SuccFalseWeight)) {
4044
4045 if (PBI->getSuccessor(0) == BB) {
4046 // PBI: br i1 %x, BB, FalseDest
4047 // BI: br i1 %y, UniqueSucc, FalseDest
4048 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4049 MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
4050 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4051 // TrueWeight for PBI * FalseWeight for BI.
4052 // We assume that total weights of a BranchInst can fit into 32 bits.
4053 // Therefore, we will not have overflow using 64-bit arithmetic.
4054 MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4055 PredTrueWeight * SuccFalseWeight);
4056 } else {
4057 // PBI: br i1 %x, TrueDest, BB
4058 // BI: br i1 %y, TrueDest, UniqueSucc
4059 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4060 // FalseWeight for PBI * TrueWeight for BI.
4061 MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4062 PredFalseWeight * SuccTrueWeight);
4063 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4064 MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
4065 }
4066
4067 setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
4068 /*ElideAllZero=*/true);
4069
4070 // TODO: If BB is reachable from all paths through PredBlock, then we
4071 // could replace PBI's branch probabilities with BI's.
4072 } else
4073 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4074
4075 // Now, update the CFG.
4076 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4077
4078 if (DTU)
4079 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4080 {DominatorTree::Delete, PredBlock, BB}});
4081
4082 // If BI was a loop latch, it may have had associated loop metadata.
4083 // We need to copy it to the new latch, that is, PBI.
4084 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4085 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4086
4087 ValueToValueMapTy VMap; // maps original values to cloned values
4089
4090 Module *M = BB->getModule();
4091
// Clone the terminator's debug records into the predecessor and remap them
// through VMap so they refer to the cloned bonus instructions.
4092 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4093 for (DbgVariableRecord &DVR :
4095 RemapDbgRecord(M, &DVR, VMap,
4097 }
4098
4099 // Now that the Cond was cloned into the predecessor basic block,
4100 // or/and the two conditions together.
4101 Value *BICond = VMap[BI->getCondition()];
4102 PBI->setCondition(
4103 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4105 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4106 if (!MDWeights.empty()) {
4107 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4108 setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
4109 /*IsExpected=*/false, /*ElideAllZero=*/true);
4110 }
4111
4112 ++NumFoldBranchToCommonDest;
4113 return true;
4114}
4115
4116/// Return if an instruction's type or any of its operands' types are a vector
4117/// type.
4118static bool isVectorOp(Instruction &I) {
4119 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4120 return U->getType()->isVectorTy();
4121 });
4122}
4123
4124 /// If this basic block is simple enough, and if a predecessor branches to us
4125 /// and one of our successors, fold the block into the predecessor and use
4126 /// logical operations to pick the right destination.
/// Returns true if a fold was performed.
// NOTE(review): the extraction dropped several hyperlinked lines (the
// signature's first line, the CostKind declaration, the Cond initialization
// and the first half of its guard, the Opc declarations, and the Preds vector
// declaration); restore from upstream before compiling.
4128 MemorySSAUpdater *MSSAU,
4129 const TargetTransformInfo *TTI,
4130 unsigned BonusInstThreshold) {
4131 // If this block ends with an unconditional branch,
4132 // let speculativelyExecuteBB() deal with it.
4133 if (!BI->isConditional())
4134 return false;
4135
4136 BasicBlock *BB = BI->getParent();
4140
4142
4144 Cond->getParent() != BB || !Cond->hasOneUse())
4145 return false;
4146
4147 // Finally, don't infinitely unroll conditional loops.
4148 if (is_contained(successors(BB), BB))
4149 return false;
4150
4151 // With which predecessors will we want to deal with?
4153 for (BasicBlock *PredBlock : predecessors(BB)) {
4154 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4155
4156 // Check that we have two conditional branches. If there is a PHI node in
4157 // the common successor, verify that the same value flows in from both
4158 // blocks.
4159 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4160 continue;
4161
4162 // Determine if the two branches share a common destination.
4163 BasicBlock *CommonSucc;
4165 bool InvertPredCond;
4166 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4167 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4168 else
4169 continue;
4170
4171 // Check the cost of inserting the necessary logic before performing the
4172 // transformation.
4173 if (TTI) {
4174 Type *Ty = BI->getCondition()->getType();
4175 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
// An inversion needs an extra Xor unless it can be folded into a
// single-use compare.
4176 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4177 !isa<CmpInst>(PBI->getCondition())))
4178 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4179
4181 continue;
4182 }
4183
4184 // Ok, we do want to deal with this predecessor. Record it.
4185 Preds.emplace_back(PredBlock);
4186 }
4187
4188 // If there aren't any predecessors into which we can fold,
4189 // don't bother checking the cost.
4190 if (Preds.empty())
4191 return false;
4192
4193 // Only allow this transformation if computing the condition doesn't involve
4194 // too many instructions and these involved instructions can be executed
4195 // unconditionally. We denote all involved instructions except the condition
4196 // as "bonus instructions", and only allow this transformation when the
4197 // number of the bonus instructions we'll need to create when cloning into
4198 // each predecessor does not exceed a certain threshold.
4199 unsigned NumBonusInsts = 0;
4200 bool SawVectorOp = false;
4201 const unsigned PredCount = Preds.size();
4202 for (Instruction &I : *BB) {
4203 // Don't check the branch condition comparison itself.
4204 if (&I == Cond)
4205 continue;
4206 // Ignore the terminator.
4207 if (isa<BranchInst>(I))
4208 continue;
4209 // I must be safe to execute unconditionally.
4211 return false;
4212 SawVectorOp |= isVectorOp(I);
4213
4214 // Account for the cost of duplicating this instruction into each
4215 // predecessor. Ignore free instructions.
4216 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4218 NumBonusInsts += PredCount;
4219
4220 // Early exits once we reach the limit.
4221 if (NumBonusInsts >
4222 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4223 return false;
4224 }
4225
// Cloning only works when every out-of-instruction use is either a PHI
// incoming from BB or a later instruction within BB (block-closed SSA form).
4226 auto IsBCSSAUse = [BB, &I](Use &U) {
4227 auto *UI = cast<Instruction>(U.getUser());
4228 if (auto *PN = dyn_cast<PHINode>(UI))
4229 return PN->getIncomingBlock(U) == BB;
4230 return UI->getParent() == BB && I.comesBefore(UI);
4231 };
4232
4233 // Does this instruction require rewriting of uses?
4234 if (!all_of(I.uses(), IsBCSSAUse))
4235 return false;
4236 }
4237 if (NumBonusInsts >
4238 BonusInstThreshold *
4239 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4240 return false;
4241
4242 // Ok, we have the budget. Perform the transformation.
// Only the first recorded predecessor is folded per call; the return exits
// the loop immediately. Presumably later candidates are handled on
// subsequent SimplifyCFG iterations — confirm against upstream intent.
4243 for (BasicBlock *PredBlock : Preds) {
4244 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4245 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4246 }
4247 return false;
4248}
4249
4250// If there is only one store in BB1 and BB2, return it, otherwise return
4251// nullptr.
4253 StoreInst *S = nullptr;
4254 for (auto *BB : {BB1, BB2}) {
4255 if (!BB)
4256 continue;
4257 for (auto &I : *BB)
4258 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4259 if (S)
4260 // Multiple stores seen.
4261 return nullptr;
4262 else
4263 S = SI;
4264 }
4265 }
4266 return S;
4267}
4268
4270 Value *AlternativeV = nullptr) {
4271 // PHI is going to be a PHI node that allows the value V that is defined in
4272 // BB to be referenced in BB's only successor.
4273 //
4274 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4275 // doesn't matter to us what the other operand is (it'll never get used). We
4276 // could just create a new PHI with an undef incoming value, but that could
4277 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4278 // other PHI. So here we directly look for some PHI in BB's successor with V
4279 // as an incoming operand. If we find one, we use it, else we create a new
4280 // one.
4281 //
4282 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4283 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4284 // where OtherBB is the single other predecessor of BB's only successor.
4285 PHINode *PHI = nullptr;
4286 BasicBlock *Succ = BB->getSingleSuccessor();
4287
4288 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4289 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4290 PHI = cast<PHINode>(I);
4291 if (!AlternativeV)
4292 break;
4293
4294 assert(Succ->hasNPredecessors(2));
4295 auto PredI = pred_begin(Succ);
4296 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4297 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4298 break;
4299 PHI = nullptr;
4300 }
4301 if (PHI)
4302 return PHI;
4303
4304 // If V is not an instruction defined in BB, just return it.
4305 if (!AlternativeV &&
4306 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4307 return V;
4308
4309 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4310 PHI->insertBefore(Succ->begin());
4311 PHI->addIncoming(V, BB);
4312 for (BasicBlock *PredBB : predecessors(Succ))
4313 if (PredBB != BB)
4314 PHI->addIncoming(
4315 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4316 return PHI;
4317}
4318
    BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
    BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
    DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
  // For every pointer, there must be exactly two stores, one coming from
  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
  // store (to any address) in PTB,PFB or QTB,QFB.
  // FIXME: We could relax this restriction with a bit more work and performance
  // testing.
  StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
  StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
  if (!PStore || !QStore)
    return false;

  // Now check the stores are compatible: both must be unordered and store a
  // value of the same type, otherwise one merged store cannot replace them.
  if (!QStore->isUnordered() || !PStore->isUnordered() ||
      PStore->getValueOperand()->getType() !=
          QStore->getValueOperand()->getType())
    return false;

  // Check that sinking the store won't cause program behavior changes. Sinking
  // the store out of the Q blocks won't change any behavior as we're sinking
  // from a block to its unconditional successor. But we're moving a store from
  // the P blocks down through the middle block (QBI) and past both QFB and QTB.
  // So we need to check that there are no aliasing loads or stores in
  // QBI, QTB and QFB. We also need to check there are no conflicting memory
  // operations between PStore and the end of its parent block.
  //
  // The ideal way to do this is to query AliasAnalysis, but we don't
  // preserve AA currently so that is dangerous. Be super safe and just
  // check there are no other memory operations at all.
  for (auto &I : *QFB->getSinglePredecessor())
    if (I.mayReadOrWriteMemory())
      return false;
  for (auto &I : *QFB)
    if (&I != QStore && I.mayReadOrWriteMemory())
      return false;
  if (QTB)
    for (auto &I : *QTB)
      if (&I != QStore && I.mayReadOrWriteMemory())
        return false;
  // Scan from PStore to the end of its block for conflicting memory ops.
  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
       I != E; ++I)
    if (&*I != PStore && I->mayReadOrWriteMemory())
      return false;

  // If we're not in aggressive mode, we only optimize if we have some
  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
  auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
    // A null block models a fallthrough edge and is trivially worthwhile.
    if (!BB)
      return true;
    // Heuristic: if the block can be if-converted/phi-folded and the
    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
    // thread this store.
    InstructionCost Cost = 0;
    // NOTE(review): the initializer expression for Budget is missing in this
    // view — TODO restore (presumably a threshold scaled by TCC_Basic).
    InstructionCost Budget =
    for (auto &I : BB->instructionsWithoutDebug(false)) {
      // Consider terminator instruction to be free.
      if (I.isTerminator())
        continue;
      // If this is one of the stores that we want to speculate out of this BB,
      // then don't count its cost, consider it to be free.
      // NOTE(review): llvm::find returns an iterator (a non-null pointer even
      // when equal to end()), so this condition is always true; presumably
      // llvm::is_contained(FreeStores, S) was intended — TODO confirm.
      if (auto *S = dyn_cast<StoreInst>(&I))
        if (llvm::find(FreeStores, S))
          continue;
      // Else, we have a white-list of instructions that we are okay
      // speculating.
      // NOTE(review): the `if` line guarding this return is missing in this
      // view — confirm against upstream.
        return false; // Not in white-list - not worthwhile folding.
      // And finally, if this is a non-free instruction that we are okay
      // speculating, ensure that we consider the speculation budget.
      Cost +=
          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
      if (Cost > Budget)
        return false; // Eagerly refuse to fold as soon as we're out of budget.
    }
    assert(Cost <= Budget &&
           "When we run out of budget we will eagerly return from within the "
           "per-instruction loop.");
    return true;
  };

  // Both candidate stores are "free" for the worthwhile heuristic above.
  const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
  // NOTE(review): the line opening this `if` (presumably testing the
  // aggressive-mode flag) is missing in this view.
      (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
       !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
    return false;

  // If PostBB has more than two predecessors, we need to split it so we can
  // sink the store.
  if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
    // We know that QFB's only successor is PostBB. And QFB has a single
    // predecessor. If QTB exists, then its only successor is also PostBB.
    // If QTB does not exist, then QFB's only predecessor has a conditional
    // branch to QFB and PostBB.
    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
    BasicBlock *NewBB =
        SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
    if (!NewBB)
      return false;
    PostBB = NewBB;
  }

  // OK, we're going to sink the stores to PostBB. The store has to be
  // conditional though, so first create the predicate.
  // NOTE(review): the right-hand sides of the next two initializers (casts of
  // the relevant predecessors' terminators) are missing in this view.
  BranchInst *PBranch =
  BranchInst *QBranch =
  Value *PCond = PBranch->getCondition();
  Value *QCond = QBranch->getCondition();

  // NOTE(review): the heads of the PPHI/QPHI initializers (presumably calls to
  // ensureValueAvailableInSuccessor) are missing in this view.
      PStore->getParent());
      QStore->getParent(), PPHI);

  BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
  IRBuilder<> QB(PostBB, PostBBFirst);
  QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());

  // Account for the fallthrough canonicalization done by the caller: if the
  // store actually lives in the false block, invert the predicate once more.
  InvertPCond ^= (PStore->getParent() != PTB);
  InvertQCond ^= (QStore->getParent() != QTB);
  Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
  Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;

  // The merged store must execute if either original path stored.
  Value *CombinedPred = QB.CreateOr(PPred, QPred);

  BasicBlock::iterator InsertPt = QB.GetInsertPoint();
  auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
                                      /*Unreachable=*/false,
                                      /*BranchWeights=*/nullptr, DTU);
  // Derive branch weights for the combined condition from both originals,
  // swapping each weight pair when its condition was inverted above.
  if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
    SmallVector<uint32_t, 2> PWeights, QWeights;
    extractBranchWeights(*PBranch, PWeights);
    extractBranchWeights(*QBranch, QWeights);
    if (InvertPCond)
      std::swap(PWeights[0], PWeights[1]);
    if (InvertQCond)
      std::swap(QWeights[0], QWeights[1]);
    auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
        {CombinedWeights[0], CombinedWeights[1]},
        /*IsExpected=*/false, /*ElideAllZero=*/true);
  }

  QB.SetInsertPoint(T);
  StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
  // Keep only the alias metadata common to both original stores.
  SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
  // Choose the minimum alignment. If we could prove both stores execute, we
  // could use biggest one. In this case, though, we only know that one of the
  // stores executes. And we don't know it's safe to take the alignment from a
  // store that doesn't execute.
  SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));

  QStore->eraseFromParent();
  PStore->eraseFromParent();

  return true;
}
4480
                                   DomTreeUpdater *DTU, const DataLayout &DL,
                                   const TargetTransformInfo &TTI) {
  // The intention here is to find diamonds or triangles (see below) where each
  // conditional block contains a store to the same address. Both of these
  // stores are conditional, so they can't be unconditionally sunk. But it may
  // be profitable to speculatively sink the stores into one merged store at the
  // end, and predicate the merged store on the union of the two conditions of
  // PBI and QBI.
  //
  // This can reduce the number of stores executed if both of the conditions are
  // true, and can allow the blocks to become small enough to be if-converted.
  // This optimization will also chain, so that ladders of test-and-set
  // sequences can be if-converted away.
  //
  // We only deal with simple diamonds or triangles:
  //
  //     PBI       or            PBI       or a combination of the two
  //    /   \                     | \
  //   PTB  PFB                   |  PFB
  //    \   /                     | /
  //     QBI                     QBI
  //    /   \                     | \
  //   QTB  QFB                   |  QFB
  //    \   /                     | /
  //    PostBB                  PostBB
  //
  // We model triangles as a type of diamond with a nullptr "true" block.
  // Triangles are canonicalized so that the fallthrough edge is represented by
  // a true condition, as in the diagram above.
  BasicBlock *PTB = PBI->getSuccessor(0);
  BasicBlock *PFB = PBI->getSuccessor(1);
  BasicBlock *QTB = QBI->getSuccessor(0);
  BasicBlock *QFB = QBI->getSuccessor(1);
  BasicBlock *PostBB = QFB->getSingleSuccessor();

  // Make sure we have a good guess for PostBB. If QTB's only successor is
  // QFB, then QFB is a better PostBB.
  if (QTB->getSingleSuccessor() == QFB)
    PostBB = QFB;

  // If we couldn't find a good PostBB, stop.
  if (!PostBB)
    return false;

  bool InvertPCond = false, InvertQCond = false;
  // Canonicalize fallthroughs to the true branches.
  if (PFB == QBI->getParent()) {
    std::swap(PFB, PTB);
    InvertPCond = true;
  }
  if (QFB == PostBB) {
    std::swap(QFB, QTB);
    InvertQCond = true;
  }

  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
  // and QFB may not. Model fallthroughs as a nullptr block.
  if (PTB == QBI->getParent())
    PTB = nullptr;
  if (QTB == PostBB)
    QTB = nullptr;

  // Legality bailouts. We must have at least the non-fallthrough blocks and
  // the post-dominating block, and the non-fallthroughs must only have one
  // predecessor.
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
  };
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
    return false;
  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
    return false;
  // QBI's block must only be referenced by its two incoming branch edges.
  if (!QBI->getParent()->hasNUses(2))
    return false;

  // OK, this is a sequence of two diamonds or triangles.
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
  for (auto *BB : {PTB, PFB}) {
    if (!BB)
      continue;
    // NOTE(review): the dyn_cast<StoreInst> guard for this loop body is
    // missing in this view — confirm against upstream.
    for (auto &I : *BB)
        PStoreAddresses.insert(SI->getPointerOperand());
  }
  for (auto *BB : {QTB, QFB}) {
    if (!BB)
      continue;
    // NOTE(review): the dyn_cast<StoreInst> guard for this loop body is
    // missing in this view — confirm against upstream.
    for (auto &I : *BB)
        QStoreAddresses.insert(SI->getPointerOperand());
  }

  set_intersect(PStoreAddresses, QStoreAddresses);
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
  // clear what it contains.
  auto &CommonAddresses = PStoreAddresses;

  // Try to merge the pair of conditional stores for every shared address.
  bool Changed = false;
  for (auto *Address : CommonAddresses)
    Changed |=
        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
                                       InvertPCond, InvertQCond, DTU, DL, TTI);
  return Changed;
}
4589
/// If the previous block ended with a widenable branch, determine if reusing
/// the target block is profitable and legal. This will have the effect of
/// "widening" PBI, but doesn't require us to reason about hoisting safety.
4594 DomTreeUpdater *DTU) {
4595 // TODO: This can be generalized in two important ways:
4596 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4597 // values from the PBI edge.
4598 // 2) We can sink side effecting instructions into BI's fallthrough
4599 // successor provided they doesn't contribute to computation of
4600 // BI's condition.
4601 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4602 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4603 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4604 !BI->getParent()->getSinglePredecessor())
4605 return false;
4606 if (!IfFalseBB->phis().empty())
4607 return false; // TODO
4608 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4609 // may undo the transform done here.
4610 // TODO: There might be a more fine-grained solution to this.
4611 if (!llvm::succ_empty(IfFalseBB))
4612 return false;
4613 // Use lambda to lazily compute expensive condition after cheap ones.
4614 auto NoSideEffects = [](BasicBlock &BB) {
4615 return llvm::none_of(BB, [](const Instruction &I) {
4616 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4617 });
4618 };
4619 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4620 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4621 NoSideEffects(*BI->getParent())) {
4622 auto *OldSuccessor = BI->getSuccessor(1);
4623 OldSuccessor->removePredecessor(BI->getParent());
4624 BI->setSuccessor(1, IfFalseBB);
4625 if (DTU)
4626 DTU->applyUpdates(
4627 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4628 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4629 return true;
4630 }
4631 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4632 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4633 NoSideEffects(*BI->getParent())) {
4634 auto *OldSuccessor = BI->getSuccessor(0);
4635 OldSuccessor->removePredecessor(BI->getParent());
4636 BI->setSuccessor(0, IfFalseBB);
4637 if (DTU)
4638 DTU->applyUpdates(
4639 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4640 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4641 return true;
4642 }
4643 return false;
4644}
4645
4646/// If we have a conditional branch as a predecessor of another block,
4647/// this function tries to simplify it. We know
4648/// that PBI and BI are both conditional branches, and BI is in one of the
4649/// successor blocks of PBI - PBI branches to BI.
                                           DomTreeUpdater *DTU,
                                           const DataLayout &DL,
                                           const TargetTransformInfo &TTI) {
  assert(PBI->isConditional() && BI->isConditional());
  BasicBlock *BB = BI->getParent();

  // If this block ends with a branch instruction, and if there is a
  // predecessor that ends on a branch of the same condition, make
  // this conditional branch redundant.
  if (PBI->getCondition() == BI->getCondition() &&
      PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
    // Okay, the outcome of this conditional branch is statically
    // knowable. If this block had a single pred, handle specially, otherwise
    // foldCondBranchOnValueKnownInPredecessor() will handle it.
    if (BB->getSinglePredecessor()) {
      // Turn this into a branch on constant.
      bool CondIsTrue = PBI->getSuccessor(0) == BB;
      BI->setCondition(
          ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
      return true; // Nuke the branch on constant.
    }
  }

  // If the previous block ended with a widenable branch, determine if reusing
  // the target block is profitable and legal. This will have the effect of
  // "widening" PBI, but doesn't require us to reason about hoisting safety.
  if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
    return true;

  // If both branches are conditional and both contain stores to the same
  // address, remove the stores from the conditionals and create a conditional
  // merged store at the end.
  if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
    return true;

  // If this is a conditional branch in an empty block, and if any
  // predecessors are a conditional branch to one of our destinations,
  // fold the conditions into logical ops and one cond br.

  // Ignore dbg intrinsics.
  if (&*BB->instructionsWithoutDebug(false).begin() != BI)
    return false;

  // Find which successor (if any) PBI and BI share; PBIOp/BIOp record the
  // successor index of the shared destination in each branch.
  int PBIOp, BIOp;
  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
    PBIOp = 0;
    BIOp = 0;
  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
    PBIOp = 0;
    BIOp = 1;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
    PBIOp = 1;
    BIOp = 0;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
    PBIOp = 1;
    BIOp = 1;
  } else {
    // No common destination: nothing to fold.
    return false;
  }

  // Check to make sure that the other destination of this branch
  // isn't BB itself. If so, this is an infinite loop that will
  // keep getting unwound.
  if (PBI->getSuccessor(PBIOp) == BB)
    return false;

  // If predecessor's branch probability to BB is too low don't merge branches.
  SmallVector<uint32_t, 2> PredWeights;
  if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
      extractBranchWeights(*PBI, PredWeights) &&
      (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {

    // NOTE(review): the declaration line for CommonDestProb (presumably a
    // BranchProbability::getBranchProbability(...) call) is missing in this
    // view — confirm against upstream.
        PredWeights[PBIOp],
        static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);

    BranchProbability Likely = TTI.getPredictableBranchThreshold();
    if (CommonDestProb >= Likely)
      return false;
  }

  // Do not perform this transformation if it would require
  // insertion of a large number of select instructions. For targets
  // without predication/cmovs, this is a big pessimization.

  BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
  BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
  unsigned NumPhis = 0;
  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
       ++II, ++NumPhis) {
    if (NumPhis > 2) // Disable this xform.
      return false;
  }

  // Finally, if everything is ok, fold the branches to logical ops.
  BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);

  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
                    << "AND: " << *BI->getParent());

  // Dominator-tree updates are batched and applied together below.
  // NOTE(review): the declaration line for `Updates` is missing in this view.

  // If OtherDest *is* BB, then BB is a basic block with a single conditional
  // branch in it, where one edge (OtherDest) goes back to itself but the other
  // exits. We don't *know* that the program avoids the infinite loop
  // (even though that seems likely). If we do this xform naively, we'll end up
  // recursively unpeeling the loop. Since we know that (after the xform is
  // done) that the block *is* infinite if reached, we just make it an obviously
  // infinite loop with no cond branch.
  if (OtherDest == BB) {
    // Insert it at the end of the function, because it's either code,
    // or it won't matter if it's hot. :)
    BasicBlock *InfLoopBlock =
        BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
    BranchInst::Create(InfLoopBlock, InfLoopBlock);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
    OtherDest = InfLoopBlock;
  }

  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // BI may have other predecessors. Because of this, we leave
  // it alone, but modify PBI.

  // Make sure we get to CommonDest on True&True directions.
  Value *PBICond = PBI->getCondition();
  IRBuilder<NoFolder> Builder(PBI);
  if (PBIOp)
    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");

  Value *BICond = BI->getCondition();
  if (BIOp)
    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");

  // Merge the conditions.
  Value *Cond =
      createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");

  // Modify PBI to branch on the new condition to the new dests.
  PBI->setCondition(Cond);
  PBI->setSuccessor(0, CommonDest);
  PBI->setSuccessor(1, OtherDest);

  if (DTU) {
    Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
    Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});

    DTU->applyUpdates(Updates);
  }

  // Update branch weight for PBI.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
  bool HasWeights =
      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight);
  if (HasWeights) {
    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
    // The weight to CommonDest should be PredCommon * SuccTotal +
    //                                    PredOther * SuccCommon.
    // The weight to OtherDest should be PredOther * SuccOther.
    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
                                  PredOther * SuccCommon,
                              PredOther * SuccOther};

    setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);
    // Cond may be a select instruction with the first operand set to "true", or
    // the second to "false" (see how createLogicalOp works for `and` and `or`)
    if (auto *SI = dyn_cast<SelectInst>(Cond)) {
      assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
      // The select is predicated on PBICond
      // The corresponding probabilities are what was referred to above as
      // PredCommon and PredOther.
      setFittedBranchWeights(*SI, {PredCommon, PredOther},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // OtherDest may have phi nodes. If so, add an entry from PBI's
  // block that are identical to the entries for BI's block.
  addPredecessorToBlock(OtherDest, PBI->getParent(), BB);

  // We know that the CommonDest already had an edge from PBI to
  // it. If it has PHIs though, the PHIs may have different
  // entries for BB and PBI's BB. If so, insert a select to make
  // them agree.
  for (PHINode &PN : CommonDest->phis()) {
    Value *BIV = PN.getIncomingValueForBlock(BB);
    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
    Value *PBIV = PN.getIncomingValue(PBBIdx);
    if (BIV != PBIV) {
      // Insert a select in PBI to pick the right value.
      // NOTE(review): the left-hand side of this statement (presumably
      // `SelectInst *NV = cast<SelectInst>(`) is missing in this view.
          Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
      PN.setIncomingValue(PBBIdx, NV);
      // The select has the same condition as PBI, in the same BB. The
      // probabilities don't change.
      if (HasWeights) {
        uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
        uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
        setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
                               /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
    }
  }

  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // This basic block is probably dead. We know it has at least
  // one fewer predecessor.
  return true;
}
4871
4872// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4873// true or to FalseBB if Cond is false.
4874// Takes care of updating the successors and removing the old terminator.
4875// Also makes sure not to introduce new successors by assuming that edges to
4876// non-successor TrueBBs and FalseBBs aren't reachable.
4877bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4878 Value *Cond, BasicBlock *TrueBB,
4879 BasicBlock *FalseBB,
4880 uint32_t TrueWeight,
4881 uint32_t FalseWeight) {
4882 auto *BB = OldTerm->getParent();
4883 // Remove any superfluous successor edges from the CFG.
4884 // First, figure out which successors to preserve.
4885 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4886 // successor.
4887 BasicBlock *KeepEdge1 = TrueBB;
4888 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4889
4890 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4891
4892 // Then remove the rest.
4893 for (BasicBlock *Succ : successors(OldTerm)) {
4894 // Make sure only to keep exactly one copy of each edge.
4895 if (Succ == KeepEdge1)
4896 KeepEdge1 = nullptr;
4897 else if (Succ == KeepEdge2)
4898 KeepEdge2 = nullptr;
4899 else {
4900 Succ->removePredecessor(BB,
4901 /*KeepOneInputPHIs=*/true);
4902
4903 if (Succ != TrueBB && Succ != FalseBB)
4904 RemovedSuccessors.insert(Succ);
4905 }
4906 }
4907
4908 IRBuilder<> Builder(OldTerm);
4909 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4910
4911 // Insert an appropriate new terminator.
4912 if (!KeepEdge1 && !KeepEdge2) {
4913 if (TrueBB == FalseBB) {
4914 // We were only looking for one successor, and it was present.
4915 // Create an unconditional branch to it.
4916 Builder.CreateBr(TrueBB);
4917 } else {
4918 // We found both of the successors we were looking for.
4919 // Create a conditional branch sharing the condition of the select.
4920 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4921 setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
4922 /*IsExpected=*/false, /*ElideAllZero=*/true);
4923 }
4924 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4925 // Neither of the selected blocks were successors, so this
4926 // terminator must be unreachable.
4927 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4928 } else {
4929 // One of the selected values was a successor, but the other wasn't.
4930 // Insert an unconditional branch to the one that was found;
4931 // the edge to the one that wasn't must be unreachable.
4932 if (!KeepEdge1) {
4933 // Only TrueBB was found.
4934 Builder.CreateBr(TrueBB);
4935 } else {
4936 // Only FalseBB was found.
4937 Builder.CreateBr(FalseBB);
4938 }
4939 }
4940
4942
4943 if (DTU) {
4944 SmallVector<DominatorTree::UpdateType, 2> Updates;
4945 Updates.reserve(RemovedSuccessors.size());
4946 for (auto *RemovedSuccessor : RemovedSuccessors)
4947 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4948 DTU->applyUpdates(Updates);
4949 }
4950
4951 return true;
4952}
4953
4954// Replaces
4955// (switch (select cond, X, Y)) on constant X, Y
4956// with a branch - conditional if X and Y lead to distinct BBs,
4957// unconditional otherwise.
4958bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4959 SelectInst *Select) {
4960 // Check for constant integer values in the select.
4961 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4962 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4963 if (!TrueVal || !FalseVal)
4964 return false;
4965
4966 // Find the relevant condition and destinations.
4967 Value *Condition = Select->getCondition();
4968 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4969 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4970
4971 // Get weight for TrueBB and FalseBB.
4972 uint32_t TrueWeight = 0, FalseWeight = 0;
4973 SmallVector<uint64_t, 8> Weights;
4974 bool HasWeights = hasBranchWeightMD(*SI);
4975 if (HasWeights) {
4976 getBranchWeights(SI, Weights);
4977 if (Weights.size() == 1 + SI->getNumCases()) {
4978 TrueWeight =
4979 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4980 FalseWeight =
4981 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4982 }
4983 }
4984
4985 // Perform the actual simplification.
4986 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4987 FalseWeight);
4988}
4989
4990// Replaces
4991// (indirectbr (select cond, blockaddress(@fn, BlockA),
4992// blockaddress(@fn, BlockB)))
4993// with
4994// (br cond, BlockA, BlockB).
bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
                                                SelectInst *SI) {
  // Check that both operands of the select are block addresses.
  BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
  BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
  if (!TBA || !FBA)
    return false;

  // Extract the actual blocks.
  BasicBlock *TrueBB = TBA->getBasicBlock();
  BasicBlock *FalseBB = FBA->getBasicBlock();

  // The select's profile becomes the profile of the conditional branch that
  // replaces the indirect branch.
  // Weights stay 0/0 ("unknown") unless the extraction below succeeds.
  SmallVector<uint32_t> SelectBranchWeights(2);
  // NOTE(review): the line guarding this call (presumably an `if` on a
  // profile-fix flag) is missing in this view — confirm against upstream.
    extractBranchWeights(*SI, SelectBranchWeights);
  // Perform the actual simplification.
  return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
                                    SelectBranchWeights[0],
                                    SelectBranchWeights[1]);
}
5017
5018/// This is called when we find an icmp instruction
5019/// (a seteq/setne with a constant) as the only instruction in a
5020/// block that ends with an uncond branch. We are looking for a very specific
5021/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5022/// this case, we merge the first two "or's of icmp" into a switch, but then the
5023/// default value goes to an uncond block with a seteq in it, we get something
5024/// like:
5025///
5026/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5027/// DEFAULT:
5028/// %tmp = icmp eq i8 %A, 92
5029/// br label %end
5030/// end:
5031/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5032///
5033/// We prefer to split the edge to 'end' so that there is a true/false entry to
5034/// the PHI, merging the third icmp into the switch.
5035bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5036 ICmpInst *ICI, IRBuilder<> &Builder) {
5037 // Select == nullptr means we assume that there is a hidden no-op select
5038 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
5039 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
5040}
5041
5042/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5043/// case. This is called when we find an icmp instruction (a seteq/setne with a
5044/// constant) and its following select instruction as the only TWO instructions
5045/// in a block that ends with an uncond branch. We are looking for a very
5046/// specific pattern that occurs when "
5047/// if (A == 1) return C1;
5048/// if (A == 2) return C2;
5049/// if (A < 3) return C3;
5050/// return C4;
5051/// " gets simplified. In this case, we merge the first two "branches of icmp"
5052/// into a switch, but then the default value goes to an uncond block with a lt
5053/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5054/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5055/// get something like:
5056///
5057/// case1:
5058/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5059/// case2:
5060/// br label %end
5061/// DEFAULT:
5062/// %tmp = icmp eq i8 %A, 2
5063/// %val = select i1 %tmp, i8 C3, i8 C4
5064/// br label %end
5065/// end:
5066/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5067///
5068/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
5069/// to the PHI, merging the icmp & select into the switch, as follows:
5070///
5071/// case1:
5072/// switch i8 %A, label %DEFAULT [
5073/// i8 0, label %end
5074/// i8 1, label %case2
5075/// i8 2, label %case3
5076/// ]
5077/// case2:
5078/// br label %end
5079/// case3:
5080/// br label %end
5081/// DEFAULT:
5082/// br label %end
5083/// end:
5084/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
    ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp/select has multiple uses, it is
  // too complex.
  // TODO: support multi-phis in succ BB of select's BB.
  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
      (Select && !Select->hasOneUse()))
    return false;

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch. In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
    return false;

  Value *IcmpCond;
  ConstantInt *NewCaseVal;
  CmpPredicate Predicate;

  // Match icmp X, C
  if (!match(ICI,
             m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
    return false;

  Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
  if (!Select) {
    // If Select == nullptr, we can assume that there is a hidden no-op select
    // just after icmp, i.e. "select i1 %icmp, i1 true, i1 false".
    SelectCond = ICI;
    SelectTrueVal = Builder.getTrue();
    SelectFalseVal = Builder.getFalse();
    User = ICI->user_back();
  } else {
    SelectCond = Select->getCondition();
    // Check if the select condition is the same as the icmp condition.
    if (SelectCond != ICI)
      return false;
    SelectTrueVal = Select->getTrueValue();
    SelectFalseVal = Select->getFalseValue();
    User = Select->user_back();
  }

  // The compared value must be the switch condition, otherwise folding the
  // comparison into the switch is meaningless.
  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
  if (SI->getCondition() != IcmpCond)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value of
  // V in this block. Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value");
    ICI->setOperand(0, VVal);

    if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest. If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it.
  if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
    Value *V;
    if (Predicate == ICmpInst::ICMP_EQ)
    else

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the select has to be in the 'end' block, by the only PHI node in
  // the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
  PHINode *PHIUse = dyn_cast<PHINode>(User);
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
    return false;

  // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
  // edge gets SelectTrueVal in the PHI.
  Value *DefaultCst = SelectFalseVal;
  Value *NewCst = SelectTrueVal;

  if (ICI->getPredicate() == ICmpInst::ICMP_NE)
    std::swap(DefaultCst, NewCst);

  // Replace Select (which is used by the PHI for the default value) with
  // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
  if (Select) {
    Select->replaceAllUsesWith(DefaultCst);
    Select->eraseFromParent();
  } else {
    ICI->replaceAllUsesWith(DefaultCst);
  }
  ICI->eraseFromParent();

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // Okay, the switch goes to this block on a default value. Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
  {
    SwitchInstProfUpdateWrapper SIW(*SI);
    auto W0 = SIW.getSuccessorWeight(0);
    if (W0) {
      // Split the default successor's weight: (roughly) half stays on the
      // default edge and the same amount is given to the newly added case.
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(0, *NewW);
    }
    SIW.addCase(NewCaseVal, NewBB, NewW);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(SuccBlock);
  PHIUse->addIncoming(NewCst, NewBB);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}
5223
5224/// The specified branch is a conditional branch.
5225/// Check to see if it is branching on an or/and chain of icmp instructions, and
5226/// fold it into a switch instruction if so.
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
  Values.erase(llvm::unique(Values), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  // Capture the original branch weights up front so they can be carried over
  // to the new conditional branch / switch created below.
  SmallVector<uint32_t> BranchWeights;
  const bool HasProfile = !ProfcheckDisableMetadataFixes &&
                          extractBranchWeights(*BI, BranchWeights);

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  if (!TrueWhenEqual) {
    // Equality sends control to the false successor, so swap the roles (and
    // keep the profile weights in sync with the swap).
    std::swap(DefaultBB, EdgeBB);
    if (HasProfile)
      std::swap(BranchWeights[0], BranchWeights[1]);
  }

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH. BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    // We don't have any info about this condition.
    auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
                             : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
           "Should not end up here with unstable pointers");
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Check if we can represent the values as a contiguous range. If so, we use a
  // range check + conditional branch instead of a switch.
  // (Values is sorted in descending order above, so front() - back() spanning
  // exactly size()-1 means the values are consecutive.)
  if (Values.front()->getValue() - Values.back()->getValue() ==
      Values.size() - 1) {
    ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
        Values.back()->getValue(), Values.front()->getValue() + 1);
    APInt Offset, RHS;
    ICmpInst::Predicate Pred;
    RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
    Value *X = CompVal;
    if (!Offset.isZero())
      X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
    Value *Cond =
        Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
    BranchInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
    if (HasProfile)
      setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
    // We don't need to update PHI nodes since we don't add any new edges.
  } else {
    // Create the new switch instruction now.
    SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
    if (HasProfile) {
      // We know the weight of the default case. We don't know the weight of the
      // other cases, but rather than completely lose profiling info, we split
      // the remaining probability equally over them.
      SmallVector<uint32_t> NewWeights(Values.size() + 1);
      NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
                                        // if TrueWhenEqual.
      for (auto &V : drop_begin(NewWeights))
        V = BranchWeights[0] / Values.size();
      setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
    }

    // Add all of the 'cases' to the switch instruction.
    for (ConstantInt *Val : Values)
      New->addCase(Val, EdgeBB);

    // We added edges from PI to the EdgeBB. As such, if there were any
    // PHI nodes in EdgeBB, they need entries to be added corresponding to
    // the number of edges added.
    for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
      PHINode *PN = cast<PHINode>(BBI);
      Value *InVal = PN->getIncomingValueForBlock(BB);
      for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
        PN->addIncoming(InVal, BB);
    }
  }

  // Erase the old branch instruction.
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5391
5392bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5393 if (isa<PHINode>(RI->getValue()))
5394 return simplifyCommonResume(RI);
5395 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5396 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5397 // The resume must unwind the exception that caused control to branch here.
5398 return simplifySingleResume(RI);
5399
5400 return false;
5401}
5402
// Check if cleanup block is empty. A range of instructions counts as "empty"
// when it contains nothing but debug intrinsics (dbg.declare / dbg.value /
// dbg.label) and lifetime.end markers; any other instruction disqualifies it.
  for (Instruction &I : R) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II)
      return false;

    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      // Benign: has no effect on cleanup semantics.
      break;
    default:
      return false;
    }
  }
  return true;
}
5423
5424// Simplify resume that is shared by several landing pads (phi of landing pad).
// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
                                      BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    for (BasicBlock *Pred :
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  // TrivialUnwindBlocks was checked non-empty above, so we always changed
  // something if we reach this point.
  return !TrivialUnwindBlocks.empty();
}
5493
5494// Simplify resume that is only used by a single (non-phi) landing pad.
// Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to here");

  // Check that there are no other instructions except for debug intrinsics.
      make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  // Early-increment iteration because removeUnwindEdge mutates the CFG and
  // hence BB's predecessor list.
  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}
5516
5518 // If this is a trivial cleanup pad that executes no instructions, it can be
5519 // eliminated. If the cleanup pad continues to the caller, any predecessor
5520 // that is an EH pad will be updated to continue to the caller and any
5521 // predecessor that terminates with an invoke instruction will have its invoke
5522 // instruction converted to a call instruction. If the cleanup pad being
5523 // simplified does not continue to the caller, each predecessor will be
5524 // updated to continue to the unwind destination of the cleanup pad being
5525 // simplified.
5526 BasicBlock *BB = RI->getParent();
5527 CleanupPadInst *CPInst = RI->getCleanupPad();
5528 if (CPInst->getParent() != BB)
5529 // This isn't an empty cleanup.
5530 return false;
5531
5532 // We cannot kill the pad if it has multiple uses. This typically arises
5533 // from unreachable basic blocks.
5534 if (!CPInst->hasOneUse())
5535 return false;
5536
5537 // Check that there are no other instructions except for benign intrinsics.
5539 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5540 return false;
5541
5542 // If the cleanup return we are simplifying unwinds to the caller, this will
5543 // set UnwindDest to nullptr.
5544 BasicBlock *UnwindDest = RI->getUnwindDest();
5545
5546 // We're about to remove BB from the control flow. Before we do, sink any
5547 // PHINodes into the unwind destination. Doing this before changing the
5548 // control flow avoids some potentially slow checks, since we can currently
5549 // be certain that UnwindDest and BB have no common predecessors (since they
5550 // are both EH pads).
5551 if (UnwindDest) {
5552 // First, go through the PHI nodes in UnwindDest and update any nodes that
5553 // reference the block we are removing
5554 for (PHINode &DestPN : UnwindDest->phis()) {
5555 int Idx = DestPN.getBasicBlockIndex(BB);
5556 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5557 assert(Idx != -1);
5558 // This PHI node has an incoming value that corresponds to a control
5559 // path through the cleanup pad we are removing. If the incoming
5560 // value is in the cleanup pad, it must be a PHINode (because we
5561 // verified above that the block is otherwise empty). Otherwise, the
5562 // value is either a constant or a value that dominates the cleanup
5563 // pad being removed.
5564 //
5565 // Because BB and UnwindDest are both EH pads, all of their
5566 // predecessors must unwind to these blocks, and since no instruction
5567 // can have multiple unwind destinations, there will be no overlap in
5568 // incoming blocks between SrcPN and DestPN.
5569 Value *SrcVal = DestPN.getIncomingValue(Idx);
5570 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5571
5572 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5573 for (auto *Pred : predecessors(BB)) {
5574 Value *Incoming =
5575 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5576 DestPN.addIncoming(Incoming, Pred);
5577 }
5578 }
5579
5580 // Sink any remaining PHI nodes directly into UnwindDest.
5581 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5582 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5583 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5584 // If the PHI node has no uses or all of its uses are in this basic
5585 // block (meaning they are debug or lifetime intrinsics), just leave
5586 // it. It will be erased when we erase BB below.
5587 continue;
5588
5589 // Otherwise, sink this PHI node into UnwindDest.
5590 // Any predecessors to UnwindDest which are not already represented
5591 // must be back edges which inherit the value from the path through
5592 // BB. In this case, the PHI value must reference itself.
5593 for (auto *pred : predecessors(UnwindDest))
5594 if (pred != BB)
5595 PN.addIncoming(&PN, pred);
5596 PN.moveBefore(InsertPt);
5597 // Also, add a dummy incoming value for the original BB itself,
5598 // so that the PHI is well-formed until we drop said predecessor.
5599 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5600 }
5601 }
5602
5603 std::vector<DominatorTree::UpdateType> Updates;
5604
5605 // We use make_early_inc_range here because we will remove all predecessors.
5607 if (UnwindDest == nullptr) {
5608 if (DTU) {
5609 DTU->applyUpdates(Updates);
5610 Updates.clear();
5611 }
5612 removeUnwindEdge(PredBB, DTU);
5613 ++NumInvokes;
5614 } else {
5615 BB->removePredecessor(PredBB);
5616 Instruction *TI = PredBB->getTerminator();
5617 TI->replaceUsesOfWith(BB, UnwindDest);
5618 if (DTU) {
5619 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5620 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5621 }
5622 }
5623 }
5624
5625 if (DTU)
5626 DTU->applyUpdates(Updates);
5627
5628 DeleteDeadBlock(BB, DTU);
5629
5630 return true;
5631}
5632
// Try to merge two cleanuppads together.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanuppad with the predecessor pad.
  // The only cleanuppad uses should be this cleanupret, its cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5665
5666bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5667 // It is possible to transiantly have an undef cleanuppad operand because we
5668 // have deleted some, but not all, dead blocks.
5669 // Eventually, this block will be deleted.
5670 if (isa<UndefValue>(RI->getOperand(0)))
5671 return false;
5672
5673 if (mergeCleanupPad(RI))
5674 return true;
5675
5676 if (removeEmptyCleanup(RI, DTU))
5677 return true;
5678
5679 return false;
5680}
5681
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    --BBI;

      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // Snapshot the predecessors first: the loop below rewrites/erases their
  // terminators, which mutates BB's predecessor list.
  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        // One successor leads to unreachable, so the condition must take the
        // other value; record that fact as an llvm.assume.
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        // Flush queued updates before removeUnwindEdge mutates the CFG.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5865
5874
static std::optional<ContiguousCasesResult>
                    BasicBlock *Dest, BasicBlock *OtherDest) {
  assert(Cases.size() >= 1);

  // Cases is expected to be sorted in descending order: front() holds the
  // largest value and back() the smallest (the adjacent_find predicate below
  // relies on the same ordering).
  const APInt &Min = Cases.back()->getValue();
  const APInt &Max = Cases.front()->getValue();
  APInt Offset = Max - Min;
  size_t ContiguousOffset = Cases.size() - 1;
  if (Offset == ContiguousOffset) {
    // Max - Min spans exactly size()-1 values, so [Min, Max] is contiguous.
    return ContiguousCasesResult{
        /*Min=*/Cases.back(),
        /*Max=*/Cases.front(),
        /*Dest=*/Dest,
        /*OtherDest=*/OtherDest,
        /*Cases=*/&Cases,
        /*OtherCases=*/&OtherCases,
    };
  }
  ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false);
  // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
  // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
  // contiguous range for the other destination. N.B. If CR is not a full range,
  // Max+1 is not equal to Min. It's not continuous in arithmetic.
  if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
    assert(Cases.size() >= 2);
    // Find the single gap in the descending sequence of case values.
    auto *It =
        std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
          return L->getValue() != R->getValue() + 1;
        });
    if (It == Cases.end())
      return std::nullopt;
    auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
    if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
        Cases.size() - 2) {
      return ContiguousCasesResult{
          /*Min=*/cast<ConstantInt>(
              ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
          /*Max=*/
              ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
          /*Dest=*/OtherDest,
          /*OtherDest=*/Dest,
          /*Cases=*/&OtherCases,
          /*OtherCases=*/&Cases,
      };
    }
  }
  return std::nullopt;
}
5927
                                          DomTreeUpdater *DTU,
                                          bool RemoveOrigDefaultBlock = true) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(BB);
  // Create a fresh block holding only 'unreachable' and make it the default.
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // The original default block may still be reachable through other case
    // edges of this switch; only delete the CFG edge when it is not.
    if (RemoveOrigDefaultBlock &&
        !is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5951
5952 /// Turn a switch into an integer range comparison and branch.
5953 /// Switches with more than 2 destinations are ignored.
5954 /// Switches with 1 destination are also ignored.
5955 bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5956 IRBuilder<> &Builder) {
5957 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5958
// A reachable default destination counts as one of the two allowed
// destinations (it seeds DestA below).
5959 bool HasDefault = !SI->defaultDestUnreachable();
5960
5961 auto *BB = SI->getParent();
5962 // Partition the cases into two sets with different destinations.
5963 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5964 BasicBlock *DestB = nullptr;
// NOTE(review): the declarations of CasesA/CasesB (original lines 5965-5966)
// were dropped by the documentation extractor; they collect the case values
// that branch to DestA and DestB respectively.
5967
5968 for (auto Case : SI->cases()) {
5969 BasicBlock *Dest = Case.getCaseSuccessor();
5970 if (!DestA)
5971 DestA = Dest;
5972 if (Dest == DestA) {
5973 CasesA.push_back(Case.getCaseValue());
5974 continue;
5975 }
5976 if (!DestB)
5977 DestB = Dest;
5978 if (Dest == DestB) {
5979 CasesB.push_back(Case.getCaseValue());
5980 continue;
5981 }
5982 return false; // More than two destinations.
5983 }
5984 if (!DestB)
5985 return false; // All destinations are the same and the default is unreachable
5986
5987 assert(DestA && DestB &&
5988 "Single-destination switch should have been folded.");
5989 assert(DestA != DestB);
5990 assert(DestB != SI->getDefaultDest());
5991 assert(!CasesB.empty() && "There must be non-default cases.");
5992 assert(!CasesA.empty() || HasDefault);
5993
5994 // Figure out if one of the sets of cases form a contiguous range.
5995 std::optional<ContiguousCasesResult> ContiguousCases;
5996
5997 // Only one icmp is needed when there is only one case.
5998 if (!HasDefault && CasesA.size() == 1)
5999 ContiguousCases = ContiguousCasesResult{
6000 /*Min=*/CasesA[0],
6001 /*Max=*/CasesA[0],
6002 /*Dest=*/DestA,
6003 /*OtherDest=*/DestB,
6004 /*Cases=*/&CasesA,
6005 /*OtherCases=*/&CasesB,
6006 };
6007 else if (CasesB.size() == 1)
6008 ContiguousCases = ContiguousCasesResult{
6009 /*Min=*/CasesB[0],
6010 /*Max=*/CasesB[0],
6011 /*Dest=*/DestB,
6012 /*OtherDest=*/DestA,
6013 /*Cases=*/&CasesB,
6014 /*OtherCases=*/&CasesA,
6015 };
6016 // Correctness: Cases to the default destination cannot be contiguous cases.
6017 else if (!HasDefault)
6018 ContiguousCases =
6019 findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);
6020
// Try the other orientation before giving up.
6021 if (!ContiguousCases)
6022 ContiguousCases =
6023 findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);
6024
6025 if (!ContiguousCases)
6026 return false;
6027
6028 auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;
6029
6030 // Start building the compare and branch.
// NOTE(review): the initialization of Offset (original line 6032) was dropped
// by the documentation extractor; it is the value added to the condition to
// bias the contiguous range down to start at zero.
6033 Constant *NumCases = ConstantInt::get(Offset->getType(),
6034 Max->getValue() - Min->getValue() + 1);
6035 BranchInst *NewBI;
// A single-value range degenerates to one equality compare.
6036 if (NumCases->isOneValue()) {
6037 assert(Max->getValue() == Min->getValue());
6038 Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
6039 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6040 }
6041 // If NumCases overflowed, then all possible values jump to the successor.
6042 else if (NumCases->isNullValue() && !Cases->empty()) {
6043 NewBI = Builder.CreateBr(Dest);
6044 } else {
// General case: shift the range to start at zero, then a single unsigned
// compare against the range size selects the destination.
6045 Value *Sub = SI->getCondition();
6046 if (!Offset->isNullValue())
6047 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
6048 Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
6049 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6050 }
6051
6052 // Update weight for the newly-created conditional branch.
6053 if (hasBranchWeightMD(*SI) && NewBI->isConditional()) {
6054 SmallVector<uint64_t, 8> Weights;
6055 getBranchWeights(SI, Weights);
6056 if (Weights.size() == 1 + SI->getNumCases()) {
6057 uint64_t TrueWeight = 0;
6058 uint64_t FalseWeight = 0;
// Accumulate each original successor's weight onto the side of the new
// branch it now reaches.
6059 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
6060 if (SI->getSuccessor(I) == Dest)
6061 TrueWeight += Weights[I];
6062 else
6063 FalseWeight += Weights[I];
6064 }
// Scale both weights down in lockstep so each fits in 32 bits while
// preserving their ratio.
6065 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
6066 TrueWeight /= 2;
6067 FalseWeight /= 2;
6068 }
6069 setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
6070 /*IsExpected=*/false, /*ElideAllZero=*/true);
6071 }
6072 }
6073
6074 // Prune obsolete incoming values off the successors' PHI nodes.
6075 for (auto &PHI : make_early_inc_range(Dest->phis())) {
// The new branch contributes exactly one edge from this block, so every
// phi keeps one incoming entry and sheds the rest.
6076 unsigned PreviousEdges = Cases->size();
6077 if (Dest == SI->getDefaultDest())
6078 ++PreviousEdges;
6079 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
6080 PHI.removeIncomingValue(SI->getParent());
6081 }
6082 for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
6083 unsigned PreviousEdges = OtherCases->size();
6084 if (OtherDest == SI->getDefaultDest())
6085 ++PreviousEdges;
6086 unsigned E = PreviousEdges - 1;
6087 // Remove all incoming values from OtherDest if OtherDest is unreachable.
6088 if (NewBI->isUnconditional())
6089 ++E;
6090 for (unsigned I = 0; I != E; ++I)
6091 PHI.removeIncomingValue(SI->getParent());
6092 }
6093
6094 // Clean up the default block - it may have phis or other instructions before
6095 // the unreachable terminator.
// NOTE(review): the statement guarded by this 'if' (original line 6097) was
// dropped by the documentation extractor.
6096 if (!HasDefault)
6098
6099 auto *UnreachableDefault = SI->getDefaultDest();
6100
6101 // Drop the switch.
6102 SI->eraseFromParent();
6103
6104 if (!HasDefault && DTU)
6105 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
6106
6107 return true;
6108}
6109
6110 /// Compute masked bits for the condition of a switch
6111 /// and use it to remove dead cases.
// NOTE(review): the opening signature line (original 6112) was dropped by the
// documentation extractor; presumably it declares this function taking
// (SwitchInst *SI, DomTreeUpdater *DTU, ...) ahead of the parameters below —
// confirm against the real source.
6113 AssumptionCache *AC,
6114 const DataLayout &DL) {
6115 Value *Cond = SI->getCondition();
6116 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
// NOTE(review): the declaration of KnownValues (original line 6117) was
// dropped by the extractor; it receives up to 4 enumerated possible values
// of the condition.
6118 bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);
6119
6120 // We can also eliminate cases by determining that their values are outside of
6121 // the limited range of the condition based on how many significant (non-sign)
6122 // bits are in the condition value.
6123 unsigned MaxSignificantBitsInCond =
6125
6126 // Gather dead cases.
6128 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
6129 SmallVector<BasicBlock *, 8> UniqueSuccessors;
6130 for (const auto &Case : SI->cases()) {
6131 auto *Successor = Case.getCaseSuccessor();
// Track per-successor case counts so we can later tell which successors
// lose their last edge (DomTree maintenance only).
6132 if (DTU) {
6133 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
6134 if (Inserted)
6135 UniqueSuccessors.push_back(Successor);
6136 ++It->second;
6137 }
6138 ConstantInt *CaseC = Case.getCaseValue();
6139 const APInt &CaseVal = CaseC->getValue();
// A case is dead if its value contradicts the known bits of the condition,
// needs more significant bits than the condition can carry, or is absent
// from the enumerated possible values.
6140 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
6141 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
6142 (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
6143 DeadCases.push_back(CaseC);
6144 if (DTU)
6145 --NumPerSuccessorCases[Successor];
6146 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6147 << " is dead.\n");
6148 } else if (IsKnownValuesValid)
6149 KnownValues.erase(CaseC);
6150 }
6151
6152 // If we can prove that the cases must cover all possible values, the
6153 // default destination becomes dead and we can remove it. If we know some
6154 // of the bits in the value, we can use that to more precisely compute the
6155 // number of possible unique case values.
6156 bool HasDefault = !SI->defaultDestUnreachable();
6157 const unsigned NumUnknownBits =
6158 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
6159 assert(NumUnknownBits <= Known.getBitWidth());
6160 if (HasDefault && DeadCases.empty()) {
// Only values not matched by any case remain in KnownValues here; if they
// are all undef, the default can never be meaningfully taken.
6161 if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
6163 return true;
6164 }
6165
6166 if (NumUnknownBits < 64 /* avoid overflow */) {
6167 uint64_t AllNumCases = 1ULL << NumUnknownBits;
6168 if (SI->getNumCases() == AllNumCases) {
6170 return true;
6171 }
6172 // When only one case value is missing, replace default with that case.
6173 // Eliminating the default branch will provide more opportunities for
6174 // optimization, such as lookup tables.
6175 if (SI->getNumCases() == AllNumCases - 1) {
6176 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
6177 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
6178 if (CondTy->getIntegerBitWidth() > 64 ||
6179 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6180 return false;
6181
// XOR of all present case values yields the single missing value: the XOR
// of the whole 2^k value domain is 0 for k > 1 (asserted above).
6182 uint64_t MissingCaseVal = 0;
6183 for (const auto &Case : SI->cases())
6184 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6185 auto *MissingCase = cast<ConstantInt>(
6186 ConstantInt::get(Cond->getType(), MissingCaseVal));
6188 SIW.addCase(MissingCase, SI->getDefaultDest(),
6189 SIW.getSuccessorWeight(0));
6191 /*RemoveOrigDefaultBlock*/ false);
6192 SIW.setSuccessorWeight(0, 0);
6193 return true;
6194 }
6195 }
6196 }
6197
6198 if (DeadCases.empty())
6199 return false;
6200
// Remove each dead case and its phi contributions.
6202 for (ConstantInt *DeadCase : DeadCases) {
6203 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
6204 assert(CaseI != SI->case_default() &&
6205 "Case was not found. Probably mistake in DeadCases forming.");
6206 // Prune unused values from PHI nodes.
6207 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
6208 SIW.removeCase(CaseI);
6209 }
6210
// Inform the DomTree about successors that lost their last incoming edge.
6211 if (DTU) {
6212 std::vector<DominatorTree::UpdateType> Updates;
6213 for (auto *Successor : UniqueSuccessors)
6214 if (NumPerSuccessorCases[Successor] == 0)
6215 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
6216 DTU->applyUpdates(Updates);
6217 }
6218
6219 return true;
6220}
6221
6222 /// If BB would be eligible for simplification by
6223 /// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6224 /// by an unconditional branch), look at the phi node for BB in the successor
6225 /// block and see if the incoming value is equal to CaseValue. If so, return
6226 /// the phi node, and set PhiIndex to BB's index in the phi node.
// NOTE(review): the opening signature line (original 6227) was dropped by the
// documentation extractor; it introduces the (ConstantInt *CaseValue, ...)
// parameters ahead of those below.
6228 BasicBlock *BB, int *PhiIndex) {
6229 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6230 return nullptr; // BB must be empty to be a candidate for simplification.
6231 if (!BB->getSinglePredecessor())
6232 return nullptr; // BB must be dominated by the switch.
6233
// NOTE(review): the initialization of Branch from BB's terminator (original
// line 6234) was dropped by the documentation extractor.
6235 if (!Branch || !Branch->isUnconditional())
6236 return nullptr; // Terminator must be unconditional branch.
6237
6238 BasicBlock *Succ = Branch->getSuccessor(0);
6239
// Scan the successor's phis for an incoming value from BB that equals the
// case constant; that entry can be rewritten to use the switch condition.
6240 for (PHINode &PHI : Succ->phis()) {
6241 int Idx = PHI.getBasicBlockIndex(BB);
6242 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6243
6244 Value *InValue = PHI.getIncomingValue(Idx);
6245 if (InValue != CaseValue)
6246 continue;
6247
6248 *PhiIndex = Idx;
6249 return &PHI;
6250 }
6251
6252 return nullptr;
6253}
6254
6255 /// Try to forward the condition of a switch instruction to a phi node
6256 /// dominated by the switch, if that would mean that some of the destination
6257 /// blocks of the switch can be folded away. Return true if a change is made.
// NOTE(review): the function signature line (original 6258) was dropped by
// the documentation extractor; presumably it takes the SwitchInst *SI used
// throughout the body — confirm against the real source.
6259 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6260
6261 ForwardingNodesMap ForwardingNodes;
6262 BasicBlock *SwitchBlock = SI->getParent();
6263 bool Changed = false;
6264 for (const auto &Case : SI->cases()) {
6265 ConstantInt *CaseValue = Case.getCaseValue();
6266 BasicBlock *CaseDest = Case.getCaseSuccessor();
6267
6268 // Replace phi operands in successor blocks that are using the constant case
6269 // value rather than the switch condition variable:
6270 // switchbb:
6271 // switch i32 %x, label %default [
6272 // i32 17, label %succ
6273 // ...
6274 // succ:
6275 // %r = phi i32 ... [ 17, %switchbb ] ...
6276 // -->
6277 // %r = phi i32 ... [ %x, %switchbb ] ...
6278
6279 for (PHINode &Phi : CaseDest->phis()) {
6280 // This only works if there is exactly 1 incoming edge from the switch to
6281 // a phi. If there is >1, that means multiple cases of the switch map to 1
6282 // value in the phi, and that phi value is not the switch condition. Thus,
6283 // this transform would not make sense (the phi would be invalid because
6284 // a phi can't have different incoming values from the same block).
6285 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6286 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6287 count(Phi.blocks(), SwitchBlock) == 1) {
6288 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6289 Changed = true;
6290 }
6291 }
6292
6293 // Collect phi nodes that are indirectly using this switch's case constants.
6294 int PhiIdx;
6295 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6296 ForwardingNodes[Phi].push_back(PhiIdx);
6297 }
6298
6299 for (auto &ForwardingNode : ForwardingNodes) {
6300 PHINode *Phi = ForwardingNode.first;
6301 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6302 // Check if it helps to fold PHI.
// Folding pays off only if at least two incoming entries get forwarded, or
// the condition already appears among the phi's incoming values.
6303 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6304 continue;
6305
6306 for (int Index : Indexes)
6307 Phi->setIncomingValue(Index, SI->getCondition());
6308 Changed = true;
6309 }
6310
6311 return Changed;
6312}
6313
6314 /// Return true if the backend will be able to handle
6315 /// initializing an array of constants like C.
// NOTE(review): the function signature line (original 6316, taking
// Constant *C and the TargetTransformInfo &TTI used below) was dropped by
// the documentation extractor.
6317 if (C->isThreadDependent())
6318 return false;
6319 if (C->isDLLImportDependent())
6320 return false;
6321
// Only simple scalar constants (and, per the dropped continuation lines
// 6323-6324, a few other whitelisted kinds) are representable in a table.
6322 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6325 return false;
6326
// NOTE(review): the 'if (ConstantExpr *CE = ...)' line opening this scope
// (original 6327) was dropped by the documentation extractor.
6328 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6329 // materializing the array of constants.
6330 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6331 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6332 return false;
6333 }
6334
// Defer the final cost decision to the target.
6335 if (!TTI.shouldBuildLookupTablesForConstant(C))
6336 return false;
6337
6338 return true;
6339}
6340
6341 /// If V is a Constant, return it. Otherwise, try to look up
6342 /// its constant value in ConstantPool, returning 0 if it's not there.
6343 static Constant *
// NOTE(review): the parameter list lines (original 6344-6345, naming V and
// the ConstantPool map) were dropped by the documentation extractor.
6346 if (Constant *C = dyn_cast<Constant>(V))
6347 return C;
// lookup() returns a default-constructed (null) pointer when V is absent.
6348 return ConstantPool.lookup(V);
6349}
6350
6351 /// Try to fold instruction I into a constant. This works for
6352 /// simple instructions such as binary operations where both operands are
6353 /// constant or can be replaced by constants from the ConstantPool. Returns the
6354 /// resulting constant on success, 0 otherwise.
6355 static Constant *
// NOTE(review): the parameter list and the 'if (SelectInst *Select = ...)'
// dispatch (original lines 6356-6358) were dropped by the documentation
// extractor; the first branch below handles select instructions specially.
6359 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6360 if (!A)
6361 return nullptr;
// A known condition lets us forward the taken arm's constant directly.
6362 if (A->isAllOnesValue())
6363 return lookupConstant(Select->getTrueValue(), ConstantPool);
6364 if (A->isNullValue())
6365 return lookupConstant(Select->getFalseValue(), ConstantPool);
6366 return nullptr;
6367 }
6368
// Generic path: every operand must resolve to a constant, then fold.
// NOTE(review): the declaration of COps (original line 6369) was dropped by
// the documentation extractor.
6370 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6371 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6372 COps.push_back(A);
6373 else
6374 return nullptr;
6375 }
6376
6377 return ConstantFoldInstOperands(I, COps, DL);
6378}
6379
6380 /// Try to determine the resulting constant values in phi nodes
6381 /// at the common destination basic block, *CommonDest, for one of the case
6382 /// destionations CaseDest corresponding to value CaseVal (0 for the default
6383 /// case), of a switch instruction SI.
6384 static bool
// NOTE(review): the first signature line (original 6385, introducing SI,
// CaseVal and CaseDest) was dropped by the documentation extractor.
6386 BasicBlock **CommonDest,
6387 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6388 const DataLayout &DL, const TargetTransformInfo &TTI) {
6389 // The block from which we enter the common destination.
6390 BasicBlock *Pred = SI->getParent();
6391
6392 // If CaseDest is empty except for some side-effect free instructions through
6393 // which we can constant-propagate the CaseVal, continue to its successor.
// NOTE(review): the declaration of ConstantPool (original line 6394) was
// dropped by the documentation extractor; it maps values to their folded
// constants, seeded with the switch condition below.
6395 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6396 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6397 if (I.isTerminator()) {
6398 // If the terminator is a simple branch, continue to the next block.
6399 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6400 return false;
6401 Pred = CaseDest;
6402 CaseDest = I.getSuccessor(0);
6403 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6404 // Instruction is side-effect free and constant.
6405
6406 // If the instruction has uses outside this block or a phi node slot for
6407 // the block, it is not safe to bypass the instruction since it would then
6408 // no longer dominate all its uses.
6409 for (auto &Use : I.uses()) {
6410 User *User = Use.getUser();
// NOTE(review): the cast of User to an Instruction (original line 6411,
// whose result the next line tests) was dropped by the extractor.
6412 if (I->getParent() == CaseDest)
6413 continue;
6414 if (PHINode *Phi = dyn_cast<PHINode>(User))
6415 if (Phi->getIncomingBlock(Use) == CaseDest)
6416 continue;
6417 return false;
6418 }
6419
6420 ConstantPool.insert(std::make_pair(&I, C));
6421 } else {
// Non-foldable instruction: stop scanning; the phi lookup below decides.
6422 break;
6423 }
6424 }
6425
6426 // If we did not have a CommonDest before, use the current one.
6427 if (!*CommonDest)
6428 *CommonDest = CaseDest;
6429 // If the destination isn't the common one, abort.
6430 if (CaseDest != *CommonDest)
6431 return false;
6432
6433 // Get the values for this case from phi nodes in the destination block.
6434 for (PHINode &PHI : (*CommonDest)->phis()) {
6435 int Idx = PHI.getBasicBlockIndex(Pred);
6436 if (Idx == -1)
6437 continue;
6438
6439 Constant *ConstVal =
6440 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6441 if (!ConstVal)
6442 return false;
6443
6444 // Be conservative about which kinds of constants we support.
6445 if (!validLookupTableConstant(ConstVal, TTI))
6446 return false;
6447
6448 Res.push_back(std::make_pair(&PHI, ConstVal));
6449 }
6450
// Success only if at least one phi result was collected.
6451 return Res.size() > 0;
6452}
6453
6454// Helper function used to add CaseVal to the list of cases that generate
6455// Result. Returns the updated number of cases that generate this result.
6456static size_t mapCaseToResult(ConstantInt *CaseVal,
6457 SwitchCaseResultVectorTy &UniqueResults,
6458 Constant *Result) {
6459 for (auto &I : UniqueResults) {
6460 if (I.first == Result) {
6461 I.second.push_back(CaseVal);
6462 return I.second.size();
6463 }
6464 }
6465 UniqueResults.push_back(
6466 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6467 return 1;
6468}
6469
6470 // Helper function that initializes a map containing
6471 // results for the PHI node of the common destination block for a switch
6472 // instruction. Returns false if multiple PHI nodes have been found or if
6473 // there is not a common destination block for the switch.
// NOTE(review): the first signature line (original 6474, introducing SI and
// the PHINode *&PHI out-parameter) was dropped by the documentation
// extractor.
6475 BasicBlock *&CommonDest,
6476 SwitchCaseResultVectorTy &UniqueResults,
6477 Constant *&DefaultResult,
6478 const DataLayout &DL,
6479 const TargetTransformInfo &TTI,
6480 uintptr_t MaxUniqueResults) {
6481 for (const auto &I : SI->cases()) {
6482 ConstantInt *CaseVal = I.getCaseValue();
6483
6484 // Resulting value at phi nodes for this case value.
6485 SwitchCaseResultsTy Results;
6486 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6487 DL, TTI))
6488 return false;
6489
6490 // Only one value per case is permitted.
6491 if (Results.size() > 1)
6492 return false;
6493
6494 // Add the case->result mapping to UniqueResults.
6495 const size_t NumCasesForResult =
6496 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6497
6498 // Early out if there are too many cases for this result.
6499 if (NumCasesForResult > MaxSwitchCasesPerResult)
6500 return false;
6501
6502 // Early out if there are too many unique results.
6503 if (UniqueResults.size() > MaxUniqueResults)
6504 return false;
6505
6506 // Check the PHI consistency.
// Every case must feed the same phi node; remember the first and compare.
6507 if (!PHI)
6508 PHI = Results[0].first;
6509 else if (PHI != Results[0].first)
6510 return false;
6511 }
6512 // Find the default result value.
// NOTE(review): the declaration of DefaultResults (original line 6513) was
// dropped by the documentation extractor.
6514 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6515 DL, TTI);
6516 // If the default value is not found abort unless the default destination
6517 // is unreachable.
6518 DefaultResult =
6519 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6520
6521 return DefaultResult || SI->defaultDestUnreachable();
6522}
6523
6524 // Helper function that checks if it is possible to transform a switch with only
6525 // two cases (or two cases + default) that produces a result into a select.
6526 // TODO: Handle switches with more than 2 cases that map to the same result.
6527 // The branch weights correspond to the provided Condition (i.e. if Condition is
6528 // modified from the original SwitchInst, the caller must adjust the weights)
// NOTE(review): here and at the three later weight updates in this function,
// the opening 'setFittedBranchWeights(' call lines (original 6562, 6579,
// 6626, 6652, 6672) were dropped by the documentation extractor; only their
// argument continuation lines remain visible below.
6529 static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6530 Constant *DefaultResult, Value *Condition,
6531 IRBuilder<> &Builder, const DataLayout &DL,
6532 ArrayRef<uint32_t> BranchWeights) {
6533 // If we are selecting between only two cases transform into a simple
6534 // select or a two-way select if default is possible.
6535 // Example:
6536 // switch (a) { %0 = icmp eq i32 %a, 10
6537 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6538 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6539 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6540 // }
6541
6542 const bool HasBranchWeights =
6543 !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6544
6545 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6546 ResultVector[1].second.size() == 1) {
6547 ConstantInt *FirstCase = ResultVector[0].second[0];
6548 ConstantInt *SecondCase = ResultVector[1].second[0];
6549 Value *SelectValue = ResultVector[1].first;
// With a reachable default, build the inner select first: it picks between
// the second case's result and the default result.
6550 if (DefaultResult) {
6551 Value *ValueCompare =
6552 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6553 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6554 DefaultResult, "switch.select");
6555 if (auto *SI = dyn_cast<SelectInst>(SelectValue);
6556 SI && HasBranchWeights) {
6557 // We start with 3 probabilities, where the numerator is the
6558 // corresponding BranchWeights[i], and the denominator is the sum over
6559 // BranchWeights. We want the probability and negative probability of
6560 // Condition == SecondCase.
6561 assert(BranchWeights.size() == 3);
6563 *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6564 /*IsExpected=*/false, /*ElideAllZero=*/true);
6565 }
6566 }
6567 Value *ValueCompare =
6568 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6569 Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6570 SelectValue, "switch.select");
6571 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6572 // We may have had a DefaultResult. Base the position of the first and
6573 // second's branch weights accordingly. Also the proability that Condition
6574 // != FirstCase needs to take that into account.
6575 assert(BranchWeights.size() >= 2);
6576 size_t FirstCasePos = (Condition != nullptr);
6577 size_t SecondCasePos = FirstCasePos + 1;
6578 uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
6580 {BranchWeights[FirstCasePos],
6581 DefaultCase + BranchWeights[SecondCasePos]},
6582 /*IsExpected=*/false, /*ElideAllZero=*/true);
6583 }
6584 return Ret;
6585 }
6586
6587 // Handle the degenerate case where two cases have the same result value.
6588 if (ResultVector.size() == 1 && DefaultResult) {
6589 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6590 unsigned CaseCount = CaseValues.size();
6591 // n bits group cases map to the same result:
6592 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6593 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6594 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6595 if (isPowerOf2_32(CaseCount)) {
6596 ConstantInt *MinCaseVal = CaseValues[0];
6597 // If there are bits that are set exclusively by CaseValues, we
6598 // can transform the switch into a select if the conjunction of
6599 // all the values uniquely identify CaseValues.
6600 APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());
6601
6602 // Find the minimum value and compute the and of all the case values.
6603 for (auto *Case : CaseValues) {
6604 if (Case->getValue().slt(MinCaseVal->getValue()))
6605 MinCaseVal = Case;
6606 AndMask &= Case->getValue();
6607 }
6608 KnownBits Known = computeKnownBits(Condition, DL);
6609
6610 if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
6611 // Compute the number of bits that are free to vary.
6612 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6613
6614 // Check if the number of values covered by the mask is equal
6615 // to the number of cases.
6616 if (FreeBits == Log2_32(CaseCount)) {
6617 Value *And = Builder.CreateAnd(Condition, AndMask);
6618 Value *Cmp = Builder.CreateICmpEQ(
6619 And, Constant::getIntegerValue(And->getType(), AndMask));
6620 Value *Ret =
6621 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6622 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6623 // We know there's a Default case. We base the resulting branch
6624 // weights off its probability.
6625 assert(BranchWeights.size() >= 2);
6627 *SI,
6628 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6629 /*IsExpected=*/false, /*ElideAllZero=*/true);
6630 }
6631 return Ret;
6632 }
6633 }
6634
6635 // Mark the bits case number touched.
6636 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6637 for (auto *Case : CaseValues)
6638 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6639
6640 // Check if cases with the same result can cover all number
6641 // in touched bits.
6642 if (BitMask.popcount() == Log2_32(CaseCount)) {
// Rebase the condition so the case set starts at zero, then one masked
// compare distinguishes the whole group from the default.
6643 if (!MinCaseVal->isNullValue())
6644 Condition = Builder.CreateSub(Condition, MinCaseVal);
6645 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6646 Value *Cmp = Builder.CreateICmpEQ(
6647 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6648 Value *Ret =
6649 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6650 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6651 assert(BranchWeights.size() >= 2);
6653 *SI,
6654 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6655 /*IsExpected=*/false, /*ElideAllZero=*/true);
6656 }
6657 return Ret;
6658 }
6659 }
6660
6661 // Handle the degenerate case where two cases have the same value.
6662 if (CaseValues.size() == 2) {
6663 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6664 "switch.selectcmp.case1");
6665 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6666 "switch.selectcmp.case2");
6667 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6668 Value *Ret =
6669 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6670 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6671 assert(BranchWeights.size() >= 2);
6673 *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6674 /*IsExpected=*/false, /*ElideAllZero=*/true);
6675 }
6676 return Ret;
6677 }
6678 }
6679
// No recognized pattern: the switch stays as it is.
6680 return nullptr;
6681}
6682
6683 // Helper function to cleanup a switch instruction that has been converted into
6684 // a select, fixing up PHI nodes and basic blocks.
// NOTE(review): the opening signature line (original 6685, introducing the
// SwitchInst *SI and PHINode *PHI parameters) was dropped by the
// documentation extractor.
6686 Value *SelectValue,
6687 IRBuilder<> &Builder,
6688 DomTreeUpdater *DTU) {
6689 std::vector<DominatorTree::UpdateType> Updates;
6690
6691 BasicBlock *SelectBB = SI->getParent();
6692 BasicBlock *DestBB = PHI->getParent();
6693
// Record a new edge only if the select block was not already a predecessor
// of the destination.
6694 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6695 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6696 Builder.CreateBr(DestBB);
6697
6698 // Remove the switch.
6699
// Collapse all of this block's incoming phi entries into a single one
// carrying the select's value.
6700 PHI->removeIncomingValueIf(
6701 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6702 PHI->addIncoming(SelectValue, SelectBB);
6703
6704 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6705 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6706 BasicBlock *Succ = SI->getSuccessor(i);
6707
6708 if (Succ == DestBB)
6709 continue;
6710 Succ->removePredecessor(SelectBB);
// Emit at most one Delete update per distinct successor.
6711 if (DTU && RemovedSuccessors.insert(Succ).second)
6712 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6713 }
6714 SI->eraseFromParent();
6715 if (DTU)
6716 DTU->applyUpdates(Updates);
6717}
6718
6719 /// If a switch is only used to initialize one or more phi nodes in a common
6720 /// successor block with only two different constant values, try to replace the
6721 /// switch with a select. Returns true if the fold was made.
// NOTE(review): the opening signature line (original 6722, introducing the
// SwitchInst *SI and IRBuilder parameters) was dropped by the documentation
// extractor.
6723 DomTreeUpdater *DTU, const DataLayout &DL,
6724 const TargetTransformInfo &TTI) {
6725 Value *const Cond = SI->getCondition();
6726 PHINode *PHI = nullptr;
6727 BasicBlock *CommonDest = nullptr;
6728 Constant *DefaultResult;
6729 SwitchCaseResultVectorTy UniqueResults;
6730 // Collect all the cases that will deliver the same value from the switch.
6731 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6732 DL, TTI, /*MaxUniqueResults*/ 2))
6733 return false;
6734
6735 assert(PHI != nullptr && "PHI for value select not found");
6736 Builder.SetInsertPoint(SI);
6737 SmallVector<uint32_t, 4> BranchWeights;
// NOTE(review): the guard opening this scope and the branch-weight
// extraction call (original lines 6738 and 6740) were dropped by the
// documentation extractor; HasWeights records whether weights were found.
6739 [[maybe_unused]] auto HasWeights =
6741 assert(!HasWeights == (BranchWeights.empty()));
6742 }
// One weight for the default plus one per unique result is expected.
6743 assert(BranchWeights.empty() ||
6744 (BranchWeights.size() >=
6745 UniqueResults.size() + (DefaultResult != nullptr)));
6746
6747 Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
6748 Builder, DL, BranchWeights);
6749 if (!SelectValue)
6750 return false;
6751
6752 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6753 return true;
6754}
6755
6756 namespace {
6757
6758 /// This class finds alternatives for switches to ultimately
6759 /// replace the switch.
6760 class SwitchReplacement {
6761 public:
6762 /// Create a helper for optimizations to use as a switch replacement.
6763 /// Find a better representation for the content of Values,
6764 /// using DefaultValue to fill any holes in the table.
6765 SwitchReplacement(
6766 Module &M, uint64_t TableSize, ConstantInt *Offset,
6767 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6768 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
6769
6770 /// Build instructions with Builder to retrieve values using Index
6771 /// and replace the switch.
6772 Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
6773 Function *Func);
6774
6775 /// Return true if a table with TableSize elements of
6776 /// type ElementType would fit in a target-legal register.
6777 static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6778 Type *ElementType);
6779
6780 /// Return the default value of the switch.
6781 Constant *getDefaultValue();
6782
6783 /// Return true if the replacement is a lookup table.
6784 bool isLookupTable();
6785
6786 /// Return true if the replacement is a bit map.
6787 bool isBitMap();
6788
6789 private:
6790 // Depending on the switch, there are different alternatives.
6791 enum {
6792 // For switches where each case contains the same value, we just have to
6793 // store that single value and return it for each lookup.
6794 SingleValueKind,
6795
6796 // For switches where there is a linear relationship between table index
6797 // and values. We calculate the result with a simple multiplication
6798 // and addition instead of a table lookup.
6799 LinearMapKind,
6800
6801 // For small tables with integer elements, we can pack them into a bitmap
6802 // that fits into a target-legal register. Values are retrieved by
6803 // shift and mask operations.
6804 BitMapKind,
6805
6806 // The table is stored as an array of values. Values are retrieved by load
6807 // instructions from the table.
6808 LookupTableKind
// The chosen representation; only the members relevant to this kind below
// are populated.
6809 } Kind;
6810
6811 // The default value of the switch.
6812 Constant *DefaultValue;
6813
6814 // The type of the output values.
6815 Type *ValueType;
6816
6817 // For SingleValueKind, this is the single value.
6818 Constant *SingleValue = nullptr;
6819
6820 // For BitMapKind, this is the bitmap.
6821 ConstantInt *BitMap = nullptr;
6822 IntegerType *BitMapElementTy = nullptr;
6823
6824 // For LinearMapKind, these are the constants used to derive the value.
6825 ConstantInt *LinearOffset = nullptr;
6826 ConstantInt *LinearMultiplier = nullptr;
// True when the linear map relies on wrapping arithmetic, so the derived
// Add/Mul must not carry nsw flags.
6827 bool LinearMapValWrapped = false;
6828
6829 // For LookupTableKind, this is the table.
6830 Constant *Initializer = nullptr;
6831 };
6832
6833 } // end anonymous namespace
6834
// Analyze the per-case result constants of a switch and choose the cheapest
// replacement strategy, in order of preference: a single constant
// (SingleValueKind), a linear function of the table index (LinearMapKind), a
// bitmap packed into one register (BitMapKind), or an in-memory constant
// array (LookupTableKind).
//
// \param TableSize    Number of table slots (>= Values.size()).
// \param Offset       Smallest case value; case values are rebased by it.
// \param Values       (case value, result constant) pairs; all results share
//                     one type.
// \param DefaultValue Result of the default destination; must be non-null and
//                     correctly typed when the table has holes.
SwitchReplacement::SwitchReplacement(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
    : DefaultValue(DefaultValue) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  ValueType = Values.begin()->second->getType();

  // Build up the table contents.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    // Table slot for this case, relative to the smallest case value.
    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // Poison results do not disqualify the single-value optimization; a
    // poison placeholder is replaced by the first real value seen.
    if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
  // NOTE(review): a line appears to be missing here in this rendering
  // (extraction artifact) — the section below is indented one level and is
  // closed by an extra '}', suggesting an integer-type guard opens a brace
  // here. Verify against upstream SimplifyCFG.cpp.
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
        // This is an poison, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison as
        // the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poisons in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        // Track whether the sequence ever moves against the direction implied
        // by the common distance; if so, nsw cannot be attached later.
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(TableContents[0]);
      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      // Probe Multiplier * (TableSize - 1) for signed overflow; MayWrap stays
      // true (conservative) when the max index doesn't even fit the width.
      if (isIntN(M.getBitWidth(), TableSize - 1))
        (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ValueType)) {
    // NOTE(review): a declaration line appears to be missing here in this
    // rendering (extraction artifact) — `IT` below is presumably the integer
    // cast of ValueType. Verify against upstream SimplifyCFG.cpp.
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(M.getContext(), TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    return;
  }

  // Store the table in an array.
  auto *TableTy = ArrayType::get(ValueType, TableSize);
  Initializer = ConstantArray::get(TableTy, TableContents);

  Kind = LookupTableKind;
}
6964
6965Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
6966 const DataLayout &DL, Function *Func) {
6967 switch (Kind) {
6968 case SingleValueKind:
6969 return SingleValue;
6970 case LinearMapKind: {
6971 ++NumLinearMaps;
6972 // Derive the result value from the input value.
6973 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6974 false, "switch.idx.cast");
6975 if (!LinearMultiplier->isOne())
6976 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6977 /*HasNUW = */ false,
6978 /*HasNSW = */ !LinearMapValWrapped);
6979
6980 if (!LinearOffset->isZero())
6981 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6982 /*HasNUW = */ false,
6983 /*HasNSW = */ !LinearMapValWrapped);
6984 return Result;
6985 }
6986 case BitMapKind: {
6987 ++NumBitMaps;
6988 // Type of the bitmap (e.g. i59).
6989 IntegerType *MapTy = BitMap->getIntegerType();
6990
6991 // Cast Index to the same type as the bitmap.
6992 // Note: The Index is <= the number of elements in the table, so
6993 // truncating it to the width of the bitmask is safe.
6994 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6995
6996 // Multiply the shift amount by the element width. NUW/NSW can always be
6997 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
6998 // BitMap's bit width.
6999 ShiftAmt = Builder.CreateMul(
7000 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
7001 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
7002
7003 // Shift down.
7004 Value *DownShifted =
7005 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
7006 // Mask off.
7007 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
7008 }
7009 case LookupTableKind: {
7010 ++NumLookupTables;
7011 auto *Table =
7012 new GlobalVariable(*Func->getParent(), Initializer->getType(),
7013 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
7014 Initializer, "switch.table." + Func->getName());
7015 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
7016 // Set the alignment to that of an array items. We will be only loading one
7017 // value out of it.
7018 Table->setAlignment(DL.getPrefTypeAlign(ValueType));
7019 Type *IndexTy = DL.getIndexType(Table->getType());
7020 auto *ArrayTy = cast<ArrayType>(Table->getValueType());
7021
7022 if (Index->getType() != IndexTy) {
7023 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
7024 Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
7025 if (auto *Zext = dyn_cast<ZExtInst>(Index))
7026 Zext->setNonNeg(
7027 isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
7028 }
7029
7030 Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
7031 Value *GEP =
7032 Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
7033 return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
7034 }
7035 }
7036 llvm_unreachable("Unknown helper kind!");
7037}
7038
7039bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7040 uint64_t TableSize,
7041 Type *ElementType) {
7042 auto *IT = dyn_cast<IntegerType>(ElementType);
7043 if (!IT)
7044 return false;
7045 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7046 // are <= 15, we could try to narrow the type.
7047
7048 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7049 if (TableSize >= UINT_MAX / IT->getBitWidth())
7050 return false;
7051 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
7052}
7053
// NOTE(review): the first line of this function's signature (name and leading
// parameters, presumably `Type *Ty` and the TTI reference) is missing from
// this rendering (extraction artifact) — verify against upstream
// SimplifyCFG.cpp.
                                      const DataLayout &DL) {
  // Allow any legal type.
  if (TTI.isTypeLegal(Ty))
    return true;

  // Everything past this point only applies to integer result types.
  auto *IT = dyn_cast<IntegerType>(Ty);
  if (!IT)
    return false;

  // Also allow power of 2 integer types that have at least 8 bits and fit in
  // a register. These types are common in frontend languages and targets
  // usually support loads of these types.
  // TODO: We could relax this to any integer that fits in a register and rely
  // on ABI alignment and padding in the table to allow the load to be widened.
  // Or we could widen the constants and truncate the load.
  unsigned BitWidth = IT->getBitWidth();
  return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
         DL.fitsInLegalInteger(IT->getBitWidth());
}
7074
7075Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7076
7077bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7078
7079bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7080
// Density heuristic: a switch with NumCases cases spread over CaseRange
// consecutive values is "dense" when at least 40% of the range is covered.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  // Ranges large enough to overflow the percentage math below are never dense.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  const uint64_t ScaledCases = NumCases * 100;
  return ScaledCases >= CaseRange * MinDensity;
}
7092
  // NOTE(review): the enclosing overload's signature line is missing from this
  // rendering (extraction artifact); the body below reads a sorted list of
  // case values (`Values`) — verify against upstream SimplifyCFG.cpp.
  // Inclusive span of the sorted case values; computed in uint64_t so
  // wrap-around is well-defined and detectable below.
  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
  uint64_t Range = Diff + 1;
  if (Range < Diff)
    return false; // Overflow.

  // Delegate to the (NumCases, CaseRange) density check.
  return isSwitchDense(Values.size(), Range);
}
7101
/// Determine whether a lookup table should be built for this switch, based on
/// the number of cases, size of the table, and the types of the results.
// TODO: We could support larger than legal types by limiting based on the
// number of loads required and/or table size. If the constants are small we
// could use smaller table entries and extend after the load.
// NOTE(review): the first line of this function's signature (name, SwitchInst
// and TableSize parameters) is missing from this rendering (extraction
// artifact) — verify against upstream SimplifyCFG.cpp.
                                   const TargetTransformInfo &TTI,
                                   const DataLayout &DL,
                                   const SmallVector<Type *> &ResultTypes) {
  if (SI->getNumCases() > TableSize)
    return false; // TableSize overflowed.

  bool AllTablesFitInRegister = true;
  bool HasIllegalType = false;
  for (const auto &Ty : ResultTypes) {
    // Saturate this flag to true.
    HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);

    // Saturate this flag to false.
    AllTablesFitInRegister =
        AllTablesFitInRegister &&
        SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);

    // If both flags saturate, we're done. NOTE: This *only* works with
    // saturating flags, and all flags have to saturate first due to the
    // non-deterministic behavior of iterating over a dense map.
    if (HasIllegalType && !AllTablesFitInRegister)
      break;
  }

  // If each table would fit in a register, we should build it anyway.
  if (AllTablesFitInRegister)
    return true;

  // Don't build a table that doesn't fit in-register if it has illegal types.
  if (HasIllegalType)
    return false;

  // Otherwise fall back on the case-density heuristic.
  return isSwitchDense(SI->getNumCases(), TableSize);
}
7142
// NOTE(review): the first line of this function's signature (the function
// name) is missing from this rendering (extraction artifact) — verify against
// upstream SimplifyCFG.cpp.
    ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
    bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
    const DataLayout &DL, const TargetTransformInfo &TTI) {
  // With a zero minimum, the case values already index the table directly.
  if (MinCaseVal.isNullValue())
    return true;
  // Reject negative minima, a maximum that saturates uint64_t, and switches
  // without a constant default result (needed to fill the grown table).
  if (MinCaseVal.isNegative() ||
      MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
      !HasDefaultResults)
    return false;
  // Only worthwhile if every result table would still fit in a register after
  // growing it to cover [0, MaxCaseVal].
  return all_of(ResultTypes, [&](const auto &ResultType) {
    return SwitchReplacement::wouldFitInRegister(
        DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
  });
}
7158
/// Try to reuse the switch table index compare. Following pattern:
/// \code
/// if (idx < tablesize)
///    r = table[idx]; // table does not contain default_value
/// else
///    r = default_value;
/// if (r != default_value)
///    ...
/// \endcode
/// Is optimized to:
/// \code
/// cond = idx < tablesize;
/// if (cond)
///    r = table[idx];
/// else
///    r = default_value;
/// if (cond)
///    ...
/// \endcode
/// Jump threading will then eliminate the second if(cond).
// NOTE(review): the signature's first line and the declaration lines defining
// `CmpInst`, `CmpOp1`, `DefaultConst` and `CaseConst` are missing from this
// rendering (extraction artifact) — verify against upstream SimplifyCFG.cpp.
    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  const DataLayout &DL = PhiBlock->getDataLayout();
      CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result.
  for (auto ValuePair : Values) {
        CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do it
  // at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
        RangeCheckBranch->getIterator());
    CmpInst->replaceAllUsesWith(InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}
7240
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
// NOTE(review): the first line of this function's signature and several
// declaration lines inside the body (the ResultLists/DefaultResults/PHIs
// containers, the ResultsTy alias, the PhiToReplacementMap, and the
// `DefaultVal` declaration) are missing from this rendering (extraction
// artifact) — each gap is marked below; verify against upstream
// SimplifyCFG.cpp.
                                DomTreeUpdater *DTU, const DataLayout &DL,
                                const TargetTransformInfo &TTI,
                                bool ConvertSwitchToLookupTable) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
  // NOTE(review): declaration line missing here (extraction artifact).

  // NOTE(review): declaration line missing here (extraction artifact).
  SmallVector<Type *> ResultTypes;
  // NOTE(review): declaration line missing here (extraction artifact).

  // Scan all cases, tracking the min/max case values and collecting, for each
  // phi in the common destination, the (case value, result constant) pairs.
  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    // NOTE(review): a type-alias/declaration line missing here (extraction
    // artifact).
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
                        Results, DL, TTI))
      return false;

    // Append the result and result types from this case to the list for each
    // phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      auto [It, Inserted] = ResultLists.try_emplace(PHI);
      if (Inserted)
        PHIs.push_back(PHI);
      It->second.push_back(std::make_pair(CaseVal, Value));
      ResultTypes.push_back(PHI->getType());
    }
  }

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
                     DefaultResultsList, DL, TTI);
  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    // Index the table with the raw condition: the table spans [0, MaxCaseVal].
    TableSize = MaxCaseVal->getLimitedValue() + 1;
    TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
  } else {
    // Rebase indices by MinCaseVal: the table spans [MinCaseVal, MaxCaseVal].
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

    TableIndexOffset = MinCaseVal;
  }

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  uint64_t NumResults = ResultLists[PHIs[0]].size();
  bool DefaultIsReachable = !SI->defaultDestUnreachable();

  bool TableHasHoles = (NumResults < TableSize);

  // If the table has holes but the default destination doesn't produce any
  // constant results, the lookup table entries corresponding to the holes will
  // contain poison.
  bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;

  // If the default destination doesn't produce a constant result but is still
  // reachable, and the lookup table has holes, we need to use a mask to
  // determine if the current index should load from the lookup table or jump
  // to the default case.
  // The mask is unnecessary if the table has holes but the default destination
  // is unreachable, as in that case the holes must also be unreachable.
  bool NeedMask = AllHolesArePoison && DefaultIsReachable;
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    if (!DL.fitsInLegalInteger(TableSize))
      return false;
  }

  if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  // Compute the table index value.
  Value *TableIndex;
  if (UseSwitchConditionAsTableIndex) {
    TableIndex = SI->getCondition();
    if (HasDefaultResults) {
      // Grow the table to cover all possible index values to avoid the range
      // check. It will use the default result to fill in the table hole later,
      // so make sure it exist.
      ConstantRange CR =
          computeConstantRange(TableIndex, /* ForSigned */ false);
      // Grow the table shouldn't have any size impact by checking
      // wouldFitInRegister.
      // TODO: Consider growing the table also when it doesn't fit in a register
      // if no optsize is specified.
      const uint64_t UpperBound = CR.getUpper().getLimitedValue();
      if (!CR.isUpperWrapped() &&
          all_of(ResultTypes, [&](const auto &ResultType) {
            return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
                                                         ResultType);
          })) {
        // There may be some case index larger than the UpperBound (unreachable
        // case), so make sure the table size does not get smaller.
        TableSize = std::max(UpperBound, TableSize);
        // The default branch is unreachable after we enlarge the lookup table.
        // Adjust DefaultIsReachable to reuse code path.
        DefaultIsReachable = false;
      }
    }
  }

  // Keep track of the switch replacement for each phi
  // NOTE(review): declaration line missing here (extraction artifact) —
  // presumably the PhiToReplacementMap used below.
  for (PHINode *PHI : PHIs) {
    const auto &ResultList = ResultLists[PHI];

    Type *ResultType = ResultList.begin()->second->getType();
    // Use any value to fill the lookup table holes.
    // NOTE(review): declaration line missing here (extraction artifact) —
    // presumably `Constant *DefaultVal =` introducing the conditional below.
        AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
                                  ResultList, DefaultVal, DL, FuncName);
    PhiToReplacementMap.insert({PHI, Replacement});
  }

  bool AnyLookupTables = any_of(
      PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
  bool AnyBitMaps = any_of(PhiToReplacementMap,
                           [](auto &KV) { return KV.second.isBitMap(); });

  // A few conditions prevent the generation of lookup tables:
  // 1. The target does not support lookup tables.
  // 2. The "no-jump-tables" function attribute is set.
  // However, these objections do not apply to other switch replacements, like
  // the bitmap, so we only stop here if any of these conditions are met and we
  // want to create a LUT. Otherwise, continue with the switch replacement.
  if (AnyLookupTables &&
      (!TTI.shouldBuildLookupTables() ||
       Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
    return false;

  // In the early optimization pipeline, disable formation of lookup tables,
  // bit maps and mask checks, as they may inhibit further optimization.
  if (!ConvertSwitchToLookupTable &&
      (AnyLookupTables || AnyBitMaps || NeedMask))
    return false;

  Builder.SetInsertPoint(SI);
  // TableIndex is the switch condition - TableIndexOffset if we don't
  // use the condition directly
  if (!UseSwitchConditionAsTableIndex) {
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res =
          MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
      (void)Res;
    }
    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
                                   "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);

  BranchInst *RangeCheckBranch = nullptr;
  BranchInst *CondBranch = nullptr;

  Builder.SetInsertPoint(SI);
  // A "covered" table needs no range check: every possible condition value
  // has a table entry.
  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(LookupBB);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
    // Note: We call removeProdecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    Value *Cmp = Builder.CreateICmpULT(
        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
    CondBranch = RangeCheckBranch;
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                  CommonDest->getParent(), CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (const auto &Result : ResultList) {
      uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
    CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  // Emit the per-phi replacement code and wire the results into the phis.
  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];
    auto Replacement = PhiToReplacementMap.at(PHI);
    auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(User, PhiBlock, RangeCheckBranch,
                          Replacement.getDefaultValue(), ResultList);
      }
    }

    PHI->addIncoming(Result, LookupBB);
  }

  Builder.CreateBr(CommonDest);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});

  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(*SI, BranchWeights);
  uint64_t ToLookupWeight = 0;
  uint64_t ToDefaultWeight = 0;

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
    BasicBlock *Succ = SI->getSuccessor(I);

    if (Succ == SI->getDefaultDest()) {
      if (HasBranchWeights)
        ToDefaultWeight += BranchWeights[I];
      continue;
    }
    Succ->removePredecessor(BB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, BB, Succ});
    if (HasBranchWeights)
      ToLookupWeight += BranchWeights[I];
  }
  SI->eraseFromParent();
  if (HasBranchWeights)
    setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
                           /*IsExpected=*/false);
  if (DTU)
    DTU->applyUpdates(Updates);

  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
7592
/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
///
/// This converts a sparse switch into a dense switch which allows better
/// lowering and could also allow transforming into a lookup table.
// NOTE(review): the first line of this function's signature and the
// declaration of the `Values` vector below are missing from this rendering
// (extraction artifact) — verify against upstream SimplifyCFG.cpp.
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
  // Restrict to condition widths the 64-bit arithmetic below can represent
  // and that the target handles as a legal integer.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;
  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values. We
  // can treat the case values as signed or unsigned. We can optimize more common
  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
  // as signed.
  for (const auto &C : SI->cases())
    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
  // less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  // C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.

  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  Value *Sub =
      Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
  // fshl(x, x, BitWidth - Shift) is a rotate-right by Shift.
  Value *Rot = Builder.CreateIntrinsic(
      Ty, Intrinsic::fshl,
      {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
  SI->replaceUsesOfWith(SI->getCondition(), Rot);

  // Rewrite each case value to its rebased, shifted equivalent.
  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
    Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
  }
  return true;
}
7679
7680/// Tries to transform the switch when the condition is umin with a constant.
7681/// In that case, the default branch can be replaced by the constant's branch.
7682/// This method also removes dead cases when the simplification cannot replace
7683/// the default branch.
7684///
7685/// For example:
7686/// switch(umin(a, 3)) {
7687/// case 0:
7688/// case 1:
7689/// case 2:
7690/// case 3:
7691/// case 4:
7692/// // ...
7693/// default:
7694/// unreachable
7695/// }
7696///
7697/// Transforms into:
7698///
7699/// switch(a) {
7700/// case 0:
7701/// case 1:
7702/// case 2:
7703/// default:
7704/// // This is case 3
7705/// }
// A is the non-constant operand of the matched umin.
7707 Value *A;
7709
// Only fire when the switch condition is umin(A, Constant).
7710 if (!match(SI->getCondition(), m_UMin(m_Value(A), m_ConstantInt(Constant))))
7711 return false;
7712
7715 BasicBlock *BB = SIW->getParent();
7716
7717 // Dead cases are removed even when the simplification fails.
7718 // A case is dead when its value is higher than the Constant.
7719 for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
7720 if (!I->getCaseValue()->getValue().ugt(Constant->getValue())) {
7721 ++I;
7722 continue;
7723 }
7724 BasicBlock *DeadCaseBB = I->getCaseSuccessor();
7725 DeadCaseBB->removePredecessor(BB);
7726 Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB});
// removeCase invalidates the end iterator; re-fetch it after each removal.
7727 I = SIW->removeCase(I);
7728 E = SIW->case_end();
7729 }
7730
7731 auto Case = SI->findCaseValue(Constant);
7732 // If the case value is not found, `findCaseValue` returns the default case.
7733 // In this scenario, since there is no explicit `case 3:`, the simplification
7734 // fails. The simplification also fails when the switch's default destination
7735 // is reachable.
7736 if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
// Still report a change if any dead cases were removed above.
7737 if (DTU)
7738 DTU->applyUpdates(Updates);
7739 return !Updates.empty();
7740 }
7741
// Route the Constant's case through the default edge (every value >= Constant
// saturates to it) and switch directly on A instead of the umin.
7742 BasicBlock *Unreachable = SI->getDefaultDest();
7743 SIW.replaceDefaultDest(Case);
7744 SIW.removeCase(Case);
7745 SIW->setCondition(A);
7746
7747 Updates.push_back({DominatorTree::Delete, BB, Unreachable});
7748
7749 if (DTU)
7750 DTU->applyUpdates(Updates);
7751
7752 return true;
7753}
7754
7755/// Tries to transform switch of powers of two to reduce switch range.
7756/// For example, switch like:
7757/// switch (C) { case 1: case 2: case 64: case 128: }
7758/// will be transformed to:
7759/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7760///
7761/// This transformation allows better lowering and may transform the switch
7762/// instruction into a sequence of bit manipulation and a smaller
7763/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7764/// address of the jump target, and indirectly jump to it).
7766 DomTreeUpdater *DTU,
7767 const DataLayout &DL,
7768 const TargetTransformInfo &TTI) {
7769 Value *Condition = SI->getCondition();
7770 LLVMContext &Context = SI->getContext();
7771 auto *CondTy = cast<IntegerType>(Condition->getType());
7772
// Bail out on conditions wider than 64 bits or illegal for the target.
7773 if (CondTy->getIntegerBitWidth() > 64 ||
7774 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7775 return false;
7776
7777 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7778 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7779 {Condition, ConstantInt::getTrue(Context)});
7780 if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7781 TTI::TCC_Basic * 2)
7782 return false;
7783
7784 // Only bother with this optimization if there are more than 3 switch cases.
7785 // SDAG will start emitting jump tables for 4 or more cases.
7786 if (SI->getNumCases() < 4)
7787 return false;
7788
7789 // Check that switch cases are powers of two.
7791 for (const auto &Case : SI->cases()) {
7792 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7793 if (llvm::has_single_bit(CaseValue))
7794 Values.push_back(CaseValue);
7795 else
7796 return false;
7797 }
7798
7799 // isSwichDense requires case values to be sorted.
7800 llvm::sort(Values);
// After the rewrite the cases become exponents; require the exponent range
// (ctz of max minus ctz of min) to be dense enough to be worthwhile.
7801 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7802 llvm::countr_zero(Values.front()) + 1))
7803 // Transform is unable to generate dense switch.
7804 return false;
7805
7806 Builder.SetInsertPoint(SI);
7807
7808 if (!SI->defaultDestUnreachable()) {
7809 // Let non-power-of-two inputs jump to the default case, when the latter is
7810 // reachable.
7811 auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
7812 auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
7813
7814 auto *OrigBB = SI->getParent();
7815 auto *DefaultCaseBB = SI->getDefaultDest();
// Split in front of SI so the switch can be guarded by the ctpop check.
7816 BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
7817 auto It = OrigBB->getTerminator()->getIterator();
7818 SmallVector<uint32_t> Weights;
7819 auto HasWeights =
7821 auto *BI = BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
7822 if (HasWeights && any_of(Weights, [](const auto &V) { return V != 0; })) {
7823 // IsPow2 covers a subset of the cases in which we'd go to the default
7824 // label. The other is those powers of 2 that don't appear in the case
7825 // statement. We don't know the distribution of the values coming in, so
7826 // the safest is to split 50-50 the original probability to `default`.
7827 uint64_t OrigDenominator =
7829 SmallVector<uint64_t> NewWeights(2);
7830 NewWeights[1] = Weights[0] / 2;
7831 NewWeights[0] = OrigDenominator - NewWeights[1];
7832 setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
7833 // The probability of executing the default block stays constant. It was
7834 // p_d = Weights[0] / OrigDenominator
7835 // we rewrite as W/D
7836 // We want to find the probability of the default branch of the switch
7837 // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
7838 // i.e. the original probability is the probability we go to the default
7839 // branch from the BI branch, or we take the default branch on the SI.
7840 // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
7841 // This matches using W/2 for the default branch probability numerator and
7842 // D-W/2 as the denominator.
7843 Weights[0] = NewWeights[1];
7844 uint64_t CasesDenominator = OrigDenominator - Weights[0];
// Rescale the per-case weights so they sum to the remaining non-default mass.
7845 for (auto &W : drop_begin(Weights))
7846 W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
7847
7848 setBranchWeights(*SI, Weights, /*IsExpected=*/false);
7849 }
7850 // BI is handling the default case for SI, and so should share its DebugLoc.
7851 BI->setDebugLoc(SI->getDebugLoc());
// Remove the unconditional branch SplitBlock created; BI replaces it.
7852 It->eraseFromParent();
7853
7854 addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
7855 if (DTU)
7856 DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
7857 }
7858
7859 // Replace each case with its trailing zeros number.
7860 for (auto &Case : SI->cases()) {
7861 auto *OrigValue = Case.getCaseValue();
7862 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7863 OrigValue->getValue().countr_zero()));
7864 }
7865
7866 // Replace condition with its trailing zeros number.
// The i1 'true' operand is cttz's is-zero-poison flag; a zero condition is
// never a power of two, and when the default is reachable the guard above
// already diverted it there.
7867 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7868 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7869
7870 SI->setCondition(ConditionTrailingZeros);
7871
7872 return true;
7873}
7874
7875/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7876/// the same destination.
7878 DomTreeUpdater *DTU) {
// Only handle a switch whose condition is a ucmp/scmp with no other users,
// so the intrinsic can be erased after the fold.
7879 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7880 if (!Cmp || !Cmp->hasOneUse())
7881 return false;
7882
7884 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7885 if (!HasWeights)
7886 Weights.resize(4); // Avoid checking HasWeights everywhere.
7887
7888 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7889 int64_t Res;
7890 BasicBlock *Succ, *OtherSucc;
7891 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7892 BasicBlock *Unreachable = nullptr;
7893
7894 if (SI->getNumCases() == 2) {
7895 // Find which of 1, 0 or -1 is missing (handled by default dest).
7896 SmallSet<int64_t, 3> Missing;
7897 Missing.insert(1);
7898 Missing.insert(0);
7899 Missing.insert(-1);
7900
7901 Succ = SI->getDefaultDest();
7902 SuccWeight = Weights[0];
7903 OtherSucc = nullptr;
7904 for (auto &Case : SI->cases()) {
7905 std::optional<int64_t> Val =
7906 Case.getCaseValue()->getValue().trySExtValue();
7907 if (!Val)
7908 return false;
7909 if (!Missing.erase(*Val))
7910 return false;
// Both explicit cases must share one destination for the fold to apply.
7911 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7912 return false;
7913 OtherSucc = Case.getCaseSuccessor();
7914 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7915 }
7916
7917 assert(Missing.size() == 1 && "Should have one case left");
7918 Res = *Missing.begin();
7919 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7920 // Normalize so that Succ is taken once and OtherSucc twice.
7921 Unreachable = SI->getDefaultDest();
7922 Succ = OtherSucc = nullptr;
7923 for (auto &Case : SI->cases()) {
7924 BasicBlock *NewSucc = Case.getCaseSuccessor();
7925 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7926 if (!OtherSucc || OtherSucc == NewSucc) {
7927 OtherSucc = NewSucc;
7928 OtherSuccWeight += Weight;
7929 } else if (!Succ) {
7930 Succ = NewSucc;
7931 SuccWeight = Weight;
7932 } else if (Succ == NewSucc) {
// Succ turned out to be the duplicated arm; swap the roles.
7933 std::swap(Succ, OtherSucc);
7934 std::swap(SuccWeight, OtherSuccWeight);
7935 } else
7936 return false;
7937 }
// Identify which of {-1, 0, 1} maps to the unique successor.
7938 for (auto &Case : SI->cases()) {
7939 std::optional<int64_t> Val =
7940 Case.getCaseValue()->getValue().trySExtValue();
7941 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7942 return false;
7943 if (Case.getCaseSuccessor() == Succ) {
7944 Res = *Val;
7945 break;
7946 }
7947 }
7948 } else {
7949 return false;
7950 }
7951
7952 // Determine predicate for the missing case.
7954 switch (Res) {
7955 case 1:
7956 Pred = ICmpInst::ICMP_UGT;
7957 break;
7958 case 0:
7959 Pred = ICmpInst::ICMP_EQ;
7960 break;
7961 case -1:
7962 Pred = ICmpInst::ICMP_ULT;
7963 break;
7964 }
7965 if (Cmp->isSigned())
7966 Pred = ICmpInst::getSignedPredicate(Pred);
7967
7968 MDNode *NewWeights = nullptr;
7969 if (HasWeights)
7970 NewWeights = MDBuilder(SI->getContext())
7971 .createBranchWeights(SuccWeight, OtherSuccWeight);
7972
// Replace the switch with icmp + conditional branch and clean up the CFG.
7973 BasicBlock *BB = SI->getParent();
7974 Builder.SetInsertPoint(SI->getIterator());
7975 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7976 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7977 SI->getMetadata(LLVMContext::MD_unpredictable));
7978 OtherSucc->removePredecessor(BB);
7979 if (Unreachable)
7980 Unreachable->removePredecessor(BB);
7981 SI->eraseFromParent();
7982 Cmp->eraseFromParent();
7983 if (DTU && Unreachable)
7984 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7985 return true;
7986}
7987
7988/// Checking whether two cases of SI are equal depends on the contents of the
7989/// BasicBlock and the incoming values of their successor PHINodes.
7990/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7991/// calling this function on each BasicBlock every time isEqual is called,
7992/// especially since the same BasicBlock may be passed as an argument multiple
7993/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7994/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7995/// of the incoming values.
8000
// Sentinel (empty/tombstone) keys required by the DenseMap protocol.
8003 return static_cast<SwitchSuccWrapper *>(
8005 }
8007 return static_cast<SwitchSuccWrapper *>(
8009 }
8010 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
8011 BasicBlock *Succ = SSW->Dest;
8013 assert(BI->isUnconditional() &&
8014 "Only supporting unconditional branches for now");
8015 assert(BI->getNumSuccessors() == 1 &&
8016 "Expected unconditional branches to have one successor");
8017 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
8018
8019 // Since we assume the BB is just a single BranchInst with a single
8020 // successor, we hash as the BB and the incoming Values of its successor
8021 // PHIs. Initially, we tried to just use the successor BB as the hash, but
8022 // including the incoming PHI values leads to better performance.
8023 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
8024 // time and passing it in SwitchSuccWrapper, but this slowed down the
8025 // average compile time without having any impact on the worst case compile
8026 // time.
8027 BasicBlock *BB = BI->getSuccessor(0);
8028 SmallVector<Value *> PhiValsForBB;
8029 for (PHINode &Phi : BB->phis())
8030 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
8031
8032 return hash_combine(BB, hash_combine_range(PhiValsForBB));
8033 }
8034 static bool isEqual(const SwitchSuccWrapper *LHS,
8035 const SwitchSuccWrapper *RHS) {
// Sentinel keys only compare equal to themselves; never dereference them.
8038 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
8039 return LHS == RHS;
8040
8041 BasicBlock *A = LHS->Dest;
8042 BasicBlock *B = RHS->Dest;
8043
8044 // FIXME: we checked that the size of A and B are both 1 in
8045 // simplifyDuplicateSwitchArms to make the Case list smaller to
8046 // improve performance. If we decide to support BasicBlocks with more
8047 // than just a single instruction, we need to check that A.size() ==
8048 // B.size() here, and we need to check more than just the BranchInsts
8049 // for equality.
8050
8051 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
8052 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
8053 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
8054 "Only supporting unconditional branches for now");
8055 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
8056 return false;
8057
8058 // Need to check that PHIs in successor have matching values
8059 BasicBlock *Succ = ABI->getSuccessor(0);
8060 for (PHINode &Phi : Succ->phis()) {
8061 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
8062 if (PredIVs[A] != PredIVs[B])
8063 return false;
8064 }
8065
8066 return true;
8067 }
8068};
8069
8070bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
8071 DomTreeUpdater *DTU) {
8072 // Build Cases. Skip BBs that are not candidates for simplification. Mark
8073 // PHINodes which need to be processed into PhiPredIVs. We decide to process
8074 // an entire PHI at once after the loop, opposed to calling
8075 // getIncomingValueForBlock inside this loop, since each call to
8076 // getIncomingValueForBlock is O(|Preds|).
8082 Cases.reserve(SI->getNumSuccessors());
8083
8084 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
8085 BasicBlock *BB = SI->getSuccessor(I);
8086
8087 // FIXME: Support more than just a single BranchInst. One way we could do
8088 // this is by taking a hashing approach of all insts in BB.
8089 if (BB->size() != 1)
8090 continue;
8091
8092 // FIXME: Relax that the terminator is a BranchInst by checking for equality
8093 // on other kinds of terminators. We decide to only support unconditional
8094 // branches for now for compile time reasons.
8095 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
8096 if (!BI || BI->isConditional())
8097 continue;
8098
// A successor seen before only needs its extra case index recorded.
8099 if (!Seen.insert(BB).second) {
8100 auto It = BBToSuccessorIndexes.find(BB);
8101 if (It != BBToSuccessorIndexes.end())
8102 It->second.emplace_back(I);
8103 continue;
8104 }
8105
8106 // FIXME: This case needs some extra care because the terminators other than
8107 // SI need to be updated. For now, consider only backedges to the SI.
8108 if (BB->getUniquePredecessor() != SI->getParent())
8109 continue;
8110
8111 // Keep track of which PHIs we need as keys in PhiPredIVs below.
8112 for (BasicBlock *Succ : BI->successors())
8114
8115 // Add the successor only if not previously visited.
8116 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
8117 BBToSuccessorIndexes[BB].emplace_back(I);
8118 }
8119
8120 // Precompute a data structure to improve performance of isEqual for
8121 // SwitchSuccWrapper.
8122 PhiPredIVs.reserve(Phis.size());
8123 for (PHINode *Phi : Phis) {
8124 auto &IVs =
8125 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
8126 for (auto &IV : Phi->incoming_values())
8127 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
8128 }
8129
8130 // Build a set such that if the SwitchSuccWrapper exists in the set and
8131 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
8132 // which is not in the set should be replaced with the one in the set. If the
8133 // SwitchSuccWrapper is not in the set, then it should be added to the set so
8134 // other SwitchSuccWrappers can check against it in the same manner. We use
8135 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
8136 // around information to isEquality, getHashValue, and when doing the
8137 // replacement with better performance.
8138 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
8139 ReplaceWith.reserve(Cases.size());
8140
8142 Updates.reserve(ReplaceWith.size());
8143 bool MadeChange = false;
8144 for (auto &SSW : Cases) {
8145 // SSW is a candidate for simplification. If we find a duplicate BB,
8146 // replace it.
8147 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
8148 if (!Inserted) {
8149 // We know that SI's parent BB no longer dominates the old case successor
8150 // since we are making it dead.
8151 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
// Redirect every case index that pointed at the duplicate block to the
// canonical copy kept in the set.
8152 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
8153 for (unsigned Idx : Successors)
8154 SI->setSuccessor(Idx, (*It)->Dest);
8155 MadeChange = true;
8156 }
8157 }
8158
8159 if (DTU)
8160 DTU->applyUpdates(Updates);
8161
8162 return MadeChange;
8163}
8164
8165bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
8166 BasicBlock *BB = SI->getParent();
8167
8168 if (isValueEqualityComparison(SI)) {
8169 // If we only have one predecessor, and if it is a branch on this value,
8170 // see if that predecessor totally determines the outcome of this switch.
8171 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8172 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
8173 return requestResimplify();
8174
8175 Value *Cond = SI->getCondition();
8176 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
8177 if (simplifySwitchOnSelect(SI, Select))
8178 return requestResimplify();
8179
8180 // If the block only contains the switch, see if we can fold the block
8181 // away into any preds.
8182 if (SI == &*BB->instructionsWithoutDebug(false).begin())
8183 if (foldValueComparisonIntoPredecessors(SI, Builder))
8184 return requestResimplify();
8185 }
8186
8187 // Try to transform the switch into an icmp and a branch.
8188 // The conversion from switch to comparison may lose information on
8189 // impossible switch values, so disable it early in the pipeline.
8190 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8191 return requestResimplify();
8192
8193 // Remove unreachable cases.
8194 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
8195 return requestResimplify();
8196
8197 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8198 return requestResimplify();
8199
8200 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8201 return requestResimplify();
8202
8203 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8204 return requestResimplify();
8205
8206 // The conversion of switches to arithmetic or lookup table is disabled in
8207 // the early optimization pipeline, as it may lose information or make the
8208 // resulting code harder to analyze.
8209 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8210 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8211 Options.ConvertSwitchToLookupTable))
8212 return requestResimplify();
8213
8214 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8215 return requestResimplify();
8216
8217 if (reduceSwitchRange(SI, Builder, DL, TTI))
8218 return requestResimplify();
8219
8220 if (HoistCommon &&
8221 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
8222 return requestResimplify();
8223
8224 if (simplifyDuplicateSwitchArms(SI, DTU))
8225 return requestResimplify();
8226
8227 if (simplifySwitchWhenUMin(SI, DTU))
8228 return requestResimplify();
8229
8230 return false;
8231}
8232
8233bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
8234 BasicBlock *BB = IBI->getParent();
8235 bool Changed = false;
8236 SmallVector<uint32_t> BranchWeights;
8237 const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8238 extractBranchWeights(*IBI, BranchWeights);
8239
// Aggregate profile weight per destination so the weights can be re-emitted
// after duplicate destinations are removed below.
8240 DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8241 if (HasBranchWeights)
8242 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8243 TargetWeight[IBI->getDestination(I)] += BranchWeights[I];
8244
8245 // Eliminate redundant destinations.
8246 SmallPtrSet<Value *, 8> Succs;
8247 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
8248 for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
8249 BasicBlock *Dest = IBI->getDestination(I);
// A destination is redundant if its address is never taken (it can never be
// jumped to indirectly) or if it already appears earlier in the list.
8250 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
8251 if (!Dest->hasAddressTaken())
8252 RemovedSuccs.insert(Dest);
8253 Dest->removePredecessor(BB);
8254 IBI->removeDestination(I);
// Compensate the loop counters for the in-place removal.
8255 --I;
8256 --E;
8257 Changed = true;
8258 }
8259 }
8260
8261 if (DTU) {
8262 std::vector<DominatorTree::UpdateType> Updates;
8263 Updates.reserve(RemovedSuccs.size());
8264 for (auto *RemovedSucc : RemovedSuccs)
8265 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
8266 DTU->applyUpdates(Updates);
8267 }
8268
8269 if (IBI->getNumDestinations() == 0) {
8270 // If the indirectbr has no successors, change it to unreachable.
8271 new UnreachableInst(IBI->getContext(), IBI->getIterator());
8273 return true;
8274 }
8275
8276 if (IBI->getNumDestinations() == 1) {
8277 // If the indirectbr has one successor, change it to a direct branch.
8280 return true;
8281 }
// Multiple destinations remain: restore the (merged) branch weights.
8282 if (HasBranchWeights) {
8283 SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8284 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8285 NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
8286 setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
8287 }
8288 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
// indirectbr on a select of block addresses can become a conditional branch.
8289 if (simplifyIndirectBrOnSelect(IBI, SI))
8290 return requestResimplify();
8291 }
8292 return Changed;
8293}
8294
8295/// Given an block with only a single landing pad and a unconditional branch
8296/// try to find another basic block which this one can be merged with. This
8297/// handles cases where we have multiple invokes with unique landing pads, but
8298/// a shared handler.
8299///
8300/// We specifically choose to not worry about merging non-empty blocks
8301/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8302/// practice, the optimizer produces empty landing pad blocks quite frequently
8303/// when dealing with exception dense code. (see: instcombine, gvn, if-else
8304/// sinking in this file)
8305///
8306/// This is primarily a code size optimization. We need to avoid performing
8307/// any transform which might inhibit optimization (such as our ability to
8308/// specialize a particular handler via tail commoning). We do this by not
8309/// merging any blocks which require us to introduce a phi. Since the same
8310/// values are flowing through both blocks, we don't lose any ability to
8311/// specialize. If anything, we make such specialization more likely.
8312///
8313/// TODO - This transformation could remove entries from a phi in the target
8314/// block when the inputs in the phi are the same for the two blocks being
8315/// merged. In some cases, this could result in removal of the PHI entirely.
8317 BasicBlock *BB, DomTreeUpdater *DTU) {
8318 auto Succ = BB->getUniqueSuccessor();
8319 assert(Succ);
8320 // If there's a phi in the successor block, we'd likely have to introduce
8321 // a phi into the merged landing pad block.
8322 if (isa<PHINode>(*Succ->begin()))
8323 return false;
8324
// Scan sibling predecessors of the shared successor for a block that is
// identical to ours (same landingpad, same unconditional branch).
8325 for (BasicBlock *OtherPred : predecessors(Succ)) {
8326 if (BB == OtherPred)
8327 continue;
8328 BasicBlock::iterator I = OtherPred->begin();
8330 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
8331 continue;
8332 ++I;
8334 if (!BI2 || !BI2->isIdenticalTo(BI))
8335 continue;
8336
8337 std::vector<DominatorTree::UpdateType> Updates;
8338
8339 // We've found an identical block. Update our predecessors to take that
8340 // path instead and make ourselves dead.
8342 for (BasicBlock *Pred : UniquePreds) {
8343 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
8344 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
8345 "unexpected successor");
8346 II->setUnwindDest(OtherPred);
8347 if (DTU) {
8348 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
8349 Updates.push_back({DominatorTree::Delete, Pred, BB});
8350 }
8351 }
8352
// Detach BB from its successors before replacing its branch with an
// unreachable terminator.
8354 for (BasicBlock *Succ : UniqueSuccs) {
8355 Succ->removePredecessor(BB);
8356 if (DTU)
8357 Updates.push_back({DominatorTree::Delete, BB, Succ});
8358 }
8359
8360 IRBuilder<> Builder(BI);
8361 Builder.CreateUnreachable();
8362 BI->eraseFromParent();
8363 if (DTU)
8364 DTU->applyUpdates(Updates);
8365 return true;
8366 }
8367 return false;
8368}
8369
8370bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
8371 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
8372 : simplifyCondBranch(Branch, Builder);
8373}
8374
8375bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
8376 IRBuilder<> &Builder) {
8377 BasicBlock *BB = BI->getParent();
8378 BasicBlock *Succ = BI->getSuccessor(0);
8379
8380 // If the Terminator is the only non-phi instruction, simplify the block.
8381 // If LoopHeader is provided, check if the block or its successor is a loop
8382 // header. (This is for early invocations before loop simplify and
8383 // vectorization to keep canonical loop forms for nested loops. These blocks
8384 // can be eliminated when the pass is invoked later in the back-end.)
8385 // Note that if BB has only one predecessor then we do not introduce new
8386 // backedge, so we can eliminate BB.
8387 bool NeedCanonicalLoop =
8388 Options.NeedCanonicalLoop &&
8389 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
8390 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
// Never fold away the entry block, and keep canonical loop headers intact.
8392 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8393 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8394 return true;
8395
8396 // If the only instruction in the block is a seteq/setne comparison against a
8397 // constant, try to simplify the block.
8398 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
8399 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
8400 ++I;
8401 if (I->isTerminator() &&
8402 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8403 return true;
// Also handle an icmp followed by a select feeding straight into the branch.
8404 if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
8405 tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
8406 Builder))
8407 return true;
8408 }
8409 }
8410
8411 // See if we can merge an empty landing pad block with another which is
8412 // equivalent.
8413 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8414 ++I;
8415 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8416 return true;
8417 }
8418
8419 // If this basic block is ONLY a compare and a branch, and if a predecessor
8420 // branches to us and our successor, fold the comparison into the
8421 // predecessor and use logical operations to update the incoming value
8422 // for PHI nodes in common successor.
8423 if (Options.SpeculateBlocks &&
8424 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8425 Options.BonusInstThreshold))
8426 return requestResimplify();
8427 return false;
8428}
8429
// Body of a helper that returns the single block which is the lone
// predecessor of every predecessor of BB, or nullptr if the predecessors do
// not all funnel through one such block.
8431 BasicBlock *PredPred = nullptr;
8432 for (auto *P : predecessors(BB)) {
// Each predecessor must itself have exactly one predecessor, and that block
// must be the same for all of them.
8433 BasicBlock *PPred = P->getSinglePredecessor();
8434 if (!PPred || (PredPred && PredPred != PPred))
8435 return nullptr;
8436 PredPred = PPred;
8437 }
8438 return PredPred;
8439}
8440
8441/// Fold the following pattern:
8442/// bb0:
8443/// br i1 %cond1, label %bb1, label %bb2
8444/// bb1:
8445/// br i1 %cond2, label %bb3, label %bb4
8446/// bb2:
8447/// br i1 %cond2, label %bb4, label %bb3
8448/// bb3:
8449/// ...
8450/// bb4:
8451/// ...
8452/// into
8453/// bb0:
8454/// %cond = xor i1 %cond1, %cond2
8455/// br i1 %cond, label %bb4, label %bb3
8456/// bb3:
8457/// ...
8458/// bb4:
8459/// ...
8460/// NOTE: %cond2 always dominates the terminator of bb0.
8462 BasicBlock *BB = BI->getParent();
8463 BasicBlock *BB1 = BI->getSuccessor(0);
8464 BasicBlock *BB2 = BI->getSuccessor(1);
// A "simple" successor holds only a conditional branch, does not loop back to
// BB or to itself, and its targets start without PHIs (so no incoming values
// would need rewriting).
8465 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
8466 if (Succ == BB)
8467 return false;
8468 if (&Succ->front() != Succ->getTerminator())
8469 return false;
8470 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
8471 if (!SuccBI || !SuccBI->isConditional())
8472 return false;
8473 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8474 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8475 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8476 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8477 };
8478 BranchInst *BB1BI, *BB2BI;
8479 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8480 return false;
8481
// Both inner branches must test the same condition with swapped targets.
8482 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8483 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8484 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8485 return false;
8486
8487 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8488 BasicBlock *BB4 = BB1BI->getSuccessor(1);
8489 IRBuilder<> Builder(BI);
// Rewrite BB's branch to test cond1 ^ cond2 and jump straight to BB4/BB3.
8490 BI->setCondition(
8491 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8492 BB1->removePredecessor(BB);
8493 BI->setSuccessor(0, BB4);
8494 BB2->removePredecessor(BB);
8495 BI->setSuccessor(1, BB3);
8496 if (DTU) {
8498 Updates.push_back({DominatorTree::Delete, BB, BB1});
8499 Updates.push_back({DominatorTree::Insert, BB, BB4});
8500 Updates.push_back({DominatorTree::Delete, BB, BB2});
8501 Updates.push_back({DominatorTree::Insert, BB, BB3});
8502
8503 DTU->applyUpdates(Updates);
8504 }
// Combine the three branches' profile weights; any branch missing metadata
// contributes a neutral 1:1 split.
8505 bool HasWeight = false;
8506 uint64_t BBTWeight, BBFWeight;
8507 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8508 HasWeight = true;
8509 else
8510 BBTWeight = BBFWeight = 1;
8511 uint64_t BB1TWeight, BB1FWeight;
8512 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8513 HasWeight = true;
8514 else
8515 BB1TWeight = BB1FWeight = 1;
8516 uint64_t BB2TWeight, BB2FWeight;
8517 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8518 HasWeight = true;
8519 else
8520 BB2TWeight = BB2FWeight = 1;
8521 if (HasWeight) {
// New true edge goes to BB4, reached via BB1-false or BB2-true; the false
// edge goes to BB3, reached via BB1-true or BB2-false.
8522 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8523 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8524 setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
8525 /*ElideAllZero=*/true);
8526 }
8527 return true;
8528}
8529
/// Simplify a conditional branch terminator. Tries, in order: value-equality
/// folding into predecessors, br->switch conversion for icmp chains, folding
/// a branch whose direction is implied by a dominating condition, folding
/// into a common destination, hoisting/speculating code from the successors,
/// threading on values known in predecessors, and merging with predecessor
/// conditional branches. Returns true if any transform fired (most paths
/// request a resimplification round).
bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
  assert(
      // NOTE(review): one source line is missing from this extract here (an
      // additional assert conjunct) — confirm against upstream.
      BI->getSuccessor(0) != BI->getSuccessor(1) &&
      "Tautological conditional branch should have been eliminated already.");

  BasicBlock *BB = BI->getParent();
  // Respect pass options: fuzzing-oriented builds deliberately keep the CFG
  // shape intact.
  if (!Options.SimplifyCondBranch ||
      BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
    return false;

  // Conditional branch
  if (isValueEqualityComparison(BI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this
    // switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
        return requestResimplify();

    // This block must be empty, except for the setcond inst, if it exists.
    // Ignore dbg and pseudo intrinsics.
    auto I = BB->instructionsWithoutDebug(true).begin();
    if (&*I == BI) {
      if (foldValueComparisonIntoPredecessors(BI, Builder))
        return requestResimplify();
    } else if (&*I == cast<Instruction>(BI->getCondition())) {
      ++I;
      if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
        return requestResimplify();
    }
  }

  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
  if (simplifyBranchOnICmpChain(BI, Builder, DL))
    return true;

  // If this basic block has dominating predecessor blocks and the dominating
  // blocks' conditions imply BI's condition, we know the direction of BI.
  std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
  if (Imp) {
    // Turn this into a branch on constant.
    auto *OldCond = BI->getCondition();
    ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
                             : ConstantInt::getFalse(BB->getContext());
    BI->setCondition(TorF);
    // NOTE(review): a source line is missing from this extract here —
    // presumably the cleanup of the now-dead OldCond, which otherwise
    // appears unused. Confirm against upstream.
    return requestResimplify();
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and one of our successors, fold the comparison into the
  // predecessor and use logical operations to pick the right destination.
  if (Options.SpeculateBlocks &&
      foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
                             Options.BonusInstThreshold))
    return requestResimplify();

  // We have a conditional branch to two blocks that are only reachable
  // from BI. We know that the condbr dominates the two blocks, so see if
  // there is any identical code in the "then" and "else" blocks. If so, we
  // can hoist it up to the branching block.
  if (BI->getSuccessor(0)->getSinglePredecessor()) {
    if (BI->getSuccessor(1)->getSinglePredecessor()) {
      if (HoistCommon &&
          hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
        return requestResimplify();

      // Targets supporting conditional faulting (e.g. CMOV-style masked
      // load/store) may let us speculate cheap loads/stores from both arms.
      if (BI && Options.HoistLoadsStoresWithCondFaulting &&
          isProfitableToSpeculate(BI, std::nullopt, TTI)) {
        SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
        auto CanSpeculateConditionalLoadsStores = [&]() {
          for (auto *Succ : successors(BB)) {
            for (Instruction &I : *Succ) {
              if (I.isTerminator()) {
                if (I.getNumSuccessors() > 1)
                  return false;
                continue;
              } else if (!isSafeCheapLoadStore(&I, TTI) ||
                         SpeculatedConditionalLoadsStores.size() ==
                  // NOTE(review): a source line is missing from this extract
                  // here — presumably the speculation-count threshold
                  // constant closing this condition. Confirm upstream.
                return false;
              }
              SpeculatedConditionalLoadsStores.push_back(&I);
            }
          }
          return !SpeculatedConditionalLoadsStores.empty();
        };

        if (CanSpeculateConditionalLoadsStores()) {
          hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
                                      std::nullopt, nullptr);
          return requestResimplify();
        }
      }
    } else {
      // If Successor #1 has multiple preds, we may be able to conditionally
      // execute Successor #0 if it branches to Successor #1.
      Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
      if (Succ0TI->getNumSuccessors() == 1 &&
          Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
        if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
          return requestResimplify();
    }
  } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
    // If Successor #0 has multiple preds, we may be able to conditionally
    // execute Successor #1 if it branches to Successor #0.
    Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
    if (Succ1TI->getNumSuccessors() == 1 &&
        Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
      if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
        return requestResimplify();
  }

  // If this is a branch on something for which we know the constant value in
  // predecessors (e.g. a phi node in the current block), thread control
  // through this block.
  if (foldCondBranchOnValueKnownInPredecessor(BI))
    return requestResimplify();

  // Scan predecessor blocks for conditional branches.
  for (BasicBlock *Pred : predecessors(BB))
    if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
      if (PBI != BI && PBI->isConditional())
        if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
          return requestResimplify();

  // Look for diamond patterns.
  if (MergeCondStores)
    if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
      if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
        if (PBI != BI && PBI->isConditional())
          if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
            return requestResimplify();

  // Look for nested conditional branches.
  if (mergeNestedCondBranch(BI, DTU))
    return requestResimplify();

  return false;
}
8671
/// Check if passing a value to an instruction will cause undefined behavior.
/// V is the incoming value (must be a null/undef constant to be interesting);
/// I is the instruction it would be passed through (typically a PHI);
/// PtrValueMayBeModified tracks whether a GEP along the use chain may have
/// changed the pointer value away from null.
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
  assert(V->getType() == I->getType() && "Mismatched types");
  // NOTE(review): a source line is missing from this extract here — it
  // presumably materializes `C` (a Constant) from V. Confirm upstream.
  if (!C)
    return false;

  if (I->use_empty())
    return false;

  if (C->isNullValue() || isa<UndefValue>(C)) {
    // Only look at the first use we can handle, avoid hurting compile time with
    // long uselists
    auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
      auto *Use = cast<Instruction>(U.getUser());
      // Change this list when we want to add new instructions.
      switch (Use->getOpcode()) {
      default:
        return false;
      case Instruction::GetElementPtr:
      case Instruction::Ret:
      case Instruction::BitCast:
      case Instruction::Load:
      case Instruction::Store:
      case Instruction::Call:
      case Instruction::CallBr:
      case Instruction::Invoke:
      case Instruction::UDiv:
      case Instruction::URem:
        // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
        // implemented to avoid code complexity as it is unclear how useful such
        // logic is.
      case Instruction::SDiv:
      case Instruction::SRem:
        return true;
      }
    });
    if (FindUse == I->use_end())
      return false;
    auto &Use = *FindUse;
    auto *User = cast<Instruction>(Use.getUser());
    // Bail out if User is not in the same BB as I or User == I or User comes
    // before I in the block. The latter two can be the case if User is a
    // PHI node.
    if (User->getParent() != I->getParent() || User == I ||
        User->comesBefore(I))
      return false;

    // Now make sure that there are no instructions in between that can alter
    // control flow (eg. calls)
    auto InstrRange =
        make_range(std::next(I->getIterator()), User->getIterator());
    if (any_of(InstrRange, [](Instruction &I) {
          // NOTE(review): the lambda body is missing from this extract —
          // presumably a guaranteed-to-transfer-execution check. Confirm
          // upstream.
        }))
      return false;

    // Look through GEPs. A load from a GEP derived from NULL is still undefined
    // NOTE(review): a source line is missing from this extract here —
    // presumably the dyn_cast of User to GetElementPtrInst binding `GEP`.
    // Confirm upstream.
    if (GEP->getPointerOperand() == I) {
      // The type of GEP may differ from the type of base pointer.
      // Bail out on vector GEPs, as they are not handled by other checks.
      if (GEP->getType()->isVectorTy())
        return false;
      // The current base address is null, there are four cases to consider:
      // getelementptr (TY, null, 0) -> null
      // getelementptr (TY, null, not zero) -> may be modified
      // getelementptr inbounds (TY, null, 0) -> null
      // getelementptr inbounds (TY, null, not zero) -> poison iff null is
      // undefined?
      if (!GEP->hasAllZeroIndices() &&
          (!GEP->isInBounds() ||
           NullPointerIsDefined(GEP->getFunction(),
                                GEP->getPointerAddressSpace())))
        PtrValueMayBeModified = true;
      // Recurse on the GEP result: its uses inherit the null/undef base.
      return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
    }

    // Look through return.
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
      bool HasNoUndefAttr =
          Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
      // Return undefined to a noundef return value is undefined.
      if (isa<UndefValue>(C) && HasNoUndefAttr)
        return true;
      // Return null to a nonnull+noundef return value is undefined.
      if (C->isNullValue() && HasNoUndefAttr &&
          Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
        return !PtrValueMayBeModified;
      }
    }

    // Load from null is undefined.
    if (LoadInst *LI = dyn_cast<LoadInst>(User))
      if (!LI->isVolatile())
        return !NullPointerIsDefined(LI->getFunction(),
                                     LI->getPointerAddressSpace());

    // Store to null is undefined.
    // NOTE(review): a source line is missing from this extract here —
    // presumably the dyn_cast of User to StoreInst binding `SI`. Confirm
    // upstream.
      if (!SI->isVolatile())
        return (!NullPointerIsDefined(SI->getFunction(),
                                      SI->getPointerAddressSpace())) &&
               SI->getPointerOperand() == I;

    // llvm.assume(false/undef) always triggers immediate UB.
    if (auto *Assume = dyn_cast<AssumeInst>(User)) {
      // Ignore assume operand bundles.
      if (I == Assume->getArgOperand(0))
        return true;
    }

    if (auto *CB = dyn_cast<CallBase>(User)) {
      if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
        return false;
      // A call to null is undefined.
      if (CB->getCalledOperand() == I)
        return true;

      if (CB->isArgOperand(&Use)) {
        unsigned ArgIdx = CB->getArgOperandNo(&Use);
        // Passing null to a nonnull+noundef argument is undefined.
        // NOTE(review): a source line is missing from this extract here —
        // presumably the null-value check on C opening this condition.
        // Confirm upstream.
            CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
          return !PtrValueMayBeModified;
        // Passing undef to a noundef argument is undefined.
        if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
          return true;
      }
    }
    // Div/Rem by zero is immediate UB
    if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
      return true;
  }
  return false;
}
8808
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
/// Removes at most one offending predecessor edge per invocation and returns
/// true if it did; callers re-run to catch further offenders.
// NOTE(review): the first line of this function's signature is missing from
// this extract — presumably
// `static bool removeUndefIntroducingPredecessor(BasicBlock *BB,`.
// Confirm against upstream.
                                              DomTreeUpdater *DTU,
                                              AssumptionCache *AC) {
  // Scan every PHI incoming value; passing a null/undef that is guaranteed
  // to trigger UB means the edge delivering it can never execute.
  for (PHINode &PHI : BB->phis())
    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
      if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
        BasicBlock *Predecessor = PHI.getIncomingBlock(i);
        Instruction *T = Predecessor->getTerminator();
        IRBuilder<> Builder(T);
        if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
          BB->removePredecessor(Predecessor);
          // Turn unconditional branches into unreachables and remove the dead
          // destination from conditional branches.
          if (BI->isUnconditional())
            Builder.CreateUnreachable();
          else {
            // Preserve guarding condition in assume, because it might not be
            // inferrable from any dominating condition.
            Value *Cond = BI->getCondition();
            CallInst *Assumption;
            if (BI->getSuccessor(0) == BB)
              Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
            else
              Assumption = Builder.CreateAssumption(Cond);
            if (AC)
              AC->registerAssumption(cast<AssumeInst>(Assumption));
            Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
                                                       : BI->getSuccessor(0));
          }
          BI->eraseFromParent();
          if (DTU)
            DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
          return true;
        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
          // Redirect all branches leading to UB into
          // a newly created unreachable block.
          BasicBlock *Unreachable = BasicBlock::Create(
              Predecessor->getContext(), "unreachable", BB->getParent(), BB);
          Builder.SetInsertPoint(Unreachable);
          // The new block contains only one instruction: Unreachable
          Builder.CreateUnreachable();
          for (const auto &Case : SI->cases())
            if (Case.getCaseSuccessor() == BB) {
              BB->removePredecessor(Predecessor);
              Case.setSuccessor(Unreachable);
            }
          if (SI->getDefaultDest() == BB) {
            BB->removePredecessor(Predecessor);
            SI->setDefaultDest(Unreachable);
          }

          if (DTU)
            DTU->applyUpdates(
                { { DominatorTree::Insert, Predecessor, Unreachable },
                  { DominatorTree::Delete, Predecessor, BB } });
          return true;
        }
      }

  return false;
}
8872
/// Run one round of CFG simplification on BB: delete unreachable blocks,
/// constant-fold the terminator, merge/fold PHIs, merge into the predecessor,
/// sink common code, then dispatch to a terminator-specific simplifier.
/// Returns true if BB was modified in any way.
bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
  bool Changed = false;

  assert(BB && BB->getParent() && "Block not embedded in function!");
  assert(BB->getTerminator() && "Degenerate basic block encountered!");

  // Remove basic blocks that have no predecessors (except the entry block)...
  // or that just have themselves as a predecessor. These are unreachable.
  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
      BB->getSinglePredecessor() == BB) {
    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  // Check to see if we can constant propagate this terminator instruction
  // away...
  Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
                                    /*TLI=*/nullptr, DTU);

  // Check for and eliminate duplicate PHI nodes in this block.
  // NOTE(review): a source line is missing from this extract here —
  // presumably the EliminateDuplicatePHINodes call. Confirm upstream.

  // Check for and remove branches that will always cause undefined behavior.
  // NOTE(review): the guarding `if` line is missing from this extract —
  // presumably a removeUndefIntroducingPredecessor call. Confirm upstream.
    return requestResimplify();

  // Merge basic blocks into their predecessor if there is only one distinct
  // pred, and if there is only one distinct successor of the predecessor, and
  // if there are no PHI nodes.
  if (MergeBlockIntoPredecessor(BB, DTU))
    return true;

  if (SinkCommon && Options.SinkCommonInsts)
    if (sinkCommonCodeFromPredecessors(BB, DTU) ||
        mergeCompatibleInvokes(BB, DTU)) {
      // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
      // so we may now have duplicate PHI's.
      // Let's rerun EliminateDuplicatePHINodes() first,
      // before foldTwoEntryPHINode() potentially converts them into select's,
      // after which we'd need a whole EarlyCSE pass run to cleanup them.
      return true;
    }

  IRBuilder<> Builder(BB);

  if (Options.SpeculateBlocks &&
      !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
    // If there is a trivial two-entry PHI node in this basic block, and we can
    // eliminate it, do so now.
    if (auto *PN = dyn_cast<PHINode>(BB->begin()))
      if (PN->getNumIncomingValues() == 2)
        if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
                                Options.SpeculateUnpredictables))
          return true;
  }

  // Dispatch on the terminator kind for terminator-specific simplification.
  // NOTE(review): a source line is missing from this extract here —
  // presumably `Instruction *Terminator = BB->getTerminator();`. Confirm
  // upstream.
  Builder.SetInsertPoint(Terminator);
  switch (Terminator->getOpcode()) {
  case Instruction::Br:
    Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
    break;
  case Instruction::Resume:
    Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
    break;
  case Instruction::CleanupRet:
    Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
    break;
  case Instruction::Switch:
    Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
    break;
  case Instruction::Unreachable:
    Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
    break;
  case Instruction::IndirectBr:
    Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
    break;
  }

  return Changed;
}
8955
8956bool SimplifyCFGOpt::run(BasicBlock *BB) {
8957 bool Changed = false;
8958
8959 // Repeated simplify BB as long as resimplification is requested.
8960 do {
8961 Resimplify = false;
8962
8963 // Perform one round of simplifcation. Resimplify flag will be set if
8964 // another iteration is requested.
8965 Changed |= simplifyOnce(BB);
8966 } while (Resimplify);
8967
8968 return Changed;
8969}
8970
8973 ArrayRef<WeakVH> LoopHeaders) {
8974 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8975 Options)
8976 .run(BB);
8977}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU)
Tries to transform the switch when the condition is umin with a constant.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1202
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1250
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1167
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1532
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1131
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1575
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
back - Get the last element.
Definition ArrayRef.h:151
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:939
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:982
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1130
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isNegative() const
Definition Constants.h:214
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:198
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:123
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:255
static DebugLoc getTemporary()
Definition DebugLoc.h:160
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:179
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:166
static DebugLoc getDropped()
Definition DebugLoc.h:163
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:114
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2348
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2103
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:502
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1934
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1808
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2332
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1850
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1863
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2197
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2280
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1078
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:124
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
LLVM_ABI void replaceDefaultDest(SwitchInst::CaseIt I)
Replace the default destination by given case.
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
CaseIt case_end()
Returns a read/write iterator that points one past the last in the SwitchInst.
BasicBlock * getSuccessor(unsigned idx) const
void setCondition(Value *V)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
LLVM_ABI CaseIt removeCase(CaseIt I)
This method removes the specified case and its successor from the switch instruction.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:293
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
op_range operands()
Definition User.h:292
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:24
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:195
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
bool succ_empty(const Instruction *I)
Definition CFG.h:257
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1700
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2088
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1789
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2140
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
constexpr detail::StaticCastFunc< To > StaticCastTo
Function objects corresponding to the Cast types defined above.
Definition Casting.h:882
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1397
LLVM_ABI bool collectPossibleValues(const Value *V, SmallPtrSetImpl< const Constant * > &Constants, unsigned MaxCount, bool AllowUndefOrPoison=true)
Enumerates all possible immediate values of V and inserts them into the set Constants.
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2845
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3094
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3368
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1966
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3875
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1715
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2132
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< T1, 2 > &B1, const SmallVector< T2, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1594
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2100
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:320
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276