LLVM 20.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
35#include "llvm/IR/Attributes.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/IRBuilder.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
52#include "llvm/IR/LLVMContext.h"
53#include "llvm/IR/MDBuilder.h"
55#include "llvm/IR/Metadata.h"
56#include "llvm/IR/Module.h"
57#include "llvm/IR/NoFolder.h"
58#include "llvm/IR/Operator.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/Use.h"
63#include "llvm/IR/User.h"
64#include "llvm/IR/Value.h"
65#include "llvm/IR/ValueHandle.h"
69#include "llvm/Support/Debug.h"
77#include <algorithm>
78#include <cassert>
79#include <climits>
80#include <cstddef>
81#include <cstdint>
82#include <iterator>
83#include <map>
84#include <optional>
85#include <set>
86#include <tuple>
87#include <utility>
88#include <vector>
89
90using namespace llvm;
91using namespace PatternMatch;
92
93#define DEBUG_TYPE "simplifycfg"
94
96 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
97
98 cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
99 "into preserving DomTree,"));
100
101// Chosen as 2 so as to be cheap, but still to have enough power to fold
102// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
103// To catch this, we need to fold a compare and a select, hence '2' being the
104// minimum reasonable default.
106 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
107 cl::desc(
108 "Control the amount of phi node folding to perform (default = 2)"));
109
111 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
112 cl::desc("Control the maximal total instruction cost that we are willing "
113 "to speculatively execute to fold a 2-entry PHI node into a "
114 "select (default = 4)"));
115
116static cl::opt<bool>
117 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
118 cl::desc("Hoist common instructions up to the parent block"));
119
121 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
122 cl::init(20),
123 cl::desc("Allow reordering across at most this many "
124 "instructions when hoisting"));
125
126static cl::opt<bool>
127 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
128 cl::desc("Sink common instructions down to the end block"));
129
131 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
132 cl::desc("Hoist conditional stores if an unconditional store precedes"));
133
135 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
136 cl::desc("Hoist conditional stores even if an unconditional store does not "
137 "precede - hoist multiple conditional stores into a single "
138 "predicated store"));
139
141 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
142 cl::desc("When merging conditional stores, do so even if the resultant "
143 "basic blocks are unlikely to be if-converted as a result"));
144
146 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
147 cl::desc("Allow exactly one expensive instruction to be speculatively "
148 "executed"));
149
151 "max-speculation-depth", cl::Hidden, cl::init(10),
152 cl::desc("Limit maximum recursion depth when calculating costs of "
153 "speculatively executed instructions"));
154
155static cl::opt<int>
156 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
157 cl::init(10),
158 cl::desc("Max size of a block which is still considered "
159 "small enough to thread through"));
160
161// Two is chosen to allow one negation and a logical combine.
163 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
164 cl::init(2),
165 cl::desc("Maximum cost of combining conditions when "
166 "folding branches"));
167
169 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
170 cl::init(2),
171 cl::desc("Multiplier to apply to threshold when determining whether or not "
172 "to fold branch to common destination when vector operations are "
173 "present"));
174
176 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
177 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
178
180 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
181 cl::desc("Limit cases to analyze when converting a switch to select"));
182
183STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
184STATISTIC(NumLinearMaps,
185 "Number of switch instructions turned into linear mapping");
186STATISTIC(NumLookupTables,
187 "Number of switch instructions turned into lookup tables");
189 NumLookupTablesHoles,
190 "Number of switch instructions turned into lookup tables (holes checked)");
191STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
192STATISTIC(NumFoldValueComparisonIntoPredecessors,
193 "Number of value comparisons folded into predecessor basic blocks");
194STATISTIC(NumFoldBranchToCommonDest,
195 "Number of branches folded into predecessor basic block");
197 NumHoistCommonCode,
198 "Number of common instruction 'blocks' hoisted up to the begin block");
199STATISTIC(NumHoistCommonInstrs,
200 "Number of common instructions hoisted up to the begin block");
201STATISTIC(NumSinkCommonCode,
202 "Number of common instruction 'blocks' sunk down to the end block");
203STATISTIC(NumSinkCommonInstrs,
204 "Number of common instructions sunk down to the end block");
205STATISTIC(NumSpeculations, "Number of speculative executed instructions");
206STATISTIC(NumInvokes,
207 "Number of invokes with empty resume blocks simplified into calls");
208STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
209STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
210
211namespace {
212
213// The first field contains the value that the switch produces when a certain
214// case group is selected, and the second field is a vector containing the
215// cases composing the case group.
216using SwitchCaseResultVectorTy =
218
219// The first field contains the phi node that generates a result of the switch
220// and the second field contains the value generated for a certain case in the
221// switch for that PHI.
222using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
223
224/// ValueEqualityComparisonCase - Represents a case of a switch.
225struct ValueEqualityComparisonCase {
227 BasicBlock *Dest;
228
229 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
230 : Value(Value), Dest(Dest) {}
231
232 bool operator<(ValueEqualityComparisonCase RHS) const {
233 // Comparing pointers is ok as we only rely on the order for uniquing.
234 return Value < RHS.Value;
235 }
236
237 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
238};
239
240class SimplifyCFGOpt {
242 DomTreeUpdater *DTU;
243 const DataLayout &DL;
244 ArrayRef<WeakVH> LoopHeaders;
245 const SimplifyCFGOptions &Options;
246 bool Resimplify;
247
248 Value *isValueEqualityComparison(Instruction *TI);
249 BasicBlock *getValueEqualityComparisonCases(
250 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
251 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
252 BasicBlock *Pred,
253 IRBuilder<> &Builder);
254 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
255 Instruction *PTI,
256 IRBuilder<> &Builder);
257 bool foldValueComparisonIntoPredecessors(Instruction *TI,
258 IRBuilder<> &Builder);
259
260 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
261 bool simplifySingleResume(ResumeInst *RI);
262 bool simplifyCommonResume(ResumeInst *RI);
263 bool simplifyCleanupReturn(CleanupReturnInst *RI);
264 bool simplifyUnreachable(UnreachableInst *UI);
265 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
266 bool simplifyIndirectBr(IndirectBrInst *IBI);
267 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
268 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
269 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
270
271 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
272 IRBuilder<> &Builder);
273
274 bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
275 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
276 Instruction *TI, Instruction *I1,
277 SmallVectorImpl<Instruction *> &OtherSuccTIs);
278 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
279 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
280 BasicBlock *TrueBB, BasicBlock *FalseBB,
281 uint32_t TrueWeight, uint32_t FalseWeight);
282 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
283 const DataLayout &DL);
284 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
285 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
286 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
287
288public:
289 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
290 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
291 const SimplifyCFGOptions &Opts)
292 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
293 assert((!DTU || !DTU->hasPostDomTree()) &&
294 "SimplifyCFG is not yet capable of maintaining validity of a "
295 "PostDomTree, so don't ask for it.");
296 }
297
298 bool simplifyOnce(BasicBlock *BB);
299 bool run(BasicBlock *BB);
300
301 // Helper to set Resimplify and return change indication.
302 bool requestResimplify() {
303 Resimplify = true;
304 return true;
305 }
306};
307
308} // end anonymous namespace
309
310/// Return true if all the PHI nodes in the basic block \p BB
311/// receive compatible (identical) incoming values when coming from
312/// all of the predecessor blocks that are specified in \p IncomingBlocks.
313///
314/// Note that if the values aren't exactly identical, but \p EquivalenceSet
315/// is provided, and *both* of the values are present in the set,
316/// then they are considered equal.
318 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
319 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
320 assert(IncomingBlocks.size() == 2 &&
321 "Only for a pair of incoming blocks at the time!");
322
323 // FIXME: it is okay if one of the incoming values is an `undef` value,
324 // iff the other incoming value is guaranteed to be a non-poison value.
325 // FIXME: it is okay if one of the incoming values is a `poison` value.
326 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
327 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
328 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
329 if (IV0 == IV1)
330 return true;
331 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
332 EquivalenceSet->contains(IV1))
333 return true;
334 return false;
335 });
336}
337
338/// Return true if it is safe to merge these two
339/// terminator instructions together.
340static bool
342 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
343 if (SI1 == SI2)
344 return false; // Can't merge with self!
345
346 // It is not safe to merge these two switch instructions if they have a common
347 // successor, and if that successor has a PHI node, and if *that* PHI node has
348 // conflicting incoming values from the two switch blocks.
349 BasicBlock *SI1BB = SI1->getParent();
350 BasicBlock *SI2BB = SI2->getParent();
351
352 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
353 bool Fail = false;
354 for (BasicBlock *Succ : successors(SI2BB)) {
355 if (!SI1Succs.count(Succ))
356 continue;
357 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
358 continue;
359 Fail = true;
360 if (FailBlocks)
361 FailBlocks->insert(Succ);
362 else
363 break;
364 }
365
366 return !Fail;
367}
368
369/// Update PHI nodes in Succ to indicate that there will now be entries in it
370/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
371/// will be the same as those coming in from ExistPred, an existing predecessor
372/// of Succ.
373static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
374 BasicBlock *ExistPred,
375 MemorySSAUpdater *MSSAU = nullptr) {
376 for (PHINode &PN : Succ->phis())
377 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
378 if (MSSAU)
379 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
380 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
381}
382
383/// Compute an abstract "cost" of speculating the given instruction,
384/// which is assumed to be safe to speculate. TCC_Free means cheap,
385/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
386/// expensive.
388 const TargetTransformInfo &TTI) {
389 assert((!isa<Instruction>(I) ||
390 isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
391 "Instruction is not safe to speculatively execute!");
393}
394
395/// If we have a merge point of an "if condition" as accepted above,
396/// return true if the specified value dominates the block. We
397/// don't handle the true generality of domination here, just a special case
398/// which works well enough for us.
399///
400/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
401/// see if V (which must be an instruction) and its recursive operands
402/// that do not dominate BB have a combined cost lower than Budget and
403/// are non-trapping. If both are true, the instruction is inserted into the
404/// set and true is returned.
405///
406/// The cost for most non-trapping instructions is defined as 1 except for
407/// Select whose cost is 2.
408///
409/// After this function returns, Cost is increased by the cost of
410/// V plus its non-dominating operands. If that cost is greater than
411/// Budget, false is returned and Cost is undefined.
413 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
415 InstructionCost Budget,
417 unsigned Depth = 0) {
418 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
419 // so limit the recursion depth.
420 // TODO: While this recursion limit does prevent pathological behavior, it
421 // would be better to track visited instructions to avoid cycles.
423 return false;
424
425 Instruction *I = dyn_cast<Instruction>(V);
426 if (!I) {
427 // Non-instructions dominate all instructions and can be executed
428 // unconditionally.
429 return true;
430 }
431 BasicBlock *PBB = I->getParent();
432
433 // We don't want to allow weird loops that might have the "if condition" in
434 // the bottom of this block.
435 if (PBB == BB)
436 return false;
437
438 // If this instruction is defined in a block that contains an unconditional
439 // branch to BB, then it must be in the 'conditional' part of the "if
440 // statement". If not, it definitely dominates the region.
441 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
442 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
443 return true;
444
445 // If we have seen this instruction before, don't count it again.
446 if (AggressiveInsts.count(I))
447 return true;
448
449 // Okay, it looks like the instruction IS in the "condition". Check to
450 // see if it's a cheap instruction to unconditionally compute, and if it
451 // only uses stuff defined outside of the condition. If so, hoist it out.
453 return false;
454
456
457 // Allow exactly one instruction to be speculated regardless of its cost
458 // (as long as it is safe to do so).
459 // This is intended to flatten the CFG even if the instruction is a division
460 // or other expensive operation. The speculation of an expensive instruction
461 // is expected to be undone in CodeGenPrepare if the speculation has not
462 // enabled further IR optimizations.
463 if (Cost > Budget &&
464 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
465 !Cost.isValid()))
466 return false;
467
468 // Okay, we can only really hoist these out if their operands do
469 // not take us over the cost threshold.
470 for (Use &Op : I->operands())
471 if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
472 Depth + 1))
473 return false;
474 // Okay, it's safe to do this! Remember this instruction.
475 AggressiveInsts.insert(I);
476 return true;
477}
478
479/// Extract ConstantInt from value, looking through IntToPtr
480/// and PointerNullValue. Return NULL if value is not a constant int.
482 // Normal constant int.
483 ConstantInt *CI = dyn_cast<ConstantInt>(V);
484 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
485 DL.isNonIntegralPointerType(V->getType()))
486 return CI;
487
488 // This is some kind of pointer constant. Turn it into a pointer-sized
489 // ConstantInt if possible.
490 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
491
492 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
493 if (isa<ConstantPointerNull>(V))
494 return ConstantInt::get(PtrTy, 0);
495
496 // IntToPtr const int.
497 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
498 if (CE->getOpcode() == Instruction::IntToPtr)
499 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
500 // The constant is very likely to have the right type already.
501 if (CI->getType() == PtrTy)
502 return CI;
503 else
504 return cast<ConstantInt>(
505 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
506 }
507 return nullptr;
508}
509
510namespace {
511
512/// Given a chain of or (||) or and (&&) comparison of a value against a
513/// constant, this will try to recover the information required for a switch
514/// structure.
515/// It will depth-first traverse the chain of comparison, seeking for patterns
516/// like %a == 12 or %a < 4 and combine them to produce a set of integer
517/// representing the different cases for the switch.
518/// Note that if the chain is composed of '||' it will build the set of elements
519/// that matches the comparisons (i.e. any of this value validate the chain)
520/// while for a chain of '&&' it will build the set elements that make the test
521/// fail.
522struct ConstantComparesGatherer {
523 const DataLayout &DL;
524
525 /// Value found for the switch comparison
526 Value *CompValue = nullptr;
527
528 /// Extra clause to be checked before the switch
529 Value *Extra = nullptr;
530
531 /// Set of integers to match in switch
533
534 /// Number of comparisons matched in the and/or chain
535 unsigned UsedICmps = 0;
536
537 /// Construct and compute the result for the comparison instruction Cond
538 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
539 gather(Cond);
540 }
541
542 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
543 ConstantComparesGatherer &
544 operator=(const ConstantComparesGatherer &) = delete;
545
546private:
547 /// Try to set the current value used for the comparison, it succeeds only if
548 /// it wasn't set before or if the new value is the same as the old one
549 bool setValueOnce(Value *NewVal) {
550 if (CompValue && CompValue != NewVal)
551 return false;
552 CompValue = NewVal;
553 return (CompValue != nullptr);
554 }
555
556 /// Try to match Instruction "I" as a comparison against a constant and
557 /// populates the array Vals with the set of values that match (or do not
558 /// match depending on isEQ).
559 /// Return false on failure. On success, the Value the comparison matched
560 /// against is placed in CompValue.
561 /// If CompValue is already set, the function is expected to fail if a match
562 /// is found but the value compared to is different.
563 bool matchInstruction(Instruction *I, bool isEQ) {
564 // If this is an icmp against a constant, handle this as one of the cases.
565 ICmpInst *ICI;
566 ConstantInt *C;
567 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
568 (C = getConstantInt(I->getOperand(1), DL)))) {
569 return false;
570 }
571
572 Value *RHSVal;
573 const APInt *RHSC;
574
575 // Pattern match a special case
576 // (x & ~2^z) == y --> x == y || x == y|2^z
577 // This undoes a transformation done by instcombine to fuse 2 compares.
578 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
579 // It's a little bit hard to see why the following transformations are
580 // correct. Here is a CVC3 program to verify them for 64-bit values:
581
582 /*
583 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
584 x : BITVECTOR(64);
585 y : BITVECTOR(64);
586 z : BITVECTOR(64);
587 mask : BITVECTOR(64) = BVSHL(ONE, z);
588 QUERY( (y & ~mask = y) =>
589 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
590 );
591 QUERY( (y | mask = y) =>
592 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
593 );
594 */
595
596 // Please note that each pattern must be a dual implication (<--> or
597 // iff). One directional implication can create spurious matches. If the
598 // implication is only one-way, an unsatisfiable condition on the left
599 // side can imply a satisfiable condition on the right side. Dual
600 // implication ensures that satisfiable conditions are transformed to
601 // other satisfiable conditions and unsatisfiable conditions are
602 // transformed to other unsatisfiable conditions.
603
604 // Here is a concrete example of a unsatisfiable condition on the left
605 // implying a satisfiable condition on the right:
606 //
607 // mask = (1 << z)
608 // (x & ~mask) == y --> (x == y || x == (y | mask))
609 //
610 // Substituting y = 3, z = 0 yields:
611 // (x & -2) == 3 --> (x == 3 || x == 2)
612
613 // Pattern match a special case:
614 /*
615 QUERY( (y & ~mask = y) =>
616 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
617 );
618 */
619 if (match(ICI->getOperand(0),
620 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
621 APInt Mask = ~*RHSC;
622 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
623 // If we already have a value for the switch, it has to match!
624 if (!setValueOnce(RHSVal))
625 return false;
626
627 Vals.push_back(C);
628 Vals.push_back(
629 ConstantInt::get(C->getContext(),
630 C->getValue() | Mask));
631 UsedICmps++;
632 return true;
633 }
634 }
635
636 // Pattern match a special case:
637 /*
638 QUERY( (y | mask = y) =>
639 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
640 );
641 */
642 if (match(ICI->getOperand(0),
643 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
644 APInt Mask = *RHSC;
645 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(RHSVal))
648 return false;
649
650 Vals.push_back(C);
651 Vals.push_back(ConstantInt::get(C->getContext(),
652 C->getValue() & ~Mask));
653 UsedICmps++;
654 return true;
655 }
656 }
657
658 // If we already have a value for the switch, it has to match!
659 if (!setValueOnce(ICI->getOperand(0)))
660 return false;
661
662 UsedICmps++;
663 Vals.push_back(C);
664 return ICI->getOperand(0);
665 }
666
667 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
668 ConstantRange Span =
670
671 // Shift the range if the compare is fed by an add. This is the range
672 // compare idiom as emitted by instcombine.
673 Value *CandidateVal = I->getOperand(0);
674 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
675 Span = Span.subtract(*RHSC);
676 CandidateVal = RHSVal;
677 }
678
679 // If this is an and/!= check, then we are looking to build the set of
680 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
681 // x != 0 && x != 1.
682 if (!isEQ)
683 Span = Span.inverse();
684
685 // If there are a ton of values, we don't want to make a ginormous switch.
686 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
687 return false;
688 }
689
690 // If we already have a value for the switch, it has to match!
691 if (!setValueOnce(CandidateVal))
692 return false;
693
694 // Add all values from the range to the set
695 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
696 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
697
698 UsedICmps++;
699 return true;
700 }
701
702 /// Given a potentially 'or'd or 'and'd together collection of icmp
703 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
704 /// the value being compared, and stick the list constants into the Vals
705 /// vector.
706 /// One "Extra" case is allowed to differ from the other.
707 void gather(Value *V) {
708 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
709
710 // Keep a stack (SmallVector for efficiency) for depth-first traversal
713
714 // Initialize
715 Visited.insert(V);
716 DFT.push_back(V);
717
718 while (!DFT.empty()) {
719 V = DFT.pop_back_val();
720
721 if (Instruction *I = dyn_cast<Instruction>(V)) {
722 // If it is a || (or && depending on isEQ), process the operands.
723 Value *Op0, *Op1;
724 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
725 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
726 if (Visited.insert(Op1).second)
727 DFT.push_back(Op1);
728 if (Visited.insert(Op0).second)
729 DFT.push_back(Op0);
730
731 continue;
732 }
733
734 // Try to match the current instruction
735 if (matchInstruction(I, isEQ))
736 // Match succeed, continue the loop
737 continue;
738 }
739
740 // One element of the sequence of || (or &&) could not be match as a
741 // comparison against the same value as the others.
742 // We allow only one "Extra" case to be checked before the switch
743 if (!Extra) {
744 Extra = V;
745 continue;
746 }
747 // Failed to parse a proper sequence, abort now
748 CompValue = nullptr;
749 break;
750 }
751 }
752};
753
754} // end anonymous namespace
755
757 MemorySSAUpdater *MSSAU = nullptr) {
758 Instruction *Cond = nullptr;
759 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
760 Cond = dyn_cast<Instruction>(SI->getCondition());
761 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
762 if (BI->isConditional())
763 Cond = dyn_cast<Instruction>(BI->getCondition());
764 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
765 Cond = dyn_cast<Instruction>(IBI->getAddress());
766 }
767
768 TI->eraseFromParent();
769 if (Cond)
771}
772
773/// Return true if the specified terminator checks
774/// to see if a value is equal to constant integer value.
775Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
776 Value *CV = nullptr;
777 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
778 // Do not permit merging of large switch instructions into their
779 // predecessors unless there is only one predecessor.
780 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
781 CV = SI->getCondition();
782 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
783 if (BI->isConditional() && BI->getCondition()->hasOneUse())
784 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
785 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
786 CV = ICI->getOperand(0);
787 }
788
789 // Unwrap any lossless ptrtoint cast.
790 if (CV) {
791 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
792 Value *Ptr = PTII->getPointerOperand();
793 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
794 CV = Ptr;
795 }
796 }
797 return CV;
798}
799
800/// Given a value comparison instruction,
801/// decode all of the 'cases' that it represents and return the 'default' block.
802BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
803 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
804 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
805 Cases.reserve(SI->getNumCases());
806 for (auto Case : SI->cases())
807 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
808 Case.getCaseSuccessor()));
809 return SI->getDefaultDest();
810 }
811
812 BranchInst *BI = cast<BranchInst>(TI);
813 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
814 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
815 Cases.push_back(ValueEqualityComparisonCase(
816 getConstantInt(ICI->getOperand(1), DL), Succ));
817 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
818}
819
820/// Given a vector of bb/value pairs, remove any entries
821/// in the list that match the specified block.
822static void
824 std::vector<ValueEqualityComparisonCase> &Cases) {
825 llvm::erase(Cases, BB);
826}
827
828/// Return true if there are any keys in C1 that exist in C2 as well.
829static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
830 std::vector<ValueEqualityComparisonCase> &C2) {
831 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
832
833 // Make V1 be smaller than V2.
834 if (V1->size() > V2->size())
835 std::swap(V1, V2);
836
837 if (V1->empty())
838 return false;
839 if (V1->size() == 1) {
840 // Just scan V2.
841 ConstantInt *TheVal = (*V1)[0].Value;
842 for (const ValueEqualityComparisonCase &VECC : *V2)
843 if (TheVal == VECC.Value)
844 return true;
845 }
846
847 // Otherwise, just sort both lists and compare element by element.
848 array_pod_sort(V1->begin(), V1->end());
849 array_pod_sort(V2->begin(), V2->end());
850 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
851 while (i1 != e1 && i2 != e2) {
852 if ((*V1)[i1].Value == (*V2)[i2].Value)
853 return true;
854 if ((*V1)[i1].Value < (*V2)[i2].Value)
855 ++i1;
856 else
857 ++i2;
858 }
859 return false;
860}
861
862// Set branch weights on SwitchInst. This sets the metadata if there is at
863// least one non-zero weight.
865 bool IsExpected) {
866 // Check that there is at least one non-zero weight. Otherwise, pass
867 // nullptr to setMetadata which will erase the existing metadata.
868 MDNode *N = nullptr;
869 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
870 N = MDBuilder(SI->getParent()->getContext())
871 .createBranchWeights(Weights, IsExpected);
872 SI->setMetadata(LLVMContext::MD_prof, N);
873}
874
875// Similar to the above, but for branch and select instructions that take
876// exactly 2 weights.
877static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
878 uint32_t FalseWeight, bool IsExpected) {
879 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
880 // Check that there is at least one non-zero weight. Otherwise, pass
881 // nullptr to setMetadata which will erase the existing metadata.
882 MDNode *N = nullptr;
883 if (TrueWeight || FalseWeight)
884 N = MDBuilder(I->getParent()->getContext())
885 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
886 I->setMetadata(LLVMContext::MD_prof, N);
887}
888
889/// If TI is known to be a terminator instruction and its block is known to
890/// only have a single predecessor block, check to see if that predecessor is
891/// also a value comparison with the same value, and if that comparison
892/// determines the outcome of this comparison. If so, simplify TI. This does a
893/// very limited form of jump threading.
894bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
895 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
896 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
897 if (!PredVal)
898 return false; // Not a value comparison in predecessor.
899
900 Value *ThisVal = isValueEqualityComparison(TI);
901 assert(ThisVal && "This isn't a value comparison!!");
902 if (ThisVal != PredVal)
903 return false; // Different predicates.
904
905 // TODO: Preserve branch weight metadata, similarly to how
906 // foldValueComparisonIntoPredecessors preserves it.
907
908 // Find out information about when control will move from Pred to TI's block.
909 std::vector<ValueEqualityComparisonCase> PredCases;
910 BasicBlock *PredDef =
911 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
912 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
913
914 // Find information about how control leaves this block.
915 std::vector<ValueEqualityComparisonCase> ThisCases;
916 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
917 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
918
919 // If TI's block is the default block from Pred's comparison, potentially
920 // simplify TI based on this knowledge.
921 if (PredDef == TI->getParent()) {
922 // If we are here, we know that the value is none of those cases listed in
923 // PredCases. If there are any cases in ThisCases that are in PredCases, we
924 // can simplify TI.
925 if (!valuesOverlap(PredCases, ThisCases))
926 return false;
927
928 if (isa<BranchInst>(TI)) {
929 // Okay, one of the successors of this condbr is dead. Convert it to a
930 // uncond br.
931 assert(ThisCases.size() == 1 && "Branch can only have one case!");
932 // Insert the new branch.
933 Instruction *NI = Builder.CreateBr(ThisDef);
934 (void)NI;
935
936 // Remove PHI node entries for the dead edge.
937 ThisCases[0].Dest->removePredecessor(PredDef);
938
939 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
940 << "Through successor TI: " << *TI << "Leaving: " << *NI
941 << "\n");
942
944
945 if (DTU)
946 DTU->applyUpdates(
947 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
948
949 return true;
950 }
951
952 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
953 // Okay, TI has cases that are statically dead, prune them away.
955 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
956 DeadCases.insert(PredCases[i].Value);
957
958 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
959 << "Through successor TI: " << *TI);
960
961 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
962 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
963 --i;
964 auto *Successor = i->getCaseSuccessor();
965 if (DTU)
966 ++NumPerSuccessorCases[Successor];
967 if (DeadCases.count(i->getCaseValue())) {
968 Successor->removePredecessor(PredDef);
969 SI.removeCase(i);
970 if (DTU)
971 --NumPerSuccessorCases[Successor];
972 }
973 }
974
975 if (DTU) {
976 std::vector<DominatorTree::UpdateType> Updates;
977 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
978 if (I.second == 0)
979 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
980 DTU->applyUpdates(Updates);
981 }
982
983 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
984 return true;
985 }
986
987 // Otherwise, TI's block must correspond to some matched value. Find out
988 // which value (or set of values) this is.
989 ConstantInt *TIV = nullptr;
990 BasicBlock *TIBB = TI->getParent();
991 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
992 if (PredCases[i].Dest == TIBB) {
993 if (TIV)
994 return false; // Cannot handle multiple values coming to this block.
995 TIV = PredCases[i].Value;
996 }
997 assert(TIV && "No edge from pred to succ?");
998
999 // Okay, we found the one constant that our value can be if we get into TI's
1000 // BB. Find out which successor will unconditionally be branched to.
1001 BasicBlock *TheRealDest = nullptr;
1002 for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
1003 if (ThisCases[i].Value == TIV) {
1004 TheRealDest = ThisCases[i].Dest;
1005 break;
1006 }
1007
1008 // If not handled by any explicit cases, it is handled by the default case.
1009 if (!TheRealDest)
1010 TheRealDest = ThisDef;
1011
1012 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1013
1014 // Remove PHI node entries for dead edges.
1015 BasicBlock *CheckEdge = TheRealDest;
1016 for (BasicBlock *Succ : successors(TIBB))
1017 if (Succ != CheckEdge) {
1018 if (Succ != TheRealDest)
1019 RemovedSuccs.insert(Succ);
1020 Succ->removePredecessor(TIBB);
1021 } else
1022 CheckEdge = nullptr;
1023
1024 // Insert the new branch.
1025 Instruction *NI = Builder.CreateBr(TheRealDest);
1026 (void)NI;
1027
1028 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1029 << "Through successor TI: " << *TI << "Leaving: " << *NI
1030 << "\n");
1031
1033 if (DTU) {
1035 Updates.reserve(RemovedSuccs.size());
1036 for (auto *RemovedSucc : RemovedSuccs)
1037 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1038 DTU->applyUpdates(Updates);
1039 }
1040 return true;
1041}
1042
1043namespace {
1044
1045/// This class implements a stable ordering of constant
1046/// integers that does not depend on their address. This is important for
1047/// applications that sort ConstantInt's to ensure uniqueness.
1048struct ConstantIntOrdering {
1049 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1050 return LHS->getValue().ult(RHS->getValue());
1051 }
1052};
1053
1054} // end anonymous namespace
1055
1057 ConstantInt *const *P2) {
1058 const ConstantInt *LHS = *P1;
1059 const ConstantInt *RHS = *P2;
1060 if (LHS == RHS)
1061 return 0;
1062 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1063}
1064
1065/// Get Weights of a given terminator, the default weight is at the front
1066/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1067/// metadata.
1069 SmallVectorImpl<uint64_t> &Weights) {
1070 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1071 assert(MD && "Invalid branch-weight metadata");
1072 extractFromBranchWeightMD64(MD, Weights);
1073
1074 // If TI is a conditional eq, the default case is the false case,
1075 // and the corresponding branch-weight data is at index 2. We swap the
1076 // default weight to be the first entry.
1077 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1078 assert(Weights.size() == 2);
1079 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1080 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1081 std::swap(Weights.front(), Weights.back());
1082 }
1083}
1084
1085/// Keep halving the weights until all can fit in uint32_t.
1087 uint64_t Max = *llvm::max_element(Weights);
1088 if (Max > UINT_MAX) {
1089 unsigned Offset = 32 - llvm::countl_zero(Max);
1090 for (uint64_t &I : Weights)
1091 I >>= Offset;
1092 }
1093}
1094
1096 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1097 Instruction *PTI = PredBlock->getTerminator();
1098
1099 // If we have bonus instructions, clone them into the predecessor block.
1100 // Note that there may be multiple predecessor blocks, so we cannot move
1101 // bonus instructions to a predecessor block.
1102 for (Instruction &BonusInst : *BB) {
1103 if (BonusInst.isTerminator())
1104 continue;
1105
1106 Instruction *NewBonusInst = BonusInst.clone();
1107
1108 if (!isa<DbgInfoIntrinsic>(BonusInst) &&
1109 PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
1110 // Unless the instruction has the same !dbg location as the original
1111 // branch, drop it. When we fold the bonus instructions we want to make
1112 // sure we reset their debug locations in order to avoid stepping on
1113 // dead code caused by folding dead branches.
1114 NewBonusInst->setDebugLoc(DebugLoc());
1115 }
1116
1117 RemapInstruction(NewBonusInst, VMap,
1119
1120 // If we speculated an instruction, we need to drop any metadata that may
1121 // result in undefined behavior, as the metadata might have been valid
1122 // only given the branch precondition.
1123 // Similarly strip attributes on call parameters that may cause UB in
1124 // location the call is moved to.
1125 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1126
1127 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1128 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1129 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1131
1132 if (isa<DbgInfoIntrinsic>(BonusInst))
1133 continue;
1134
1135 NewBonusInst->takeName(&BonusInst);
1136 BonusInst.setName(NewBonusInst->getName() + ".old");
1137 VMap[&BonusInst] = NewBonusInst;
1138
1139 // Update (liveout) uses of bonus instructions,
1140 // now that the bonus instruction has been cloned into predecessor.
1141 // Note that we expect to be in a block-closed SSA form for this to work!
1142 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1143 auto *UI = cast<Instruction>(U.getUser());
1144 auto *PN = dyn_cast<PHINode>(UI);
1145 if (!PN) {
1146 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1147 "If the user is not a PHI node, then it should be in the same "
1148 "block as, and come after, the original bonus instruction.");
1149 continue; // Keep using the original bonus instruction.
1150 }
1151 // Is this the block-closed SSA form PHI node?
1152 if (PN->getIncomingBlock(U) == BB)
1153 continue; // Great, keep using the original bonus instruction.
1154 // The only other alternative is an "use" when coming from
1155 // the predecessor block - here we should refer to the cloned bonus instr.
1156 assert(PN->getIncomingBlock(U) == PredBlock &&
1157 "Not in block-closed SSA form?");
1158 U.set(NewBonusInst);
1159 }
1160 }
1161}
1162
1163bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1164 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1165 BasicBlock *BB = TI->getParent();
1166 BasicBlock *Pred = PTI->getParent();
1167
1169
1170 // Figure out which 'cases' to copy from SI to PSI.
1171 std::vector<ValueEqualityComparisonCase> BBCases;
1172 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1173
1174 std::vector<ValueEqualityComparisonCase> PredCases;
1175 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1176
1177 // Based on whether the default edge from PTI goes to BB or not, fill in
1178 // PredCases and PredDefault with the new switch cases we would like to
1179 // build.
1181
1182 // Update the branch weight metadata along the way
1184 bool PredHasWeights = hasBranchWeightMD(*PTI);
1185 bool SuccHasWeights = hasBranchWeightMD(*TI);
1186
1187 if (PredHasWeights) {
1188 getBranchWeights(PTI, Weights);
1189 // branch-weight metadata is inconsistent here.
1190 if (Weights.size() != 1 + PredCases.size())
1191 PredHasWeights = SuccHasWeights = false;
1192 } else if (SuccHasWeights)
1193 // If there are no predecessor weights but there are successor weights,
1194 // populate Weights with 1, which will later be scaled to the sum of
1195 // successor's weights
1196 Weights.assign(1 + PredCases.size(), 1);
1197
1198 SmallVector<uint64_t, 8> SuccWeights;
1199 if (SuccHasWeights) {
1200 getBranchWeights(TI, SuccWeights);
1201 // branch-weight metadata is inconsistent here.
1202 if (SuccWeights.size() != 1 + BBCases.size())
1203 PredHasWeights = SuccHasWeights = false;
1204 } else if (PredHasWeights)
1205 SuccWeights.assign(1 + BBCases.size(), 1);
1206
1207 if (PredDefault == BB) {
1208 // If this is the default destination from PTI, only the edges in TI
1209 // that don't occur in PTI, or that branch to BB will be activated.
1210 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1211 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1212 if (PredCases[i].Dest != BB)
1213 PTIHandled.insert(PredCases[i].Value);
1214 else {
1215 // The default destination is BB, we don't need explicit targets.
1216 std::swap(PredCases[i], PredCases.back());
1217
1218 if (PredHasWeights || SuccHasWeights) {
1219 // Increase weight for the default case.
1220 Weights[0] += Weights[i + 1];
1221 std::swap(Weights[i + 1], Weights.back());
1222 Weights.pop_back();
1223 }
1224
1225 PredCases.pop_back();
1226 --i;
1227 --e;
1228 }
1229
1230 // Reconstruct the new switch statement we will be building.
1231 if (PredDefault != BBDefault) {
1232 PredDefault->removePredecessor(Pred);
1233 if (DTU && PredDefault != BB)
1234 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1235 PredDefault = BBDefault;
1236 ++NewSuccessors[BBDefault];
1237 }
1238
1239 unsigned CasesFromPred = Weights.size();
1240 uint64_t ValidTotalSuccWeight = 0;
1241 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1242 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1243 PredCases.push_back(BBCases[i]);
1244 ++NewSuccessors[BBCases[i].Dest];
1245 if (SuccHasWeights || PredHasWeights) {
1246 // The default weight is at index 0, so weight for the ith case
1247 // should be at index i+1. Scale the cases from successor by
1248 // PredDefaultWeight (Weights[0]).
1249 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1250 ValidTotalSuccWeight += SuccWeights[i + 1];
1251 }
1252 }
1253
1254 if (SuccHasWeights || PredHasWeights) {
1255 ValidTotalSuccWeight += SuccWeights[0];
1256 // Scale the cases from predecessor by ValidTotalSuccWeight.
1257 for (unsigned i = 1; i < CasesFromPred; ++i)
1258 Weights[i] *= ValidTotalSuccWeight;
1259 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1260 Weights[0] *= SuccWeights[0];
1261 }
1262 } else {
1263 // If this is not the default destination from PSI, only the edges
1264 // in SI that occur in PSI with a destination of BB will be
1265 // activated.
1266 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1267 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1268 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1269 if (PredCases[i].Dest == BB) {
1270 PTIHandled.insert(PredCases[i].Value);
1271
1272 if (PredHasWeights || SuccHasWeights) {
1273 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1274 std::swap(Weights[i + 1], Weights.back());
1275 Weights.pop_back();
1276 }
1277
1278 std::swap(PredCases[i], PredCases.back());
1279 PredCases.pop_back();
1280 --i;
1281 --e;
1282 }
1283
1284 // Okay, now we know which constants were sent to BB from the
1285 // predecessor. Figure out where they will all go now.
1286 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1287 if (PTIHandled.count(BBCases[i].Value)) {
1288 // If this is one we are capable of getting...
1289 if (PredHasWeights || SuccHasWeights)
1290 Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1291 PredCases.push_back(BBCases[i]);
1292 ++NewSuccessors[BBCases[i].Dest];
1293 PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
1294 }
1295
1296 // If there are any constants vectored to BB that TI doesn't handle,
1297 // they must go to the default destination of TI.
1298 for (ConstantInt *I : PTIHandled) {
1299 if (PredHasWeights || SuccHasWeights)
1300 Weights.push_back(WeightsForHandled[I]);
1301 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1302 ++NewSuccessors[BBDefault];
1303 }
1304 }
1305
1306 // Okay, at this point, we know which new successor Pred will get. Make
1307 // sure we update the number of entries in the PHI nodes for these
1308 // successors.
1309 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1310 if (DTU) {
1311 SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
1312 Updates.reserve(Updates.size() + NewSuccessors.size());
1313 }
1314 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1315 NewSuccessors) {
1316 for (auto I : seq(NewSuccessor.second)) {
1317 (void)I;
1318 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1319 }
1320 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1321 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1322 }
1323
1324 Builder.SetInsertPoint(PTI);
1325 // Convert pointer to int before we switch.
1326 if (CV->getType()->isPointerTy()) {
1327 CV =
1328 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1329 }
1330
1331 // Now that the successors are updated, create the new Switch instruction.
1332 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1333 NewSI->setDebugLoc(PTI->getDebugLoc());
1334 for (ValueEqualityComparisonCase &V : PredCases)
1335 NewSI->addCase(V.Value, V.Dest);
1336
1337 if (PredHasWeights || SuccHasWeights) {
1338 // Halve the weights if any of them cannot fit in an uint32_t
1339 fitWeights(Weights);
1340
1341 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1342
1343 setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
1344 }
1345
1347
1348 // Okay, last check. If BB is still a successor of PSI, then we must
1349 // have an infinite loop case. If so, add an infinitely looping block
1350 // to handle the case to preserve the behavior of the code.
1351 BasicBlock *InfLoopBlock = nullptr;
1352 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1353 if (NewSI->getSuccessor(i) == BB) {
1354 if (!InfLoopBlock) {
1355 // Insert it at the end of the function, because it's either code,
1356 // or it won't matter if it's hot. :)
1357 InfLoopBlock =
1358 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1359 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1360 if (DTU)
1361 Updates.push_back(
1362 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1363 }
1364 NewSI->setSuccessor(i, InfLoopBlock);
1365 }
1366
1367 if (DTU) {
1368 if (InfLoopBlock)
1369 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1370
1371 Updates.push_back({DominatorTree::Delete, Pred, BB});
1372
1373 DTU->applyUpdates(Updates);
1374 }
1375
1376 ++NumFoldValueComparisonIntoPredecessors;
1377 return true;
1378}
1379
1380/// The specified terminator is a value equality comparison instruction
1381/// (either a switch or a branch on "X == c").
1382/// See if any of the predecessors of the terminator block are value comparisons
1383/// on the same value. If so, and if safe to do so, fold them together.
1384bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1385 IRBuilder<> &Builder) {
1386 BasicBlock *BB = TI->getParent();
1387 Value *CV = isValueEqualityComparison(TI); // CondVal
1388 assert(CV && "Not a comparison?");
1389
1390 bool Changed = false;
1391
1393 while (!Preds.empty()) {
1394 BasicBlock *Pred = Preds.pop_back_val();
1395 Instruction *PTI = Pred->getTerminator();
1396
1397 // Don't try to fold into itself.
1398 if (Pred == BB)
1399 continue;
1400
1401 // See if the predecessor is a comparison with the same value.
1402 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1403 if (PCV != CV)
1404 continue;
1405
1407 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1408 for (auto *Succ : FailBlocks) {
1409 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1410 return false;
1411 }
1412 }
1413
1414 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1415 Changed = true;
1416 }
1417 return Changed;
1418}
1419
1420// If we would need to insert a select that uses the value of this invoke
1421// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1422// need to do this), we can't hoist the invoke, as there is nowhere to put the
1423// select in this case.
1425 Instruction *I1, Instruction *I2) {
1426 for (BasicBlock *Succ : successors(BB1)) {
1427 for (const PHINode &PN : Succ->phis()) {
1428 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1429 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1430 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1431 return false;
1432 }
1433 }
1434 }
1435 return true;
1436}
1437
1438// Get interesting characteristics of instructions that
1439// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1440// instructions can be reordered across.
1446
1448 unsigned Flags = 0;
1449 if (I->mayReadFromMemory())
1450 Flags |= SkipReadMem;
1451 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1452 // inalloca) across stacksave/stackrestore boundaries.
1453 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1454 Flags |= SkipSideEffect;
1456 Flags |= SkipImplicitControlFlow;
1457 return Flags;
1458}
1459
1460// Returns true if it is safe to reorder an instruction across preceding
1461// instructions in a basic block.
1462static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1463 // Don't reorder a store over a load.
1464 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1465 return false;
1466
1467 // If we have seen an instruction with side effects, it's unsafe to reorder an
1468 // instruction which reads memory or itself has side effects.
1469 if ((Flags & SkipSideEffect) &&
1470 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1471 return false;
1472
1473 // Reordering across an instruction which does not necessarily transfer
1474 // control to the next instruction is speculation.
1476 return false;
1477
1478 // Hoisting of llvm.deoptimize is only legal together with the next return
1479 // instruction, which this pass is not always able to do.
1480 if (auto *CB = dyn_cast<CallBase>(I))
1481 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1482 return false;
1483
1484 // It's also unsafe/illegal to hoist an instruction above its instruction
1485 // operands
1486 BasicBlock *BB = I->getParent();
1487 for (Value *Op : I->operands()) {
1488 if (auto *J = dyn_cast<Instruction>(Op))
1489 if (J->getParent() == BB)
1490 return false;
1491 }
1492
1493 return true;
1494}
1495
1496static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1497
1498/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1499/// instructions \p I1 and \p I2 can and should be hoisted.
1501 const TargetTransformInfo &TTI) {
1502 // If we're going to hoist a call, make sure that the two instructions
1503 // we're commoning/hoisting are both marked with musttail, or neither of
1504 // them is marked as such. Otherwise, we might end up in a situation where
1505 // we hoist from a block where the terminator is a `ret` to a block where
1506 // the terminator is a `br`, and `musttail` calls expect to be followed by
1507 // a return.
1508 auto *C1 = dyn_cast<CallInst>(I1);
1509 auto *C2 = dyn_cast<CallInst>(I2);
1510 if (C1 && C2)
1511 if (C1->isMustTailCall() != C2->isMustTailCall())
1512 return false;
1513
1515 return false;
1516
1517 // If any of the two call sites has nomerge or convergent attribute, stop
1518 // hoisting.
1519 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1520 if (CB1->cannotMerge() || CB1->isConvergent())
1521 return false;
1522 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1523 if (CB2->cannotMerge() || CB2->isConvergent())
1524 return false;
1525
1526 return true;
1527}
1528
1529/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1530/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1531/// hoistCommonCodeFromSuccessors. e.g. The input:
1532/// I1 DVRs: { x, z },
1533/// OtherInsts: { I2 DVRs: { x, y, z } }
1534/// would result in hoisting only DbgVariableRecord x.
1536 Instruction *TI, Instruction *I1,
1537 SmallVectorImpl<Instruction *> &OtherInsts) {
1538 if (!I1->hasDbgRecords())
1539 return;
1540 using CurrentAndEndIt =
1541 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1542 // Vector of {Current, End} iterators.
1544 Itrs.reserve(OtherInsts.size() + 1);
1545 // Helper lambdas for lock-step checks:
1546 // Return true if this Current == End.
1547 auto atEnd = [](const CurrentAndEndIt &Pair) {
1548 return Pair.first == Pair.second;
1549 };
1550 // Return true if all Current are identical.
1551 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1552 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1554 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1555 });
1556 };
1557
1558 // Collect the iterators.
1559 Itrs.push_back(
1560 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1561 for (Instruction *Other : OtherInsts) {
1562 if (!Other->hasDbgRecords())
1563 return;
1564 Itrs.push_back(
1565 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1566 }
1567
1568 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1569 // the lock-step DbgRecord are identical, hoist all of them to TI.
1570 // This replicates the dbg.* intrinsic behaviour in
1571 // hoistCommonCodeFromSuccessors.
1572 while (none_of(Itrs, atEnd)) {
1573 bool HoistDVRs = allIdentical(Itrs);
1574 for (CurrentAndEndIt &Pair : Itrs) {
1575 // Increment Current iterator now as we may be about to move the
1576 // DbgRecord.
1577 DbgRecord &DR = *Pair.first++;
1578 if (HoistDVRs) {
1579 DR.removeFromParent();
1580 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1581 }
1582 }
1583 }
1584}
1585
1587 const Instruction *I2) {
1588 if (I1->isIdenticalToWhenDefined(I2))
1589 return true;
1590
1591 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1592 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1593 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1594 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1595 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1596
1597 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1598 return I1->getOperand(0) == I2->getOperand(1) &&
1599 I1->getOperand(1) == I2->getOperand(0) &&
1600 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1601 }
1602
1603 return false;
1604}
1605
1606/// Hoist any common code in the successor blocks up into the block. This
1607/// function guarantees that BB dominates all successors. If EqTermsOnly is
1608/// given, only perform hoisting in case both blocks only contain a terminator.
1609/// In that case, only the original BI will be replaced and selects for PHIs are
1610/// added.
1611bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
1612 bool EqTermsOnly) {
1613 // This does very trivial matching, with limited scanning, to find identical
1614 // instructions in the two blocks. In particular, we don't want to get into
1615 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1616 // such, we currently just scan for obviously identical instructions in an
1617 // identical order, possibly separated by the same number of non-identical
1618 // instructions.
1619 unsigned int SuccSize = succ_size(BB);
1620 if (SuccSize < 2)
1621 return false;
1622
1623 // If either of the blocks has it's address taken, then we can't do this fold,
1624 // because the code we'd hoist would no longer run when we jump into the block
1625 // by it's address.
1626 for (auto *Succ : successors(BB))
1627 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1628 return false;
1629
1630 auto *TI = BB->getTerminator();
1631
1632 // The second of pair is a SkipFlags bitmask.
1633 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1634 SmallVector<SuccIterPair, 8> SuccIterPairs;
1635 for (auto *Succ : successors(BB)) {
1636 BasicBlock::iterator SuccItr = Succ->begin();
1637 if (isa<PHINode>(*SuccItr))
1638 return false;
1639 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1640 }
1641
1642 // Check if only hoisting terminators is allowed. This does not add new
1643 // instructions to the hoist location.
1644 if (EqTermsOnly) {
1645 // Skip any debug intrinsics, as they are free to hoist.
1646 for (auto &SuccIter : make_first_range(SuccIterPairs)) {
1647 auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
1648 if (!INonDbg->isTerminator())
1649 return false;
1650 }
1651 // Now we know that we only need to hoist debug intrinsics and the
1652 // terminator. Let the loop below handle those 2 cases.
1653 }
1654
1655 // Count how many instructions were not hoisted so far. There's a limit on how
1656 // many instructions we skip, serving as a compilation time control as well as
1657 // preventing excessive increase of life ranges.
1658 unsigned NumSkipped = 0;
1659 // If we find an unreachable instruction at the beginning of a basic block, we
1660 // can still hoist instructions from the rest of the basic blocks.
1661 if (SuccIterPairs.size() > 2) {
1662 erase_if(SuccIterPairs,
1663 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1664 if (SuccIterPairs.size() < 2)
1665 return false;
1666 }
1667
1668 bool Changed = false;
1669
1670 for (;;) {
1671 auto *SuccIterPairBegin = SuccIterPairs.begin();
1672 auto &BB1ItrPair = *SuccIterPairBegin++;
1673 auto OtherSuccIterPairRange =
1674 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1675 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1676
1677 Instruction *I1 = &*BB1ItrPair.first;
1678
1679 // Skip debug info if it is not identical.
1680 bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
1681 Instruction *I2 = &*Iter;
1682 return I1->isIdenticalToWhenDefined(I2);
1683 });
1684 if (!AllDbgInstsAreIdentical) {
1685 while (isa<DbgInfoIntrinsic>(I1))
1686 I1 = &*++BB1ItrPair.first;
1687 for (auto &SuccIter : OtherSuccIterRange) {
1688 Instruction *I2 = &*SuccIter;
1689 while (isa<DbgInfoIntrinsic>(I2))
1690 I2 = &*++SuccIter;
1691 }
1692 }
1693
1694 bool AllInstsAreIdentical = true;
1695 bool HasTerminator = I1->isTerminator();
1696 for (auto &SuccIter : OtherSuccIterRange) {
1697 Instruction *I2 = &*SuccIter;
1698 HasTerminator |= I2->isTerminator();
1699 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1700 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1701 AllInstsAreIdentical = false;
1702 }
1703
1705 for (auto &SuccIter : OtherSuccIterRange)
1706 OtherInsts.push_back(&*SuccIter);
1707
1708 // If we are hoisting the terminator instruction, don't move one (making a
1709 // broken BB), instead clone it, and remove BI.
1710 if (HasTerminator) {
1711 // Even if BB, which contains only one unreachable instruction, is ignored
1712 // at the beginning of the loop, we can hoist the terminator instruction.
1713 // If any instructions remain in the block, we cannot hoist terminators.
1714 if (NumSkipped || !AllInstsAreIdentical) {
1715 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1716 return Changed;
1717 }
1718
1719 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1720 Changed;
1721 }
1722
1723 if (AllInstsAreIdentical) {
1724 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1725 AllInstsAreIdentical =
1726 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1727 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1728 Instruction *I2 = &*Pair.first;
1729 unsigned SkipFlagsBB2 = Pair.second;
1730 // Even if the instructions are identical, it may not
1731 // be safe to hoist them if we have skipped over
1732 // instructions with side effects or their operands
1733 // weren't hoisted.
1734 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1736 });
1737 }
1738
1739 if (AllInstsAreIdentical) {
1740 BB1ItrPair.first++;
1741 if (isa<DbgInfoIntrinsic>(I1)) {
1742 // The debug location is an integral part of a debug info intrinsic
1743 // and can't be separated from it or replaced. Instead of attempting
1744 // to merge locations, simply hoist both copies of the intrinsic.
1745 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1746 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1747 // and leave any that were not hoisted behind (by calling moveBefore
1748 // rather than moveBeforePreserving).
1749 I1->moveBefore(TI);
1750 for (auto &SuccIter : OtherSuccIterRange) {
1751 auto *I2 = &*SuccIter++;
1752 assert(isa<DbgInfoIntrinsic>(I2));
1753 I2->moveBefore(TI);
1754 }
1755 } else {
1756 // For a normal instruction, we just move one to right before the
1757 // branch, then replace all uses of the other with the first. Finally,
1758 // we remove the now redundant second instruction.
1759 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1760 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1761 // and leave any that were not hoisted behind (by calling moveBefore
1762 // rather than moveBeforePreserving).
1763 I1->moveBefore(TI);
1764 for (auto &SuccIter : OtherSuccIterRange) {
1765 Instruction *I2 = &*SuccIter++;
1766 assert(I2 != I1);
1767 if (!I2->use_empty())
1768 I2->replaceAllUsesWith(I1);
1769 I1->andIRFlags(I2);
1770 combineMetadataForCSE(I1, I2, true);
1771 // I1 and I2 are being combined into a single instruction. Its debug
1772 // location is the merged locations of the original instructions.
1773 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1774 I2->eraseFromParent();
1775 }
1776 }
1777 if (!Changed)
1778 NumHoistCommonCode += SuccIterPairs.size();
1779 Changed = true;
1780 NumHoistCommonInstrs += SuccIterPairs.size();
1781 } else {
1782 if (NumSkipped >= HoistCommonSkipLimit) {
1783 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1784 return Changed;
1785 }
1786 // We are about to skip over a pair of non-identical instructions. Record
1787 // if any have characteristics that would prevent reordering instructions
1788 // across them.
1789 for (auto &SuccIterPair : SuccIterPairs) {
1790 Instruction *I = &*SuccIterPair.first++;
1791 SuccIterPair.second |= skippedInstrFlags(I);
1792 }
1793 ++NumSkipped;
1794 }
1795 }
1796}
1797
// Hoist the lockstep-identical terminator I1 (plus its counterparts in
// OtherSuccTIs) out of TI's successor blocks into TI's parent block: the
// terminator is cloned before TI, all uses of the originals are redirected to
// the clone, and — for a two-way conditional branch (BI non-null) — PHI inputs
// that disagree between the two successors are merged with freshly created
// selects on BI's condition. Returns true if the CFG was changed.
1798bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
1799    Instruction *TI, Instruction *I1,
1800    SmallVectorImpl<Instruction *> &OtherSuccTIs) {
1801
1802  auto *BI = dyn_cast<BranchInst>(TI);
1803
1804  bool Changed = false;
1805  BasicBlock *TIParent = TI->getParent();
1806  BasicBlock *BB1 = I1->getParent();
1807
1808  // Use only for an if statement.
1809  auto *I2 = *OtherSuccTIs.begin();
1810  auto *BB2 = I2->getParent();
1811  if (BI) {
1812    assert(OtherSuccTIs.size() == 1);
1813    assert(BI->getSuccessor(0) == I1->getParent());
1814    assert(BI->getSuccessor(1) == I2->getParent());
1815  }
1816
1817  // In the case of an if statement, we try to hoist an invoke.
1818  // FIXME: Can we define a safety predicate for CallBr?
1819  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
1820  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
1821  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
1822    return false;
1823
1824  // TODO: callbr hoisting currently disabled pending further study.
1825  if (isa<CallBrInst>(I1))
1826    return false;
1827
1828  for (BasicBlock *Succ : successors(BB1)) {
1829    for (PHINode &PN : Succ->phis()) {
1830      Value *BB1V = PN.getIncomingValueForBlock(BB1);
1831      for (Instruction *OtherSuccTI : OtherSuccTIs) {
1832        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
1833        if (BB1V == BB2V)
1834          continue;
1835
1836        // In the case of an if statement, check for
1837        // passingValueIsAlwaysUndefined here because we would rather eliminate
1838        // undefined control flow then converting it to a select.
1839        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
// NOTE(review): original line 1840 is missing from this extraction; it should
// carry the remainder of this condition (presumably the BB2V counterpart of
// the passingValueIsAlwaysUndefined check) -- verify against upstream LLVM.
1841          return false;
1842      }
1843    }
1844  }
1845
1846  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
1847  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
1848  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
1849  // Clone the terminator and hoist it into the pred, without any debug info.
1850  Instruction *NT = I1->clone();
1851  NT->insertInto(TIParent, TI->getIterator());
1852  if (!NT->getType()->isVoidTy()) {
1853    I1->replaceAllUsesWith(NT);
1854    for (Instruction *OtherSuccTI : OtherSuccTIs)
1855      OtherSuccTI->replaceAllUsesWith(NT);
1856    NT->takeName(I1);
1857  }
1858  Changed = true;
1859  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
1860
1861  // Ensure terminator gets a debug location, even an unknown one, in case
1862  // it involves inlinable calls.
// NOTE(review): original line 1863 is missing from this extraction; it should
// declare `Locs` (a small vector of DebugLoc) populated below -- verify
// against upstream LLVM.
1864  Locs.push_back(I1->getDebugLoc());
1865  for (auto *OtherSuccTI : OtherSuccTIs)
1866    Locs.push_back(OtherSuccTI->getDebugLoc());
1867  NT->setDebugLoc(DILocation::getMergedLocations(Locs));
1868
1869  // PHIs created below will adopt NT's merged DebugLoc.
1870  IRBuilder<NoFolder> Builder(NT);
1871
1872  // In the case of an if statement, hoisting one of the terminators from our
1873  // successor is a great thing. Unfortunately, the successors of the if/else
1874  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
1875  // must agree for all PHI nodes, so we insert select instruction to compute
1876  // the final result.
1877  if (BI) {
1878    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
1879    for (BasicBlock *Succ : successors(BB1)) {
1880      for (PHINode &PN : Succ->phis()) {
1881        Value *BB1V = PN.getIncomingValueForBlock(BB1);
1882        Value *BB2V = PN.getIncomingValueForBlock(BB2);
1883        if (BB1V == BB2V)
1884          continue;
1885
1886        // These values do not agree. Insert a select instruction before NT
1887        // that determines the right value.
1888        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
1889        if (!SI) {
1890          // Propagate fast-math-flags from phi node to its replacement select.
1891          IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
1892          if (isa<FPMathOperator>(PN))
1893            Builder.setFastMathFlags(PN.getFastMathFlags());
1894
1895          SI = cast<SelectInst>(Builder.CreateSelect(
1896              BI->getCondition(), BB1V, BB2V,
1897              BB1V->getName() + "." + BB2V->getName(), BI));
1898        }
1899
1900        // Make the PHI node use the select for all incoming values for BB1/BB2
1901        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1902          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
1903            PN.setIncomingValue(i, SI);
1904      }
1905    }
1906  }
1907
// NOTE(review): original line 1908 is missing from this extraction; it should
// declare the `Updates` vector of DominatorTree update records used below --
// verify against upstream LLVM.
1909
1910  // Update any PHI nodes in our new successors.
1911  for (BasicBlock *Succ : successors(BB1)) {
1912    addPredecessorToBlock(Succ, TIParent, BB1);
1913    if (DTU)
1914      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
1915  }
1916
1917  if (DTU)
1918    for (BasicBlock *Succ : successors(TI))
1919      Updates.push_back({DominatorTree::Delete, TIParent, Succ});
1920
// NOTE(review): original line 1921 is missing from this extraction; it should
// erase the now-redundant old terminator TI (and DCE its condition) -- verify
// against upstream LLVM.
1922  if (DTU)
1923    DTU->applyUpdates(Updates);
1924  return Changed;
1925}
1926
1927// Check lifetime markers.
1928static bool isLifeTimeMarker(const Instruction *I) {
1929 if (auto II = dyn_cast<IntrinsicInst>(I)) {
1930 switch (II->getIntrinsicID()) {
1931 default:
1932 break;
1933 case Intrinsic::lifetime_start:
1934 case Intrinsic::lifetime_end:
1935 return true;
1936 }
1937 }
1938 return false;
1939}
1940
1941// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
1942// into variables.
// Returns whether it is considered "cheap" to replace operand OpIdx of I with
// a variable (e.g. a PHI): as written, every non-intrinsic instruction
// qualifies, so only intrinsic call operands are protected.
// NOTE(review): original line 1943 is missing from this extraction; it should
// hold the start of this function's signature (its name and the
// `const Instruction *I` parameter) -- verify against upstream LLVM.
1944                                             int OpIdx) {
1945  return !isa<IntrinsicInst>(I);
1946}
1947
1948// All instructions in Insts belong to different blocks that all unconditionally
1949// branch to a common successor. Analyze each instruction and return true if it
1950// would be possible to sink them into their successor, creating one common
1951// instruction instead. For every value that would be required to be provided by
1952// PHI node (because an operand varies in each input block), add to PHIOperands.
// NOTE(review): original lines 1953-1954 are missing from this extraction;
// they should hold the start of this function's signature (its name and the
// `ArrayRef<Instruction *> Insts` parameter) -- verify against upstream LLVM.
1955                          DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
1956  // Prune out obviously bad instructions to move. Each instruction must have
1957  // the same number of uses, and we check later that the uses are consistent.
1958  std::optional<unsigned> NumUses;
1959  for (auto *I : Insts) {
1960    // These instructions may change or break semantics if moved.
1961    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
1962        I->getType()->isTokenTy())
1963      return false;
1964
1965    // Do not try to sink an instruction in an infinite loop - it can cause
1966    // this algorithm to infinite loop.
1967    if (I->getParent()->getSingleSuccessor() == I->getParent())
1968      return false;
1969
1970    // Conservatively return false if I is an inline-asm instruction. Sinking
1971    // and merging inline-asm instructions can potentially create arguments
1972    // that cannot satisfy the inline-asm constraints.
1973    // If the instruction has nomerge or convergent attribute, return false.
1974    if (const auto *C = dyn_cast<CallBase>(I))
1975      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
1976        return false;
1977
1978    if (!NumUses)
1979      NumUses = I->getNumUses();
1980    else if (NumUses != I->getNumUses())
1981      return false;
1982  }
1983
1984  const Instruction *I0 = Insts.front();
1985  const auto I0MMRA = MMRAMetadata(*I0);
1986  for (auto *I : Insts) {
1987    if (!I->isSameOperationAs(I0))
1988      return false;
1989
1990    // swifterror pointers can only be used by a load or store; sinking a load
1991    // or store would require introducing a select for the pointer operand,
1992    // which isn't allowed for swifterror pointers.
1993    if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
1994      return false;
1995    if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
1996      return false;
1997
1998    // Treat MMRAs conservatively. This pass can be quite aggressive and
1999    // could drop a lot of MMRAs otherwise.
2000    if (MMRAMetadata(*I) != I0MMRA)
2001      return false;
2002  }
2003
2004  // Uses must be consistent: If I0 is used in a phi node in the sink target,
2005  // then the other phi operands must match the instructions from Insts. This
2006  // also has to hold true for any phi nodes that would be created as a result
2007  // of sinking. Both of these cases are represented by PhiOperands.
2008  for (const Use &U : I0->uses()) {
2009    auto It = PHIOperands.find(&U);
2010    if (It == PHIOperands.end())
2011      // There may be uses in other blocks when sinking into a loop header.
2012      return false;
2013    if (!equal(Insts, It->second))
2014      return false;
2015  }
2016
2017  // For calls to be sinkable, they must all be indirect, or have same callee.
2018  // I.e. if we have two direct calls to different callees, we don't want to
2019  // turn that into an indirect call. Likewise, if we have an indirect call,
2020  // and a direct call, we don't actually want to have a single indirect call.
2021  if (isa<CallBase>(I0)) {
2022    auto IsIndirectCall = [](const Instruction *I) {
2023      return cast<CallBase>(I)->isIndirectCall();
2024    };
2025    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2026    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2027    if (HaveIndirectCalls) {
2028      if (!AllCallsAreIndirect)
2029        return false;
2030    } else {
2031      // All callees must be identical.
2032      Value *Callee = nullptr;
2033      for (const Instruction *I : Insts) {
2034        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2035        if (!Callee)
2036          Callee = CurrCallee;
2037        else if (Callee != CurrCallee)
2038          return false;
2039      }
2040    }
2041  }
2042
2043  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2044    Value *Op = I0->getOperand(OI);
2045    if (Op->getType()->isTokenTy())
2046      // Don't touch any operand of token type.
2047      return false;
2048
2049    auto SameAsI0 = [&I0, OI](const Instruction *I) {
2050      assert(I->getNumOperands() == I0->getNumOperands());
2051      return I->getOperand(OI) == I0->getOperand(OI);
2052    };
2053    if (!all_of(Insts, SameAsI0)) {
2054      // Because SROA historically couldn't handle speculating stores of
2055      // selects, we try not to sink loads, stores or lifetime markers of
2056      // allocas when we'd have to create a PHI for the address operand.
2057      // TODO: SROA supports speculation for loads and stores now -- remove
2058      // this hack?
2059      if (isa<StoreInst>(I0) && OI == 1 &&
2060          any_of(Insts, [](const Instruction *I) {
2061            return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2062          }))
2063        return false;
2064      if (isa<LoadInst>(I0) && OI == 0 &&
2065          any_of(Insts, [](const Instruction *I) {
2066            return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
2067          }))
2068        return false;
2069      if (isLifeTimeMarker(I0) && OI == 1 &&
2070          any_of(Insts, [](const Instruction *I) {
2071            return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2072          }))
2073        return false;
2074
2075      if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
// NOTE(review): original line 2076 is missing from this extraction; it should
// hold the second half of this condition (presumably a
// canReplaceOperandWithVariable(I0, OI) check) -- verify against upstream
// LLVM.
2077        // We can't create a PHI from this GEP.
2078        return false;
2079      auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2080      for (auto *I : Insts)
2081        Ops.push_back(I->getOperand(OI));
2082    }
2083  }
2084  return true;
2085}
2086
2087// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2088// instruction of every block in Blocks to their common successor, commoning
2089// into one instruction.
// NOTE(review): original line 2090 is missing from this extraction; it should
// hold this function's signature (`sinkLastInstruction` taking the list of
// predecessor blocks) -- verify against upstream LLVM.
2091  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2092
2093  // canSinkInstructions returning true guarantees that every block has at
2094  // least one non-terminator instruction.
// NOTE(review): original line 2095 is missing from this extraction; it should
// declare the `Insts` vector populated by the loop below -- verify against
// upstream LLVM.
2096  for (auto *BB : Blocks) {
2097    Instruction *I = BB->getTerminator();
2098    do {
      // Walk backwards past debug intrinsics to the last "real" instruction.
2099      I = I->getPrevNode();
2100    } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
2101    if (!isa<DbgInfoIntrinsic>(I))
2102      Insts.push_back(I);
2103  }
2104
2105  // We don't need to do any more checking here; canSinkInstructions should
2106  // have done it all for us.
2107  SmallVector<Value*, 4> NewOperands;
2108  Instruction *I0 = Insts.front();
2109  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2110    // This check is different to that in canSinkInstructions. There, we
2111    // cared about the global view once simplifycfg (and instcombine) have
2112    // completed - it takes into account PHIs that become trivially
2113    // simplifiable. However here we need a more local view; if an operand
2114    // differs we create a PHI and rely on instcombine to clean up the very
2115    // small mess we may make.
2116    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2117      return I->getOperand(O) != I0->getOperand(O);
2118    });
2119    if (!NeedPHI) {
2120      NewOperands.push_back(I0->getOperand(O));
2121      continue;
2122    }
2123
2124    // Create a new PHI in the successor block and populate it.
2125    auto *Op = I0->getOperand(O);
2126    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2127    auto *PN =
2128        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2129    PN->insertBefore(BBEnd->begin());
2130    for (auto *I : Insts)
2131      PN->addIncoming(I->getOperand(O), I->getParent());
2132    NewOperands.push_back(PN);
2133  }
2134
2135  // Arbitrarily use I0 as the new "common" instruction; remap its operands
2136  // and move it to the start of the successor block.
2137  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2138    I0->getOperandUse(O).set(NewOperands[O]);
2139
2140  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2141
2142  // Update metadata and IR flags, and merge debug locations.
2143  for (auto *I : Insts)
2144    if (I != I0) {
2145      // The debug location for the "common" instruction is the merged locations
2146      // of all the commoned instructions. We start with the original location
2147      // of the "common" instruction and iteratively merge each location in the
2148      // loop below.
2149      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2150      // However, as N-way merge for CallInst is rare, so we use simplified API
2151      // instead of using complex API for N-way merge.
2152      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2153      combineMetadataForCSE(I0, I, true);
2154      I0->andIRFlags(I);
2155    }
2156
2157  for (User *U : make_early_inc_range(I0->users())) {
2158    // canSinkLastInstruction checked that all instructions are only used by
2159    // phi nodes in a way that allows replacing the phi node with the common
2160    // instruction.
2161    auto *PN = cast<PHINode>(U);
2162    PN->replaceAllUsesWith(I0);
2163    PN->eraseFromParent();
2164  }
2165
2166  // Finally nuke all instructions apart from the common instruction.
2167  for (auto *I : Insts) {
2168    if (I == I0)
2169      continue;
2170    // The remaining uses are debug users, replace those with the common inst.
2171    // In most (all?) cases this just introduces a use-before-def.
2172    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2173    I->replaceAllUsesWith(I0);
2174    I->eraseFromParent();
2175  }
2176}
2177
2178namespace {
2179
2180  // LockstepReverseIterator - Iterates through instructions
2181  // in a set of blocks in reverse order from the first non-terminator.
2182  // For example (assume all blocks have size n):
2183  //   LockstepReverseIterator I([B1, B2, B3]);
2184  //   *I-- = [B1[n], B2[n], B3[n]];
2185  //   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
2186  //   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
2187  //   ...
2188  class LockstepReverseIterator {
// NOTE(review): original lines 2189-2190 are missing from this extraction;
// they should declare the data members (the `Blocks` list and the per-block
// instruction cursors `Insts`) -- verify against upstream LLVM.
2191    bool Fail;
2192
2193  public:
2194    LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
2195      reset();
2196    }
2197
    // Position every cursor at the last non-debug, non-terminator instruction
    // of its block; sets Fail if any block has no such instruction.
2198    void reset() {
2199      Fail = false;
2200      Insts.clear();
2201      for (auto *BB : Blocks) {
2202        Instruction *Inst = BB->getTerminator();
2203        for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2204          Inst = Inst->getPrevNode();
2205        if (!Inst) {
2206          // Block wasn't big enough.
2207          Fail = true;
2208          return;
2209        }
2210        Insts.push_back(Inst);
2211      }
2212    }
2213
    // Once Fail is set it stays set; -- and ++ below become no-ops.
2214    bool isValid() const {
2215      return !Fail;
2216    }
2217
    // Step every cursor one non-debug instruction backwards, in lockstep.
2218    void operator--() {
2219      if (Fail)
2220        return;
2221      for (auto *&Inst : Insts) {
2222        for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2223          Inst = Inst->getPrevNode();
2224        // Already at beginning of block.
2225        if (!Inst) {
2226          Fail = true;
2227          return;
2228        }
2229      }
2230    }
2231
    // Step every cursor one non-debug instruction forwards, in lockstep.
2232    void operator++() {
2233      if (Fail)
2234        return;
2235      for (auto *&Inst : Insts) {
2236        for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2237          Inst = Inst->getNextNode();
2238        // Already at end of block.
2239        if (!Inst) {
2240          Fail = true;
2241          return;
2242        }
2243      }
2244    }
2245
// NOTE(review): original line 2246 is missing from this extraction; it should
// hold the signature of the dereference operator (returning the current
// lockstep list of instructions, one per block) -- verify against upstream
// LLVM.
2247      return Insts;
2248    }
2249  };
2250
2251} // end anonymous namespace
2252
2253/// Check whether BB's predecessors end with unconditional branches. If it is
2254/// true, sink any common code from the predecessors to BB.
// NOTE(review): original line 2255 is missing from this extraction; it should
// hold this function's signature (name and the `BasicBlock *BB` parameter
// preceding the DTU parameter below) -- verify against upstream LLVM.
2256                                         DomTreeUpdater *DTU) {
2257  // We support two situations:
2258  //   (1) all incoming arcs are unconditional
2259  //   (2) there are non-unconditional incoming arcs
2260  //
2261  // (2) is very common in switch defaults and
2262  // else-if patterns;
2263  //
2264  //   if (a) f(1);
2265  //   else if (b) f(2);
2266  //
2267  // produces:
2268  //
2269  //       [if]
2270  //      /    \
2271  //    [f(1)] [if]
2272  //      |     | \
2273  //      |     |  |
2274  //      |  [f(2)]|
2275  //       \    | /
2276  //        [ end ]
2277  //
2278  // [end] has two unconditional predecessor arcs and one conditional. The
2279  // conditional refers to the implicit empty 'else' arc. This conditional
2280  // arc can also be caused by an empty default block in a switch.
2281  //
2282  // In this case, we attempt to sink code from all *unconditional* arcs.
2283  // If we can sink instructions from these arcs (determined during the scan
2284  // phase below) we insert a common successor for all unconditional arcs and
2285  // connect that to [end], to enable sinking:
2286  //
2287  //       [if]
2288  //      /    \
2289  //    [x(1)] [if]
2290  //      |     | \
2291  //      |     |  \
2292  //      |  [x(2)] |
2293  //       \   /    |
2294  //   [sink.split] |
2295  //         \     /
2296  //         [ end ]
2297  //
2298  SmallVector<BasicBlock*,4> UnconditionalPreds;
2299  bool HaveNonUnconditionalPredecessors = false;
2300  for (auto *PredBB : predecessors(BB)) {
2301    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2302    if (PredBr && PredBr->isUnconditional())
2303      UnconditionalPreds.push_back(PredBB);
2304    else
2305      HaveNonUnconditionalPredecessors = true;
2306  }
2307  if (UnconditionalPreds.size() < 2)
2308    return false;
2309
2310  // We take a two-step approach to tail sinking. First we scan from the end of
2311  // each block upwards in lockstep. If the n'th instruction from the end of each
2312  // block can be sunk, those instructions are added to ValuesToSink and we
2313  // carry on. If we can sink an instruction but need to PHI-merge some operands
2314  // (because they're not identical in each instruction) we add these to
2315  // PHIOperands.
2316  // We prepopulate PHIOperands with the phis that already exist in BB.
// NOTE(review): original line 2317 is missing from this extraction; it should
// declare the `PHIOperands` map consumed below and by canSinkInstructions --
// verify against upstream LLVM.
2318  for (PHINode &PN : BB->phis()) {
// NOTE(review): original line 2319 is missing from this extraction; it should
// declare the per-phi `IncomingVals` block-to-use map populated below --
// verify against upstream LLVM.
2320    for (const Use &U : PN.incoming_values())
2321      IncomingVals.insert({PN.getIncomingBlock(U), &U});
2322    auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2323    for (BasicBlock *Pred : UnconditionalPreds)
2324      Ops.push_back(*IncomingVals[Pred]);
2325  }
2326
2327  int ScanIdx = 0;
2328  SmallPtrSet<Value*,4> InstructionsToSink;
2329  LockstepReverseIterator LRI(UnconditionalPreds);
2330  while (LRI.isValid() &&
2331         canSinkInstructions(*LRI, PHIOperands)) {
2332    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2333                      << "\n");
2334    InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2335    ++ScanIdx;
2336    --LRI;
2337  }
2338
2339  // If no instructions can be sunk, early-return.
2340  if (ScanIdx == 0)
2341    return false;
2342
2343  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2344
2345  if (!followedByDeoptOrUnreachable) {
2346    // Okay, we *could* sink last ScanIdx instructions. But how many can we
2347    // actually sink before encountering instruction that is unprofitable to
2348    // sink?
2349    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2350      unsigned NumPHIInsts = 0;
2351      for (Use &U : (*LRI)[0]->operands()) {
2352        auto It = PHIOperands.find(&U);
2353        if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2354              return InstructionsToSink.contains(V);
2355            })) {
2356          ++NumPHIInsts;
2357          // FIXME: this check is overly optimistic. We may end up not sinking
2358          // said instruction, due to the very same profitability check.
2359          // See @creating_too_many_phis in sink-common-code.ll.
2360        }
2361      }
2362      LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2363      return NumPHIInsts <= 1;
2364    };
2365
2366    // We've determined that we are going to sink last ScanIdx instructions,
2367    // and recorded them in InstructionsToSink. Now, some instructions may be
2368    // unprofitable to sink. But that determination depends on the instructions
2369    // that we are going to sink.
2370
2371    // First, forward scan: find the first instruction unprofitable to sink,
2372    // recording all the ones that are profitable to sink.
2373    // FIXME: would it be better, after we detect that not all are profitable.
2374    // to either record the profitable ones, or erase the unprofitable ones?
2375    // Maybe we need to choose (at runtime) the one that will touch least
2376    // instrs?
2377    LRI.reset();
2378    int Idx = 0;
2379    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2380    while (Idx < ScanIdx) {
2381      if (!ProfitableToSinkInstruction(LRI)) {
2382        // Too many PHIs would be created.
2383        LLVM_DEBUG(
2384            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2385        break;
2386      }
2387      InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2388      --LRI;
2389      ++Idx;
2390    }
2391
2392    // If no instructions can be sunk, early-return.
2393    if (Idx == 0)
2394      return false;
2395
2396    // Did we determine that (only) some instructions are unprofitable to sink?
2397    if (Idx < ScanIdx) {
2398      // Okay, some instructions are unprofitable.
2399      ScanIdx = Idx;
2400      InstructionsToSink = InstructionsProfitableToSink;
2401
2402      // But, that may make other instructions unprofitable, too.
2403      // So, do a backward scan, do any earlier instructions become
2404      // unprofitable?
2405      assert(
2406          !ProfitableToSinkInstruction(LRI) &&
2407          "We already know that the last instruction is unprofitable to sink");
2408      ++LRI;
2409      --Idx;
2410      while (Idx >= 0) {
2411        // If we detect that an instruction becomes unprofitable to sink,
2412        // all earlier instructions won't be sunk either,
2413        // so preemptively keep InstructionsProfitableToSink in sync.
2414        // FIXME: is this the most performant approach?
2415        for (auto *I : *LRI)
2416          InstructionsProfitableToSink.erase(I);
2417        if (!ProfitableToSinkInstruction(LRI)) {
2418          // Everything starting with this instruction won't be sunk.
2419          ScanIdx = Idx;
2420          InstructionsToSink = InstructionsProfitableToSink;
2421        }
2422        ++LRI;
2423        --Idx;
2424      }
2425    }
2426
2427    // If no instructions can be sunk, early-return.
2428    if (ScanIdx == 0)
2429      return false;
2430  }
2431
2432  bool Changed = false;
2433
2434  if (HaveNonUnconditionalPredecessors) {
2435    if (!followedByDeoptOrUnreachable) {
2436      // It is always legal to sink common instructions from unconditional
2437      // predecessors. However, if not all predecessors are unconditional,
2438      // this transformation might be pessimizing. So as a rule of thumb,
2439      // don't do it unless we'd sink at least one non-speculatable instruction.
2440      // See https://bugs.llvm.org/show_bug.cgi?id=30244
2441      LRI.reset();
2442      int Idx = 0;
2443      bool Profitable = false;
2444      while (Idx < ScanIdx) {
2445        if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2446          Profitable = true;
2447          break;
2448        }
2449        --LRI;
2450        ++Idx;
2451      }
2452      if (!Profitable)
2453        return false;
2454    }
2455
2456    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2457    // We have a conditional edge and we're going to sink some instructions.
2458    // Insert a new block postdominating all blocks we're going to sink from.
2459    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2460      // Edges couldn't be split.
2461      return false;
2462    Changed = true;
2463  }
2464
2465  // Now that we've analyzed all potential sinking candidates, perform the
2466  // actual sink. We iteratively sink the last non-terminator of the source
2467  // blocks into their common successor unless doing so would require too
2468  // many PHI instructions to be generated (currently only one PHI is allowed
2469  // per sunk instruction).
2470  //
2471  // We can use InstructionsToSink to discount values needing PHI-merging that will
2472  // actually be sunk in a later iteration. This allows us to be more
2473  // aggressive in what we sink. This does allow a false positive where we
2474  // sink presuming a later value will also be sunk, but stop half way through
2475  // and never actually sink it which means we produce more PHIs than intended.
2476  // This is unlikely in practice though.
2477  int SinkIdx = 0;
2478  for (; SinkIdx != ScanIdx; ++SinkIdx) {
2479    LLVM_DEBUG(dbgs() << "SINK: Sink: "
2480                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2481                      << "\n");
2482
2483    // Because we've sunk every instruction in turn, the current instruction to
2484    // sink is always at index 0.
2485    LRI.reset();
2486
2487    sinkLastInstruction(UnconditionalPreds);
2488    NumSinkCommonInstrs++;
2489    Changed = true;
2490  }
2491  if (SinkIdx != 0)
2492    ++NumSinkCommonCode;
2493  return Changed;
2494}
2495
2496namespace {
2497
// Partitions `invoke` instructions into sets whose members are mutually
// mergeable into a single invoke, as decided by shouldBelongToSameSet().
2498struct CompatibleSets {
2499  using SetTy = SmallVector<InvokeInst *, 2>;
2500
// NOTE(review): original line 2501 is missing from this extraction; it should
// declare the container of sets (`Sets`) scanned by getCompatibleSet() below
// -- verify against upstream LLVM.
2502
2503  static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2504
2505  SetTy &getCompatibleSet(InvokeInst *II);
2506
2507  void insert(InvokeInst *II);
2508};
2509
2510CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2511  // Perform a linear scan over all the existing sets, see if the new `invoke`
2512  // is compatible with any particular set. Since we know that all the `invokes`
2513  // within a set are compatible, only check the first `invoke` in each set.
2514  // WARNING: at worst, this has quadratic complexity.
2515  for (CompatibleSets::SetTy &Set : Sets) {
2516    if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2517      return Set;
2518  }
2519
2520  // Otherwise, we either had no sets yet, or this invoke forms a new set.
2521  return Sets.emplace_back();
2522}
2523
2524void CompatibleSets::insert(InvokeInst *II) {
2525  getCompatibleSet(II).emplace_back(II);
2526}
2527
// Decide whether the two candidate `invoke`s may be merged into one: same
// callee kind (both direct to the same callee, or both indirect), matching
// normal/unwind destinations with compatible incoming PHI values, identical
// operation (including bundles), and data operands that are either equal or
// replaceable by a variable.
2528bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2529  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2530
2531  // Can we theoretically merge these `invoke`s?
2532  auto IsIllegalToMerge = [](InvokeInst *II) {
2533    return II->cannotMerge() || II->isInlineAsm();
2534  };
2535  if (any_of(Invokes, IsIllegalToMerge))
2536    return false;
2537
2538  // Either both `invoke`s must be   direct,
2539  // or     both `invoke`s must be indirect.
2540  auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2541  bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2542  bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2543  if (HaveIndirectCalls) {
2544    if (!AllCallsAreIndirect)
2545      return false;
2546  } else {
2547    // All callees must be identical.
2548    Value *Callee = nullptr;
2549    for (InvokeInst *II : Invokes) {
2550      Value *CurrCallee = II->getCalledOperand();
2551      assert(CurrCallee && "There is always a called operand.");
2552      if (!Callee)
2553        Callee = CurrCallee;
2554      else if (Callee != CurrCallee)
2555        return false;
2556    }
2557  }
2558
2559  // Either both `invoke`s must not have a normal destination,
2560  // or     both `invoke`s must     have a normal destination,
2561  auto HasNormalDest = [](InvokeInst *II) {
2562    return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2563  };
2564  if (any_of(Invokes, HasNormalDest)) {
2565    // Do not merge `invoke` that does not have a normal destination with one
2566    // that does have a normal destination, even though doing so would be legal.
2567    if (!all_of(Invokes, HasNormalDest))
2568      return false;
2569
2570    // All normal destinations must be identical.
2571    BasicBlock *NormalBB = nullptr;
2572    for (InvokeInst *II : Invokes) {
2573      BasicBlock *CurrNormalBB = II->getNormalDest();
2574      assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2575      if (!NormalBB)
2576        NormalBB = CurrNormalBB;
2577      else if (NormalBB != CurrNormalBB)
2578        return false;
2579    }
2580
2581    // In the normal destination, the incoming values for these two `invoke`s
2582    // must be compatible.
2583    SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
// NOTE(review): original line 2584 is missing from this extraction; it should
// begin the compatibility-check call (presumably
// `if (!incomingValuesAreCompatible(`) whose arguments follow -- verify
// against upstream LLVM.
2585          NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2586          &EquivalenceSet))
2587      return false;
2588  }
2589
2590#ifndef NDEBUG
2591  // All unwind destinations must be identical.
2592  // We know that because we have started from said unwind destination.
2593  BasicBlock *UnwindBB = nullptr;
2594  for (InvokeInst *II : Invokes) {
2595    BasicBlock *CurrUnwindBB = II->getUnwindDest();
2596    assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2597    if (!UnwindBB)
2598      UnwindBB = CurrUnwindBB;
2599    else
2600      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2601  }
2602#endif
2603
2604  // In the unwind destination, the incoming values for these two `invoke`s
2605  // must be compatible.
// NOTE(review): original line 2606 is missing from this extraction; it should
// begin the unwind-destination compatibility-check call -- verify against
// upstream LLVM.
2607          Invokes.front()->getUnwindDest(),
2608          {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2609    return false;
2610
2611  // Ignoring arguments, these `invoke`s must be identical,
2612  // including operand bundles.
2613  const InvokeInst *II0 = Invokes.front();
2614  for (auto *II : Invokes.drop_front())
2615    if (!II->isSameOperationAs(II0))
2616      return false;
2617
2618  // Can we theoretically form the data operands for the merged `invoke`?
2619  auto IsIllegalToMergeArguments = [](auto Ops) {
2620    Use &U0 = std::get<0>(Ops);
2621    Use &U1 = std::get<1>(Ops);
2622    if (U0 == U1)
2623      return false;
2624    return U0->getType()->isTokenTy() ||
2625           !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
2626                                          U0.getOperandNo());
2627  };
2628  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2629  if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2630             IsIllegalToMergeArguments))
2631    return false;
2632
2633  return true;
2634}
2635
2636} // namespace
2637
2638// Merge all invokes in the provided set, all of which are compatible
2639// as per the `CompatibleSets::shouldBelongToSameSet()`.
2641 DomTreeUpdater *DTU) {
2642 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2643
2645 if (DTU)
2646 Updates.reserve(2 + 3 * Invokes.size());
2647
2648 bool HasNormalDest =
2649 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2650
2651 // Clone one of the invokes into a new basic block.
2652 // Since they are all compatible, it doesn't matter which invoke is cloned.
2653 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2654 InvokeInst *II0 = Invokes.front();
2655 BasicBlock *II0BB = II0->getParent();
2656 BasicBlock *InsertBeforeBlock =
2657 II0->getParent()->getIterator()->getNextNode();
2658 Function *Func = II0BB->getParent();
2659 LLVMContext &Ctx = II0->getContext();
2660
2661 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2662 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2663
2664 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2665 // NOTE: all invokes have the same attributes, so no handling needed.
2666 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2667
2668 if (!HasNormalDest) {
2669 // This set does not have a normal destination,
2670 // so just form a new block with unreachable terminator.
2671 BasicBlock *MergedNormalDest = BasicBlock::Create(
2672 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2673 new UnreachableInst(Ctx, MergedNormalDest);
2674 MergedInvoke->setNormalDest(MergedNormalDest);
2675 }
2676
2677 // The unwind destination, however, remainds identical for all invokes here.
2678
2679 return MergedInvoke;
2680 }();
2681
2682 if (DTU) {
2683 // Predecessor blocks that contained these invokes will now branch to
2684 // the new block that contains the merged invoke, ...
2685 for (InvokeInst *II : Invokes)
2686 Updates.push_back(
2687 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2688
2689 // ... which has the new `unreachable` block as normal destination,
2690 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2691 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2692 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2693 SuccBBOfMergedInvoke});
2694
2695 // Since predecessor blocks now unconditionally branch to a new block,
2696 // they no longer branch to their original successors.
2697 for (InvokeInst *II : Invokes)
2698 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2699 Updates.push_back(
2700 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2701 }
2702
2703 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2704
2705 // Form the merged operands for the merged invoke.
2706 for (Use &U : MergedInvoke->operands()) {
2707 // Only PHI together the indirect callees and data operands.
2708 if (MergedInvoke->isCallee(&U)) {
2709 if (!IsIndirectCall)
2710 continue;
2711 } else if (!MergedInvoke->isDataOperand(&U))
2712 continue;
2713
2714 // Don't create trivial PHI's with all-identical incoming values.
2715 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2716 return II->getOperand(U.getOperandNo()) != U.get();
2717 });
2718 if (!NeedPHI)
2719 continue;
2720
2721 // Form a PHI out of all the data ops under this index.
2723 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2724 for (InvokeInst *II : Invokes)
2725 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2726
2727 U.set(PN);
2728 }
2729
2730 // We've ensured that each PHI node has compatible (identical) incoming values
2731 // when coming from each of the `invoke`s in the current merge set,
2732 // so update the PHI nodes accordingly.
2733 for (BasicBlock *Succ : successors(MergedInvoke))
2734 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2735 /*ExistPred=*/Invokes.front()->getParent());
2736
2737 // And finally, replace the original `invoke`s with an unconditional branch
2738 // to the block with the merged `invoke`. Also, give that merged `invoke`
2739 // the merged debugloc of all the original `invoke`s.
2740 DILocation *MergedDebugLoc = nullptr;
2741 for (InvokeInst *II : Invokes) {
2742 // Compute the debug location common to all the original `invoke`s.
2743 if (!MergedDebugLoc)
2744 MergedDebugLoc = II->getDebugLoc();
2745 else
2746 MergedDebugLoc =
2747 DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2748
2749 // And replace the old `invoke` with an unconditionally branch
2750 // to the block with the merged `invoke`.
2751 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2752 OrigSuccBB->removePredecessor(II->getParent());
2753 BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2754 II->replaceAllUsesWith(MergedInvoke);
2755 II->eraseFromParent();
2756 ++NumInvokesMerged;
2757 }
2758 MergedInvoke->setDebugLoc(MergedDebugLoc);
2759 ++NumInvokeSetsFormed;
2760
2761 if (DTU)
2762 DTU->applyUpdates(Updates);
2763}
2764
2765/// If this block is a `landingpad` exception handling block, categorize all
2766/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2767/// being "mergeable" together, and then merge invokes in each set together.
2768///
2769/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2770/// [...] [...]
2771/// | |
2772/// [invoke0] [invoke1]
2773/// / \ / \
2774/// [cont0] [landingpad] [cont1]
2775/// to:
2776/// [...] [...]
2777/// \ /
2778/// [invoke]
2779/// / \
2780/// [cont] [landingpad]
2781///
2782/// But of course we can only do that if the invokes share the `landingpad`,
2783/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2784/// and the invoked functions are "compatible".
2787 return false;
2788
2789 bool Changed = false;
2790
2791 // FIXME: generalize to all exception handling blocks?
2792 if (!BB->isLandingPad())
2793 return Changed;
2794
2795 CompatibleSets Grouper;
2796
2797 // Record all the predecessors of this `landingpad`. As per verifier,
2798 // the only allowed predecessor is the unwind edge of an `invoke`.
2799 // We want to group "compatible" `invokes` into the same set to be merged.
2800 for (BasicBlock *PredBB : predecessors(BB))
2801 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2802
2803 // And now, merge `invoke`s that were grouped togeter.
2804 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2805 if (Invokes.size() < 2)
2806 continue;
2807 Changed = true;
2808 mergeCompatibleInvokesImpl(Invokes, DTU);
2809 }
2810
2811 return Changed;
2812}
2813
2814namespace {
2815/// Track ephemeral values, which should be ignored for cost-modelling
2816/// purposes. Requires walking instructions in reverse order.
2817class EphemeralValueTracker {
2819
2820 bool isEphemeral(const Instruction *I) {
2821 if (isa<AssumeInst>(I))
2822 return true;
2823 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2824 all_of(I->users(), [&](const User *U) {
2825 return EphValues.count(cast<Instruction>(U));
2826 });
2827 }
2828
2829public:
2830 bool track(const Instruction *I) {
2831 if (isEphemeral(I)) {
2832 EphValues.insert(I);
2833 return true;
2834 }
2835 return false;
2836 }
2837
2838 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2839};
2840} // namespace
2841
2842/// Determine if we can hoist sink a sole store instruction out of a
2843/// conditional block.
2844///
2845/// We are looking for code like the following:
2846/// BrBB:
2847/// store i32 %add, i32* %arrayidx2
2848/// ... // No other stores or function calls (we could be calling a memory
2849/// ... // function).
2850/// %cmp = icmp ult %x, %y
2851/// br i1 %cmp, label %EndBB, label %ThenBB
2852/// ThenBB:
2853/// store i32 %add5, i32* %arrayidx2
2854/// br label EndBB
2855/// EndBB:
2856/// ...
2857/// We are going to transform this into:
2858/// BrBB:
2859/// store i32 %add, i32* %arrayidx2
2860/// ... //
2861/// %cmp = icmp ult %x, %y
2862/// %add.add5 = select i1 %cmp, i32 %add, %add5
2863/// store i32 %add.add5, i32* %arrayidx2
2864/// ...
2865///
2866/// \return The pointer to the value of the previous store if the store can be
2867/// hoisted into the predecessor block. 0 otherwise.
2869 BasicBlock *StoreBB, BasicBlock *EndBB) {
2870 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
2871 if (!StoreToHoist)
2872 return nullptr;
2873
2874 // Volatile or atomic.
2875 if (!StoreToHoist->isSimple())
2876 return nullptr;
2877
2878 Value *StorePtr = StoreToHoist->getPointerOperand();
2879 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
2880
2881 // Look for a store to the same pointer in BrBB.
2882 unsigned MaxNumInstToLookAt = 9;
2883 // Skip pseudo probe intrinsic calls which are not really killing any memory
2884 // accesses.
2885 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
2886 if (!MaxNumInstToLookAt)
2887 break;
2888 --MaxNumInstToLookAt;
2889
2890 // Could be calling an instruction that affects memory like free().
2891 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
2892 return nullptr;
2893
2894 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
2895 // Found the previous store to same location and type. Make sure it is
2896 // simple, to avoid introducing a spurious non-atomic write after an
2897 // atomic write.
2898 if (SI->getPointerOperand() == StorePtr &&
2899 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
2900 SI->getAlign() >= StoreToHoist->getAlign())
2901 // Found the previous store, return its value operand.
2902 return SI->getValueOperand();
2903 return nullptr; // Unknown store.
2904 }
2905
2906 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
2907 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
2908 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
2909 // Local objects (created by an `alloca` instruction) are always
2910 // writable, so once we are past a read from a location it is valid to
2911 // also write to that same location.
2912 // If the address of the local object never escapes the function, that
2913 // means it's never concurrently read or written, hence moving the store
2914 // from under the condition will not introduce a data race.
2915 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
2916 if (AI && !PointerMayBeCaptured(AI, false, true))
2917 // Found a previous load, return it.
2918 return LI;
2919 }
2920 // The load didn't work out, but we may still find a store.
2921 }
2922 }
2923
2924 return nullptr;
2925}
2926
2927/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
2928/// converted to selects.
2930 BasicBlock *EndBB,
2931 unsigned &SpeculatedInstructions,
2933 const TargetTransformInfo &TTI) {
2935 BB->getParent()->hasMinSize()
2938
2939 bool HaveRewritablePHIs = false;
2940 for (PHINode &PN : EndBB->phis()) {
2941 Value *OrigV = PN.getIncomingValueForBlock(BB);
2942 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2943
2944 // FIXME: Try to remove some of the duplication with
2945 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
2946 if (ThenV == OrigV)
2947 continue;
2948
2949 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
2951
2952 // Don't convert to selects if we could remove undefined behavior instead.
2953 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2955 return false;
2956
2957 HaveRewritablePHIs = true;
2958 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2959 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2960 if (!OrigCE && !ThenCE)
2961 continue; // Known cheap (FIXME: Maybe not true for aggregates).
2962
2963 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
2964 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
2965 InstructionCost MaxCost =
2967 if (OrigCost + ThenCost > MaxCost)
2968 return false;
2969
2970 // Account for the cost of an unfolded ConstantExpr which could end up
2971 // getting expanded into Instructions.
2972 // FIXME: This doesn't account for how many operations are combined in the
2973 // constant expression.
2974 ++SpeculatedInstructions;
2975 if (SpeculatedInstructions > 1)
2976 return false;
2977 }
2978
2979 return HaveRewritablePHIs;
2980}
2981
2982static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
2983 const TargetTransformInfo &TTI) {
2984 // If the branch is non-unpredictable, and is predicted to *not* branch to
2985 // the `then` block, then avoid speculating it.
2986 if (BI->getMetadata(LLVMContext::MD_unpredictable))
2987 return true;
2988
2989 uint64_t TWeight, FWeight;
2990 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
2991 return true;
2992
2993 uint64_t EndWeight = Invert ? TWeight : FWeight;
2994 BranchProbability BIEndProb =
2995 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
2997 return BIEndProb < Likely;
2998}
2999
3000/// Speculate a conditional basic block flattening the CFG.
3001///
3002/// Note that this is a very risky transform currently. Speculating
3003/// instructions like this is most often not desirable. Instead, there is an MI
3004/// pass which can do it with full awareness of the resource constraints.
3005/// However, some cases are "obvious" and we should do directly. An example of
3006/// this is speculating a single, reasonably cheap instruction.
3007///
3008/// There is only one distinct advantage to flattening the CFG at the IR level:
3009/// it makes very common but simplistic optimizations such as are common in
3010/// instcombine and the DAG combiner more powerful by removing CFG edges and
3011/// modeling their effects with easier to reason about SSA value graphs.
3012///
3013///
3014/// An illustration of this transform is turning this IR:
3015/// \code
3016/// BB:
3017/// %cmp = icmp ult %x, %y
3018/// br i1 %cmp, label %EndBB, label %ThenBB
3019/// ThenBB:
3020/// %sub = sub %x, %y
3021/// br label BB2
3022/// EndBB:
3023/// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
3024/// ...
3025/// \endcode
3026///
3027/// Into this IR:
3028/// \code
3029/// BB:
3030/// %cmp = icmp ult %x, %y
3031/// %sub = sub %x, %y
3032/// %cond = select i1 %cmp, 0, %sub
3033/// ...
3034/// \endcode
3035///
3036/// \returns true if the conditional block is removed.
3037bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3038 BasicBlock *ThenBB) {
3039 if (!Options.SpeculateBlocks)
3040 return false;
3041
3042 // Be conservative for now. FP select instruction can often be expensive.
3043 Value *BrCond = BI->getCondition();
3044 if (isa<FCmpInst>(BrCond))
3045 return false;
3046
3047 BasicBlock *BB = BI->getParent();
3048 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3049 InstructionCost Budget =
3051
3052 // If ThenBB is actually on the false edge of the conditional branch, remember
3053 // to swap the select operands later.
3054 bool Invert = false;
3055 if (ThenBB != BI->getSuccessor(0)) {
3056 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3057 Invert = true;
3058 }
3059 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3060
3061 if (!isProfitableToSpeculate(BI, Invert, TTI))
3062 return false;
3063
3064 // Keep a count of how many times instructions are used within ThenBB when
3065 // they are candidates for sinking into ThenBB. Specifically:
3066 // - They are defined in BB, and
3067 // - They have no side effects, and
3068 // - All of their uses are in ThenBB.
3069 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3070
3071 SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
3072
3073 unsigned SpeculatedInstructions = 0;
3074 Value *SpeculatedStoreValue = nullptr;
3075 StoreInst *SpeculatedStore = nullptr;
3076 EphemeralValueTracker EphTracker;
3077 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3078 // Skip debug info.
3079 if (isa<DbgInfoIntrinsic>(I)) {
3080 SpeculatedDbgIntrinsics.push_back(&I);
3081 continue;
3082 }
3083
3084 // Skip pseudo probes. The consequence is we lose track of the branch
3085 // probability for ThenBB, which is fine since the optimization here takes
3086 // place regardless of the branch probability.
3087 if (isa<PseudoProbeInst>(I)) {
3088 // The probe should be deleted so that it will not be over-counted when
3089 // the samples collected on the non-conditional path are counted towards
3090 // the conditional path. We leave it for the counts inference algorithm to
3091 // figure out a proper count for an unknown probe.
3092 SpeculatedDbgIntrinsics.push_back(&I);
3093 continue;
3094 }
3095
3096 // Ignore ephemeral values, they will be dropped by the transform.
3097 if (EphTracker.track(&I))
3098 continue;
3099
3100 // Only speculatively execute a single instruction (not counting the
3101 // terminator) for now.
3102 ++SpeculatedInstructions;
3103 if (SpeculatedInstructions > 1)
3104 return false;
3105
3106 // Don't hoist the instruction if it's unsafe or expensive.
3108 !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
3109 &I, BB, ThenBB, EndBB))))
3110 return false;
3111 if (!SpeculatedStoreValue &&
3114 return false;
3115
3116 // Store the store speculation candidate.
3117 if (SpeculatedStoreValue)
3118 SpeculatedStore = cast<StoreInst>(&I);
3119
3120 // Do not hoist the instruction if any of its operands are defined but not
3121 // used in BB. The transformation will prevent the operand from
3122 // being sunk into the use block.
3123 for (Use &Op : I.operands()) {
3124 Instruction *OpI = dyn_cast<Instruction>(Op);
3125 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3126 continue; // Not a candidate for sinking.
3127
3128 ++SinkCandidateUseCounts[OpI];
3129 }
3130 }
3131
3132 // Consider any sink candidates which are only used in ThenBB as costs for
3133 // speculation. Note, while we iterate over a DenseMap here, we are summing
3134 // and so iteration order isn't significant.
3135 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3136 if (Inst->hasNUses(Count)) {
3137 ++SpeculatedInstructions;
3138 if (SpeculatedInstructions > 1)
3139 return false;
3140 }
3141
3142 // Check that we can insert the selects and that it's not too expensive to do
3143 // so.
3144 bool Convert = SpeculatedStore != nullptr;
3146 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3147 SpeculatedInstructions,
3148 Cost, TTI);
3149 if (!Convert || Cost > Budget)
3150 return false;
3151
3152 // If we get here, we can hoist the instruction and if-convert.
3153 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3154
3155 // Insert a select of the value of the speculated store.
3156 if (SpeculatedStoreValue) {
3157 IRBuilder<NoFolder> Builder(BI);
3158 Value *OrigV = SpeculatedStore->getValueOperand();
3159 Value *TrueV = SpeculatedStore->getValueOperand();
3160 Value *FalseV = SpeculatedStoreValue;
3161 if (Invert)
3162 std::swap(TrueV, FalseV);
3163 Value *S = Builder.CreateSelect(
3164 BrCond, TrueV, FalseV, "spec.store.select", BI);
3165 SpeculatedStore->setOperand(0, S);
3166 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3167 SpeculatedStore->getDebugLoc());
3168 // The value stored is still conditional, but the store itself is now
3169 // unconditonally executed, so we must be sure that any linked dbg.assign
3170 // intrinsics are tracking the new stored value (the result of the
3171 // select). If we don't, and the store were to be removed by another pass
3172 // (e.g. DSE), then we'd eventually end up emitting a location describing
3173 // the conditional value, unconditionally.
3174 //
3175 // === Before this transformation ===
3176 // pred:
3177 // store %one, %x.dest, !DIAssignID !1
3178 // dbg.assign %one, "x", ..., !1, ...
3179 // br %cond if.then
3180 //
3181 // if.then:
3182 // store %two, %x.dest, !DIAssignID !2
3183 // dbg.assign %two, "x", ..., !2, ...
3184 //
3185 // === After this transformation ===
3186 // pred:
3187 // store %one, %x.dest, !DIAssignID !1
3188 // dbg.assign %one, "x", ..., !1
3189 /// ...
3190 // %merge = select %cond, %two, %one
3191 // store %merge, %x.dest, !DIAssignID !2
3192 // dbg.assign %merge, "x", ..., !2
3193 auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3194 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3195 DbgAssign->replaceVariableLocationOp(OrigV, S);
3196 };
3197 for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
3198 for_each(at::getDVRAssignmentMarkers(SpeculatedStore), replaceVariable);
3199 }
3200
3201 // Metadata can be dependent on the condition we are hoisting above.
3202 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3203 // to avoid making it appear as if the condition is a constant, which would
3204 // be misleading while debugging.
3205 // Similarly strip attributes that maybe dependent on condition we are
3206 // hoisting above.
3207 for (auto &I : make_early_inc_range(*ThenBB)) {
3208 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3209 // Don't update the DILocation of dbg.assign intrinsics.
3210 if (!isa<DbgAssignIntrinsic>(&I))
3211 I.setDebugLoc(DebugLoc());
3212 }
3213 I.dropUBImplyingAttrsAndMetadata();
3214
3215 // Drop ephemeral values.
3216 if (EphTracker.contains(&I)) {
3217 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3218 I.eraseFromParent();
3219 }
3220 }
3221
3222 // Hoist the instructions.
3223 // In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached
3224 // to these instructions, in the same way that dbg.value intrinsics are
3225 // dropped at the end of this block.
3226 for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
3227 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3228 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3229 // equivalent).
3230 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3231 !DVR || !DVR->isDbgAssign())
3232 It.dropOneDbgRecord(&DR);
3233 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3234 std::prev(ThenBB->end()));
3235
3236 // Insert selects and rewrite the PHI operands.
3237 IRBuilder<NoFolder> Builder(BI);
3238 for (PHINode &PN : EndBB->phis()) {
3239 unsigned OrigI = PN.getBasicBlockIndex(BB);
3240 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3241 Value *OrigV = PN.getIncomingValue(OrigI);
3242 Value *ThenV = PN.getIncomingValue(ThenI);
3243
3244 // Skip PHIs which are trivial.
3245 if (OrigV == ThenV)
3246 continue;
3247
3248 // Create a select whose true value is the speculatively executed value and
3249 // false value is the pre-existing value. Swap them if the branch
3250 // destinations were inverted.
3251 Value *TrueV = ThenV, *FalseV = OrigV;
3252 if (Invert)
3253 std::swap(TrueV, FalseV);
3254 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3255 PN.setIncomingValue(OrigI, V);
3256 PN.setIncomingValue(ThenI, V);
3257 }
3258
3259 // Remove speculated dbg intrinsics.
3260 // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
3261 // dbg value for the different flows and inserting it after the select.
3262 for (Instruction *I : SpeculatedDbgIntrinsics) {
3263 // We still want to know that an assignment took place so don't remove
3264 // dbg.assign intrinsics.
3265 if (!isa<DbgAssignIntrinsic>(I))
3266 I->eraseFromParent();
3267 }
3268
3269 ++NumSpeculations;
3270 return true;
3271}
3272
3273/// Return true if we can thread a branch across this block.
3275 int Size = 0;
3276 EphemeralValueTracker EphTracker;
3277
3278 // Walk the loop in reverse so that we can identify ephemeral values properly
3279 // (values only feeding assumes).
3280 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3281 // Can't fold blocks that contain noduplicate or convergent calls.
3282 if (CallInst *CI = dyn_cast<CallInst>(&I))
3283 if (CI->cannotDuplicate() || CI->isConvergent())
3284 return false;
3285
3286 // Ignore ephemeral values which are deleted during codegen.
3287 // We will delete Phis while threading, so Phis should not be accounted in
3288 // block's size.
3289 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3290 if (Size++ > MaxSmallBlockSize)
3291 return false; // Don't clone large BB's.
3292 }
3293
3294 // We can only support instructions that do not define values that are
3295 // live outside of the current basic block.
3296 for (User *U : I.users()) {
3297 Instruction *UI = cast<Instruction>(U);
3298 if (UI->getParent() != BB || isa<PHINode>(UI))
3299 return false;
3300 }
3301
3302 // Looks ok, continue checking.
3303 }
3304
3305 return true;
3306}
3307
3309 BasicBlock *To) {
3310 // Don't look past the block defining the value, we might get the value from
3311 // a previous loop iteration.
3312 auto *I = dyn_cast<Instruction>(V);
3313 if (I && I->getParent() == To)
3314 return nullptr;
3315
3316 // We know the value if the From block branches on it.
3317 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3318 if (BI && BI->isConditional() && BI->getCondition() == V &&
3319 BI->getSuccessor(0) != BI->getSuccessor(1))
3320 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3322
3323 return nullptr;
3324}
3325
3326/// If we have a conditional branch on something for which we know the constant
3327/// value in predecessors (e.g. a phi node in the current block), thread edges
3328/// from the predecessor to their ultimate destination.
3329static std::optional<bool>
3331 const DataLayout &DL,
3332 AssumptionCache *AC) {
3334 BasicBlock *BB = BI->getParent();
3335 Value *Cond = BI->getCondition();
3336 PHINode *PN = dyn_cast<PHINode>(Cond);
3337 if (PN && PN->getParent() == BB) {
3338 // Degenerate case of a single entry PHI.
3339 if (PN->getNumIncomingValues() == 1) {
3341 return true;
3342 }
3343
3344 for (Use &U : PN->incoming_values())
3345 if (auto *CB = dyn_cast<ConstantInt>(U))
3346 KnownValues[CB].insert(PN->getIncomingBlock(U));
3347 } else {
3348 for (BasicBlock *Pred : predecessors(BB)) {
3349 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3350 KnownValues[CB].insert(Pred);
3351 }
3352 }
3353
3354 if (KnownValues.empty())
3355 return false;
3356
3357 // Now we know that this block has multiple preds and two succs.
3358 // Check that the block is small enough and values defined in the block are
3359 // not used outside of it.
3361 return false;
3362
3363 for (const auto &Pair : KnownValues) {
3364 // Okay, we now know that all edges from PredBB should be revectored to
3365 // branch to RealDest.
3366 ConstantInt *CB = Pair.first;
3367 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3368 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3369
3370 if (RealDest == BB)
3371 continue; // Skip self loops.
3372
3373 // Skip if the predecessor's terminator is an indirect branch.
3374 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3375 return isa<IndirectBrInst>(PredBB->getTerminator());
3376 }))
3377 continue;
3378
3379 LLVM_DEBUG({
3380 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3381 << " has value " << *Pair.first << " in predecessors:\n";
3382 for (const BasicBlock *PredBB : Pair.second)
3383 dbgs() << " " << PredBB->getName() << "\n";
3384 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3385 });
3386
3387 // Split the predecessors we are threading into a new edge block. We'll
3388 // clone the instructions into this block, and then redirect it to RealDest.
3389 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3390
3391 // TODO: These just exist to reduce test diff, we can drop them if we like.
3392 EdgeBB->setName(RealDest->getName() + ".critedge");
3393 EdgeBB->moveBefore(RealDest);
3394
3395 // Update PHI nodes.
3396 addPredecessorToBlock(RealDest, EdgeBB, BB);
3397
3398 // BB may have instructions that are being threaded over. Clone these
3399 // instructions into EdgeBB. We know that there will be no uses of the
3400 // cloned instructions outside of EdgeBB.
3401 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3402 DenseMap<Value *, Value *> TranslateMap; // Track translated values.
3403 TranslateMap[Cond] = CB;
3404
3405 // RemoveDIs: track instructions that we optimise away while folding, so
3406 // that we can copy DbgVariableRecords from them later.
3407 BasicBlock::iterator SrcDbgCursor = BB->begin();
3408 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3409 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3410 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3411 continue;
3412 }
3413 // Clone the instruction.
3414 Instruction *N = BBI->clone();
3415 // Insert the new instruction into its new home.
3416 N->insertInto(EdgeBB, InsertPt);
3417
3418 if (BBI->hasName())
3419 N->setName(BBI->getName() + ".c");
3420
3421 // Update operands due to translation.
3422 for (Use &Op : N->operands()) {
3423 DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
3424 if (PI != TranslateMap.end())
3425 Op = PI->second;
3426 }
3427
3428 // Check for trivial simplification.
3429 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3430 if (!BBI->use_empty())
3431 TranslateMap[&*BBI] = V;
3432 if (!N->mayHaveSideEffects()) {
3433 N->eraseFromParent(); // Instruction folded away, don't need actual
3434 // inst
3435 N = nullptr;
3436 }
3437 } else {
3438 if (!BBI->use_empty())
3439 TranslateMap[&*BBI] = N;
3440 }
3441 if (N) {
3442 // Copy all debug-info attached to instructions from the last we
3443 // successfully clone, up to this instruction (they might have been
3444 // folded away).
3445 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3446 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3447 SrcDbgCursor = std::next(BBI);
3448 // Clone debug-info on this instruction too.
3449 N->cloneDebugInfoFrom(&*BBI);
3450
3451 // Register the new instruction with the assumption cache if necessary.
3452 if (auto *Assume = dyn_cast<AssumeInst>(N))
3453 if (AC)
3454 AC->registerAssumption(Assume);
3455 }
3456 }
3457
3458 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3459 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3460 InsertPt->cloneDebugInfoFrom(BI);
3461
3462 BB->removePredecessor(EdgeBB);
3463 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3464 EdgeBI->setSuccessor(0, RealDest);
3465 EdgeBI->setDebugLoc(BI->getDebugLoc());
3466
3467 if (DTU) {
3469 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3470 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3471 DTU->applyUpdates(Updates);
3472 }
3473
3474 // For simplicity, we created a separate basic block for the edge. Merge
3475 // it back into the predecessor if possible. This not only avoids
3476 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3477 // bypass the check for trivial cycles above.
3478 MergeBlockIntoPredecessor(EdgeBB, DTU);
3479
3480 // Signal repeat, simplifying any other constants.
3481 return std::nullopt;
3482 }
3483
3484 return false;
3485}
3486
3488 DomTreeUpdater *DTU,
3489 const DataLayout &DL,
3490 AssumptionCache *AC) {
3491 std::optional<bool> Result;
3492 bool EverChanged = false;
3493 do {
3494 // Note that None means "we changed things, but recurse further."
3495 Result = foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3496 EverChanged |= Result == std::nullopt || *Result;
3497 } while (Result == std::nullopt);
3498 return EverChanged;
3499}
3500
3501/// Given a BB that starts with the specified two-entry PHI node,
3502/// see if we can eliminate it.
// NOTE(review): the signature line (orig. 3503) is missing from this
// extract; the parameters visible below are the PHI's TTI/DTU/DL context
// plus SpeculateUnpredictables. Verify against upstream SimplifyCFG.cpp.
3504 DomTreeUpdater *DTU, const DataLayout &DL,
3505 bool SpeculateUnpredictables) {
3506 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3507 // statement", which has a very simple dominance structure. Basically, we
3508 // are trying to find the condition that is being branched on, which
3509 // subsequently causes this merge to happen. We really want control
3510 // dependence information for this check, but simplifycfg can't keep it up
3511 // to date, and this catches most of the cases we care about anyway.
3512 BasicBlock *BB = PN->getParent();
3513
3514 BasicBlock *IfTrue, *IfFalse;
3515 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3516 if (!DomBI)
3517 return false;
3518 Value *IfCond = DomBI->getCondition();
3519 // Don't bother if the branch will be constant folded trivially.
3520 if (isa<ConstantInt>(IfCond))
3521 return false;
3522
3523 BasicBlock *DomBlock = DomBI->getParent();
// NOTE(review): lines 3524-3525 are missing here (likely the declaration of
// IfBlocks and the start of a copy_if over PN->blocks()); verify upstream.
3526 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3527 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3528 });
3529 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3530 "Will have either one or two blocks to speculate.");
3531
3532 // If the branch is non-unpredictable, see if we either predictably jump to
3533 // the merge bb (if we have only a single 'then' block), or if we predictably
3534 // jump to one specific 'then' block (if we have two of them).
3535 // It isn't beneficial to speculatively execute the code
3536 // from the block that we know is predictably not entered.
3537 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3538 if (!IsUnpredictable) {
3539 uint64_t TWeight, FWeight;
3540 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3541 (TWeight + FWeight) != 0) {
3542 BranchProbability BITrueProb =
3543 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
// NOTE(review): line 3544 missing (presumably the initialization of
// 'Likely' from a predictability threshold); verify upstream.
3545 BranchProbability BIFalseProb = BITrueProb.getCompl();
3546 if (IfBlocks.size() == 1) {
3547 BranchProbability BIBBProb =
3548 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3549 if (BIBBProb >= Likely)
3550 return false;
3551 } else {
3552 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3553 return false;
3554 }
3555 }
3556 }
3557
3558 // Don't try to fold an unreachable block. For example, the phi node itself
3559 // can't be the candidate if-condition for a select that we want to form.
3560 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3561 if (IfCondPhiInst->getParent() == BB)
3562 return false;
3563
3564 // Okay, we found that we can merge this two-entry phi node into a select.
3565 // Doing so would require us to fold *all* two entry phi nodes in this block.
3566 // At some point this becomes non-profitable (particularly if the target
3567 // doesn't support cmov's). Only do this transformation if there are two or
3568 // fewer PHI nodes in this block.
3569 unsigned NumPhis = 0;
3570 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3571 if (NumPhis > 2)
3572 return false;
3573
3574 // Loop over the PHI's seeing if we can promote them all to select
3575 // instructions. While we are at it, keep track of the instructions
3576 // that need to be moved to the dominating block.
3577 SmallPtrSet<Instruction *, 4> AggressiveInsts;
// NOTE(review): lines 3578 and 3580 missing (likely the Cost accumulator
// declaration and the Budget initializer expression); verify upstream.
3579 InstructionCost Budget =
3581 if (SpeculateUnpredictables && IsUnpredictable)
3582 Budget += TTI.getBranchMispredictPenalty();
3583
3584 bool Changed = false;
3585 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3586 PHINode *PN = cast<PHINode>(II++);
3587 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3588 PN->replaceAllUsesWith(V);
3589 PN->eraseFromParent();
3590 Changed = true;
3591 continue;
3592 }
3593
3594 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
3595 Cost, Budget, TTI) ||
3596 !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
3597 Cost, Budget, TTI))
3598 return Changed;
3599 }
3600
3601 // If we folded the first phi, PN dangles at this point. Refresh it. If
3602 // we ran out of PHIs then we simplified them all.
3603 PN = dyn_cast<PHINode>(BB->begin());
3604 if (!PN)
3605 return true;
3606
3607 // Return true if at least one of these is a 'not', and another is either
3608 // a 'not' too, or a constant.
3609 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3610 if (!match(V0, m_Not(m_Value())))
3611 std::swap(V0, V1);
3612 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3613 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3614 };
3615
3616 // Don't fold i1 branches on PHIs which contain binary operators or
3617 // (possibly inverted) select form of or/ands, unless one of
3618 // the incoming values is an 'not' and another one is freely invertible.
3619 // These can often be turned into switches and other things.
3620 auto IsBinOpOrAnd = [](Value *V) {
3621 return match(
3622 V, m_CombineOr(
3623 m_BinOp(),
// NOTE(review): lines 3624-3625 missing (the select-form or/and matcher
// arms of this m_CombineOr); verify upstream.
3626 };
3627 if (PN->getType()->isIntegerTy(1) &&
3628 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3629 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3630 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3631 PN->getIncomingValue(1)))
3632 return Changed;
3633
3634 // If all PHI nodes are promotable, check to make sure that all instructions
3635 // in the predecessor blocks can be promoted as well. If not, we won't be able
3636 // to get rid of the control flow, so it's not worth promoting to select
3637 // instructions.
3638 for (BasicBlock *IfBlock : IfBlocks)
3639 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3640 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3641 // This is not an aggressive instruction that we can promote.
3642 // Because of this, we won't be able to get rid of the control flow, so
3643 // the xform is not worth it.
3644 return Changed;
3645 }
3646
3647 // If either of the blocks has it's address taken, we can't do this fold.
3648 if (any_of(IfBlocks,
3649 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3650 return Changed;
3651
3652 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3653 if (IsUnpredictable) dbgs() << " (unpredictable)";
3654 dbgs() << " T: " << IfTrue->getName()
3655 << " F: " << IfFalse->getName() << "\n");
3656
3657 // If we can still promote the PHI nodes after this gauntlet of tests,
3658 // do all of the PHI's now.
3659
3660 // Move all 'aggressive' instructions, which are defined in the
3661 // conditional parts of the if's up to the dominating block.
3662 for (BasicBlock *IfBlock : IfBlocks)
3663 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3664
3665 IRBuilder<NoFolder> Builder(DomBI);
3666 // Propagate fast-math-flags from phi nodes to replacement selects.
3667 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
3668 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3669 if (isa<FPMathOperator>(PN))
3670 Builder.setFastMathFlags(PN->getFastMathFlags());
3671
3672 // Change the PHI node into a select instruction.
3673 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3674 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3675
3676 Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
3677 PN->replaceAllUsesWith(Sel);
3678 Sel->takeName(PN);
3679 PN->eraseFromParent();
3680 }
3681
3682 // At this point, all IfBlocks are empty, so our if statement
3683 // has been flattened. Change DomBlock to jump directly to our new block to
3684 // avoid other simplifycfg's kicking in on the diamond.
3685 Builder.CreateBr(BB);
3686
// NOTE(review): line 3687 missing (likely the declaration of 'Updates' used
// below for DTU bookkeeping); verify upstream.
3688 if (DTU) {
3689 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3690 for (auto *Successor : successors(DomBlock))
3691 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3692 }
3693
3694 DomBI->eraseFromParent();
3695 if (DTU)
3696 DTU->applyUpdates(Updates);
3697
3698 return true;
3699}
3700
// Build the And/Or of LHS and RHS. When evaluating RHS cannot introduce
// poison beyond what LHS already implies (impliesPoison), the plain binary
// operator is safe; otherwise emit the select-based "logical" form, which
// does not propagate RHS's poison when LHS decides the result.
// NOTE(review): the signature line (orig. 3701, taking the IRBuilder and
// the opcode) is missing from this extract; verify against upstream.
3702 Instruction::BinaryOps Opc, Value *LHS,
3703 Value *RHS, const Twine &Name = "") {
3704 // Try to relax logical op to binary op.
3705 if (impliesPoison(RHS, LHS))
3706 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3707 if (Opc == Instruction::And)
3708 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3709 if (Opc == Instruction::Or)
3710 return Builder.CreateLogicalOr(LHS, RHS, Name);
3711 llvm_unreachable("Invalid logical opcode");
3712}
3713
3714/// Return true if either PBI or BI has branch weight available, and store
3715/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3716/// not have branch weight, use 1:1 as its weight.
// NOTE(review): the signature line (orig. 3717) is missing from this
// extract; the out-parameters are visible below. Verify against upstream.
3718 uint64_t &PredTrueWeight,
3719 uint64_t &PredFalseWeight,
3720 uint64_t &SuccTrueWeight,
3721 uint64_t &SuccFalseWeight) {
3722 bool PredHasWeights =
3723 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3724 bool SuccHasWeights =
3725 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3726 if (PredHasWeights || SuccHasWeights) {
// At least one branch has real profile weights; default the other to 1:1
// so callers can always combine the four values.
3727 if (!PredHasWeights)
3728 PredTrueWeight = PredFalseWeight = 1;
3729 if (!SuccHasWeights)
3730 SuccTrueWeight = SuccFalseWeight = 1;
3731 return true;
3732 } else {
// Neither branch carries weights; out-params are whatever
// extractBranchWeights left in them and must not be used.
3733 return false;
3734 }
3735}
3736
3737/// Determine if the two branches share a common destination and deduce a glue
3738/// that joins the branches' conditions to arrive at the common destination if
3739/// that would be profitable.
3740static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
// NOTE(review): line 3741 (function name and first parameters) is missing
// from this extract; verify against upstream.
3742 const TargetTransformInfo *TTI) {
3743 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3744 "Both blocks must end with a conditional branches.");
// NOTE(review): line 3745 (the condition of the second assert) is missing
// from this extract.
3746 "PredBB must be a predecessor of BB.");
3747
3748 // We have the potential to fold the conditions together, but if the
3749 // predecessor branch is predictable, we may not want to merge them.
3750 uint64_t PTWeight, PFWeight;
3751 BranchProbability PBITrueProb, Likely;
3752 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3753 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3754 (PTWeight + PFWeight) != 0) {
3755 PBITrueProb =
3756 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
// NOTE(review): line 3757 missing — presumably the assignment of 'Likely'
// (a predictability threshold from TTI); verify against upstream.
3758 }
3759
// The returned tuple is (common successor, glue opcode, whether the
// predecessor's condition must be inverted first). All four successor
// pairings are considered; each bails out when PBI's profile says the
// first condition already decides the outcome (default-constructed
// BranchProbability is "unknown", i.e. no profile objection).
3760 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3761 // Speculate the 2nd condition unless the 1st is probably true.
3762 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3763 return {{BI->getSuccessor(0), Instruction::Or, false}};
3764 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3765 // Speculate the 2nd condition unless the 1st is probably false.
3766 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3767 return {{BI->getSuccessor(1), Instruction::And, false}};
3768 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3769 // Speculate the 2nd condition unless the 1st is probably true.
3770 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3771 return {{BI->getSuccessor(1), Instruction::And, true}};
3772 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3773 // Speculate the 2nd condition unless the 1st is probably false.
3774 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3775 return {{BI->getSuccessor(0), Instruction::Or, true}};
3776 }
3777 return std::nullopt;
3778}
3779
// Fold BI's conditional branch into its predecessor PBI, gluing the two
// conditions with the opcode chosen by
// shouldFoldCondBranchesToCommonDestination, merging branch weights, and
// keeping DTU/MSSA up to date. Always returns true (the fold is committed).
// NOTE(review): the signature line (orig. 3780) is missing from this
// extract, as are several interior lines (3789, 3792, 3800, 3869,
// 3876, 3878 — see numbering gaps below); verify against upstream.
3781 DomTreeUpdater *DTU,
3782 MemorySSAUpdater *MSSAU,
3783 const TargetTransformInfo *TTI) {
3784 BasicBlock *BB = BI->getParent();
3785 BasicBlock *PredBlock = PBI->getParent();
3786
3787 // Determine if the two branches share a common destination.
3788 BasicBlock *CommonSucc;
3790 bool InvertPredCond;
3791 std::tie(CommonSucc, Opc, InvertPredCond) =
3793
3794 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3795
3796 IRBuilder<> Builder(PBI);
3797 // The builder is used to create instructions to eliminate the branch in BB.
3798 // If BB's terminator has !annotation metadata, add it to the new
3799 // instructions.
3801 {LLVMContext::MD_annotation});
3802
3803 // If we need to invert the condition in the pred block to match, do so now.
3804 if (InvertPredCond) {
3805 InvertBranch(PBI, Builder);
3806 }
3807
3808 BasicBlock *UniqueSucc =
3809 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
3810
3811 // Before cloning instructions, notify the successor basic block that it
3812 // is about to have a new predecessor. This will update PHI nodes,
3813 // which will allow us to update live-out uses of bonus instructions.
3814 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
3815
3816 // Try to update branch weights.
3817 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3818 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3819 SuccTrueWeight, SuccFalseWeight)) {
3820 SmallVector<uint64_t, 8> NewWeights;
3821
3822 if (PBI->getSuccessor(0) == BB) {
3823 // PBI: br i1 %x, BB, FalseDest
3824 // BI: br i1 %y, UniqueSucc, FalseDest
3825 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3826 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
3827 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3828 // TrueWeight for PBI * FalseWeight for BI.
3829 // We assume that total weights of a BranchInst can fit into 32 bits.
3830 // Therefore, we will not have overflow using 64-bit arithmetic.
3831 NewWeights.push_back(PredFalseWeight *
3832 (SuccFalseWeight + SuccTrueWeight) +
3833 PredTrueWeight * SuccFalseWeight);
3834 } else {
3835 // PBI: br i1 %x, TrueDest, BB
3836 // BI: br i1 %y, TrueDest, UniqueSucc
3837 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
3838 // FalseWeight for PBI * TrueWeight for BI.
3839 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
3840 PredFalseWeight * SuccTrueWeight);
3841 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
3842 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
3843 }
3844
3845 // Halve the weights if any of them cannot fit in an uint32_t
3846 fitWeights(NewWeights);
3847
3848 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
3849 setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
3850
3851 // TODO: If BB is reachable from all paths through PredBlock, then we
3852 // could replace PBI's branch probabilities with BI's.
3853 } else
// No weights anywhere: drop any stale !prof on PBI rather than keep a
// now-wrong profile.
3854 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
3855
3856 // Now, update the CFG.
3857 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
3858
3859 if (DTU)
3860 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
3861 {DominatorTree::Delete, PredBlock, BB}});
3862
3863 // If BI was a loop latch, it may have had associated loop metadata.
3864 // We need to copy it to the new latch, that is, PBI.
3865 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
3866 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
3867
3868 ValueToValueMapTy VMap; // maps original values to cloned values
// NOTE(review): line 3869 missing — presumably the call that clones BB's
// bonus instructions into PredBlock and populates VMap (VMap is read
// below); verify against upstream.
3870
3871 Module *M = BB->getModule();
3872
3873 if (PredBlock->IsNewDbgInfoFormat) {
3874 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
3875 for (DbgVariableRecord &DVR :
// NOTE(review): lines 3876 and 3878 missing (the range expression of this
// loop and RemapDbgRecord's trailing flags); verify against upstream.
3877 RemapDbgRecord(M, &DVR, VMap,
3879 }
3880 }
3881
3882 // Now that the Cond was cloned into the predecessor basic block,
3883 // or/and the two conditions together.
3884 Value *BICond = VMap[BI->getCondition()];
3885 PBI->setCondition(
3886 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
3887
3888 ++NumFoldBranchToCommonDest;
3889 return true;
3890}
3891
3892/// Return if an instruction's type or any of its operands' types are a vector
3893/// type.
3894static bool isVectorOp(Instruction &I) {
3895 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
3896 return U->getType()->isVectorTy();
3897 });
3898}
3899
3900/// If this basic block is simple enough, and if a predecessor branches to us
3901/// and one of our successors, fold the block into the predecessor and use
3902/// logical operations to pick the right destination.
// NOTE(review): the signature line (orig. 3903) is missing from this
// extract, as are several interior lines (3913-3915, 3930, 3942, 3953,
// 3958, 3988, 3995 — see numbering gaps below); verify against upstream.
3904 MemorySSAUpdater *MSSAU,
3905 const TargetTransformInfo *TTI,
3906 unsigned BonusInstThreshold) {
3907 // If this block ends with an unconditional branch,
3908 // let speculativelyExecuteBB() deal with it.
3909 if (!BI->isConditional())
3910 return false;
3911
3912 BasicBlock *BB = BI->getParent();
3916
3917 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
3918
// Only fold when the condition is a cmp/binop/select defined in this very
// block with a single use — otherwise cloning it is not profitable.
3919 if (!Cond ||
3920 (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
3921 !isa<SelectInst>(Cond)) ||
3922 Cond->getParent() != BB || !Cond->hasOneUse())
3923 return false;
3924
3925 // Finally, don't infinitely unroll conditional loops.
3926 if (is_contained(successors(BB), BB))
3927 return false;
3928
3929 // With which predecessors will we want to deal with?
3931 for (BasicBlock *PredBlock : predecessors(BB)) {
3932 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
3933
3934 // Check that we have two conditional branches. If there is a PHI node in
3935 // the common successor, verify that the same value flows in from both
3936 // blocks.
3937 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
3938 continue;
3939
3940 // Determine if the two branches share a common destination.
3941 BasicBlock *CommonSucc;
3943 bool InvertPredCond;
3944 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
3945 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
3946 else
3947 continue;
3948
3949 // Check the cost of inserting the necessary logic before performing the
3950 // transformation.
3951 if (TTI) {
3952 Type *Ty = BI->getCondition()->getType();
3954 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
3955 !isa<CmpInst>(PBI->getCondition())))
3956 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
3957
3959 continue;
3960 }
3961
3962 // Ok, we do want to deal with this predecessor. Record it.
3963 Preds.emplace_back(PredBlock);
3964 }
3965
3966 // If there aren't any predecessors into which we can fold,
3967 // don't bother checking the cost.
3968 if (Preds.empty())
3969 return false;
3970
3971 // Only allow this transformation if computing the condition doesn't involve
3972 // too many instructions and these involved instructions can be executed
3973 // unconditionally. We denote all involved instructions except the condition
3974 // as "bonus instructions", and only allow this transformation when the
3975 // number of the bonus instructions we'll need to create when cloning into
3976 // each predecessor does not exceed a certain threshold.
3977 unsigned NumBonusInsts = 0;
3978 bool SawVectorOp = false;
3979 const unsigned PredCount = Preds.size();
3980 for (Instruction &I : *BB) {
3981 // Don't check the branch condition comparison itself.
3982 if (&I == Cond)
3983 continue;
3984 // Ignore dbg intrinsics, and the terminator.
3985 if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
3986 continue;
3987 // I must be safe to execute unconditionally.
3989 return false;
3990 SawVectorOp |= isVectorOp(I);
3991
3992 // Account for the cost of duplicating this instruction into each
3993 // predecessor. Ignore free instructions.
3994 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
3996 NumBonusInsts += PredCount;
3997
3998 // Early exits once we reach the limit.
3999 if (NumBonusInsts >
4000 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4001 return false;
4002 }
4003
// Every use of a bonus instruction must be rewritable after cloning:
// either a later use inside BB itself, or a PHI incoming from BB.
4004 auto IsBCSSAUse = [BB, &I](Use &U) {
4005 auto *UI = cast<Instruction>(U.getUser());
4006 if (auto *PN = dyn_cast<PHINode>(UI))
4007 return PN->getIncomingBlock(U) == BB;
4008 return UI->getParent() == BB && I.comesBefore(UI);
4009 };
4010
4011 // Does this instruction require rewriting of uses?
4012 if (!all_of(I.uses(), IsBCSSAUse))
4013 return false;
4014 }
4015 if (NumBonusInsts >
4016 BonusInstThreshold *
4017 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4018 return false;
4019
4020 // Ok, we have the budget. Perform the transformation.
// Note: the loop body returns unconditionally, so at most one predecessor
// is folded per invocation of this function.
4021 for (BasicBlock *PredBlock : Preds) {
4022 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4023 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4024 }
4025 return false;
4026}
4027
4028// If there is only one store in BB1 and BB2, return it, otherwise return
4029// nullptr.
// Either block pointer may be null (a "triangle" with a fallthrough arm);
// null blocks are simply skipped.
// NOTE(review): the signature line (orig. 4030) is missing from this
// extract; verify against upstream.
4031 StoreInst *S = nullptr;
4032 for (auto *BB : {BB1, BB2}) {
4033 if (!BB)
4034 continue;
4035 for (auto &I : *BB)
4036 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4037 if (S)
4038 // Multiple stores seen.
4039 return nullptr;
4040 else
4041 S = SI;
4042 }
4043 }
4044 return S;
4045}
4046
// Make V (defined in BB) usable in BB's single successor, reusing an
// existing PHI when one already routes V (and, if given, AlternativeV),
// otherwise creating one.
// NOTE(review): the signature line (orig. 4047, taking V and BB) is missing
// from this extract; verify against upstream.
4048 Value *AlternativeV = nullptr) {
4049 // PHI is going to be a PHI node that allows the value V that is defined in
4050 // BB to be referenced in BB's only successor.
4051 //
4052 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4053 // doesn't matter to us what the other operand is (it'll never get used). We
4054 // could just create a new PHI with an undef incoming value, but that could
4055 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4056 // other PHI. So here we directly look for some PHI in BB's successor with V
4057 // as an incoming operand. If we find one, we use it, else we create a new
4058 // one.
4059 //
4060 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4061 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4062 // where OtherBB is the single other predecessor of BB's only successor.
4063 PHINode *PHI = nullptr;
4064 BasicBlock *Succ = BB->getSingleSuccessor();
4065
4066 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4067 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4068 PHI = cast<PHINode>(I);
4069 if (!AlternativeV)
4070 break;
4071
4072 assert(Succ->hasNPredecessors(2));
4073 auto PredI = pred_begin(Succ);
4074 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4075 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4076 break;
4077 PHI = nullptr;
4078 }
4079 if (PHI)
4080 return PHI;
4081
4082 // If V is not an instruction defined in BB, just return it.
4083 if (!AlternativeV &&
4084 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4085 return V;
4086
4087 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4088 PHI->insertBefore(Succ->begin());
4089 PHI->addIncoming(V, BB);
4090 for (BasicBlock *PredBB : predecessors(Succ))
4091 if (PredBB != BB)
4092 PHI->addIncoming(
// Other-edge value is never read when AlternativeV is null, so poison is
// the cheapest placeholder.
4093 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4094 return PHI;
4095}
4096
// Sink the unique conditional stores to Address out of the P and Q arms
// into (a split of) PostBB as one store predicated on the OR of both arm
// conditions. Returns true if the stores were merged.
// NOTE(review): the function-name line (orig. 4097) is missing from this
// extract, as are interior lines 4151, 4153, 4169, 4180, 4207 and 4209
// (see numbering gaps below); verify against upstream.
4098 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4099 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4100 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4101 // For every pointer, there must be exactly two stores, one coming from
4102 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4103 // store (to any address) in PTB,PFB or QTB,QFB.
4104 // FIXME: We could relax this restriction with a bit more work and performance
4105 // testing.
4106 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4107 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4108 if (!PStore || !QStore)
4109 return false;
4110
4111 // Now check the stores are compatible.
4112 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4113 PStore->getValueOperand()->getType() !=
4114 QStore->getValueOperand()->getType())
4115 return false;
4116
4117 // Check that sinking the store won't cause program behavior changes. Sinking
4118 // the store out of the Q blocks won't change any behavior as we're sinking
4119 // from a block to its unconditional successor. But we're moving a store from
4120 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4121 // So we need to check that there are no aliasing loads or stores in
4122 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4123 // operations between PStore and the end of its parent block.
4124 //
4125 // The ideal way to do this is to query AliasAnalysis, but we don't
4126 // preserve AA currently so that is dangerous. Be super safe and just
4127 // check there are no other memory operations at all.
4128 for (auto &I : *QFB->getSinglePredecessor())
4129 if (I.mayReadOrWriteMemory())
4130 return false;
4131 for (auto &I : *QFB)
4132 if (&I != QStore && I.mayReadOrWriteMemory())
4133 return false;
4134 if (QTB)
4135 for (auto &I : *QTB)
4136 if (&I != QStore && I.mayReadOrWriteMemory())
4137 return false;
4138 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4139 I != E; ++I)
4140 if (&*I != PStore && I->mayReadOrWriteMemory())
4141 return false;
4142
4143 // If we're not in aggressive mode, we only optimize if we have some
4144 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4145 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4146 if (!BB)
4147 return true;
4148 // Heuristic: if the block can be if-converted/phi-folded and the
4149 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4150 // thread this store.
// NOTE(review): lines 4151 and 4153 missing (the Cost declaration and part
// of the Budget initializer); verify against upstream.
4152 InstructionCost Budget =
4154 for (auto &I : BB->instructionsWithoutDebug(false)) {
4155 // Consider terminator instruction to be free.
4156 if (I.isTerminator())
4157 continue;
4158 // If this is one the stores that we want to speculate out of this BB,
4159 // then don't count it's cost, consider it to be free.
4160 if (auto *S = dyn_cast<StoreInst>(&I))
// NOTE(review): llvm::find returns an iterator (here a raw pointer into
// the ArrayRef) which is non-null even when the element is absent, so
// this condition is effectively always true. llvm::is_contained looks
// intended — verify against upstream before relying on this heuristic.
4161 if (llvm::find(FreeStores, S))
4162 continue;
4163 // Else, we have a white-list of instructions that we are okay speculating.
4164 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4165 return false; // Not in white-list - not worthwhile folding.
4166 // And finally, if this is a non-free instruction that we are okay
4167 // speculating, ensure that we consider the speculation budget.
4168 Cost +=
// NOTE(review): line 4169 missing (the cost-query expression added to
// Cost); verify against upstream.
4170 if (Cost > Budget)
4171 return false; // Eagerly refuse to fold as soon as we're out of budget.
4172 }
4173 assert(Cost <= Budget &&
4174 "When we run out of budget we will eagerly return from within the "
4175 "per-instruction loop.");
4176 return true;
4177 };
4178
4179 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
// NOTE(review): line 4180 missing (presumably the aggressive-mode flag
// check preceding the IsWorthwhile conjunction); verify against upstream.
4181 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4182 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4183 return false;
4184
4185 // If PostBB has more than two predecessors, we need to split it so we can
4186 // sink the store.
4187 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4188 // We know that QFB's only successor is PostBB. And QFB has a single
4189 // predecessor. If QTB exists, then its only successor is also PostBB.
4190 // If QTB does not exist, then QFB's only predecessor has a conditional
4191 // branch to QFB and PostBB.
4192 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4193 BasicBlock *NewBB =
4194 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4195 if (!NewBB)
4196 return false;
4197 PostBB = NewBB;
4198 }
4199
4200 // OK, we're going to sink the stores to PostBB. The store has to be
4201 // conditional though, so first create the predicate.
4202 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
4203 ->getCondition();
4204 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
4205 ->getCondition();
4206
// NOTE(review): lines 4207 and 4209 missing (the calls producing PPHI and
// QPHI via ensureValueAvailableInSuccessor); verify against upstream.
4208 PStore->getParent());
4210 QStore->getParent(), PPHI);
4211
4212 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4213 IRBuilder<> QB(PostBB, PostBBFirst);
4214 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4215
4216 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
4217 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
4218
4219 if (InvertPCond)
4220 PPred = QB.CreateNot(PPred);
4221 if (InvertQCond)
4222 QPred = QB.CreateNot(QPred);
4223 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4224
4225 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4226 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4227 /*Unreachable=*/false,
4228 /*BranchWeights=*/nullptr, DTU);
4229
4230 QB.SetInsertPoint(T);
4231 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4232 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4233 // Choose the minimum alignment. If we could prove both stores execute, we
4234 // could use biggest one. In this case, though, we only know that one of the
4235 // stores executes. And we don't know it's safe to take the alignment from a
4236 // store that doesn't execute.
4237 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4238
4239 QStore->eraseFromParent();
4240 PStore->eraseFromParent();
4241
4242 return true;
4243}
4244
// Recognize a PBI/QBI diamond-or-triangle pair sharing PostBB and merge
// repeated conditional stores to each common address via
// mergeConditionalStoreToAddress. Returns true if anything changed.
// NOTE(review): the function-name line (orig. 4245) is missing from this
// extract; verify against upstream.
4246 DomTreeUpdater *DTU, const DataLayout &DL,
4247 const TargetTransformInfo &TTI) {
4248 // The intention here is to find diamonds or triangles (see below) where each
4249 // conditional block contains a store to the same address. Both of these
4250 // stores are conditional, so they can't be unconditionally sunk. But it may
4251 // be profitable to speculatively sink the stores into one merged store at the
4252 // end, and predicate the merged store on the union of the two conditions of
4253 // PBI and QBI.
4254 //
4255 // This can reduce the number of stores executed if both of the conditions are
4256 // true, and can allow the blocks to become small enough to be if-converted.
4257 // This optimization will also chain, so that ladders of test-and-set
4258 // sequences can be if-converted away.
4259 //
4260 // We only deal with simple diamonds or triangles:
4261 //
4262 // PBI or PBI or a combination of the two
4263 // / \ | \
4264 // PTB PFB | PFB
4265 // \ / | /
4266 // QBI QBI
4267 // / \ | \
4268 // QTB QFB | QFB
4269 // \ / | /
4270 // PostBB PostBB
4271 //
4272 // We model triangles as a type of diamond with a nullptr "true" block.
4273 // Triangles are canonicalized so that the fallthrough edge is represented by
4274 // a true condition, as in the diagram above.
4275 BasicBlock *PTB = PBI->getSuccessor(0);
4276 BasicBlock *PFB = PBI->getSuccessor(1);
4277 BasicBlock *QTB = QBI->getSuccessor(0);
4278 BasicBlock *QFB = QBI->getSuccessor(1);
4279 BasicBlock *PostBB = QFB->getSingleSuccessor();
4280
4281 // Make sure we have a good guess for PostBB. If QTB's only successor is
4282 // QFB, then QFB is a better PostBB.
4283 if (QTB->getSingleSuccessor() == QFB)
4284 PostBB = QFB;
4285
4286 // If we couldn't find a good PostBB, stop.
4287 if (!PostBB)
4288 return false;
4289
4290 bool InvertPCond = false, InvertQCond = false;
4291 // Canonicalize fallthroughs to the true branches.
4292 if (PFB == QBI->getParent()) {
4293 std::swap(PFB, PTB);
4294 InvertPCond = true;
4295 }
4296 if (QFB == PostBB) {
4297 std::swap(QFB, QTB);
4298 InvertQCond = true;
4299 }
4300
4301 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4302 // and QFB may not. Model fallthroughs as a nullptr block.
4303 if (PTB == QBI->getParent())
4304 PTB = nullptr;
4305 if (QTB == PostBB)
4306 QTB = nullptr;
4307
4308 // Legality bailouts. We must have at least the non-fallthrough blocks and
4309 // the post-dominating block, and the non-fallthroughs must only have one
4310 // predecessor.
4311 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4312 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4313 };
4314 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4315 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4316 return false;
4317 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4318 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4319 return false;
4320 if (!QBI->getParent()->hasNUses(2))
4321 return false;
4322
4323 // OK, this is a sequence of two diamonds or triangles.
4324 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4325 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4326 for (auto *BB : {PTB, PFB}) {
4327 if (!BB)
4328 continue;
4329 for (auto &I : *BB)
4330 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4331 PStoreAddresses.insert(SI->getPointerOperand());
4332 }
4333 for (auto *BB : {QTB, QFB}) {
4334 if (!BB)
4335 continue;
4336 for (auto &I : *BB)
4337 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4338 QStoreAddresses.insert(SI->getPointerOperand());
4339 }
4340
4341 set_intersect(PStoreAddresses, QStoreAddresses);
4342 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4343 // clear what it contains.
4344 auto &CommonAddresses = PStoreAddresses;
4345
4346 bool Changed = false;
4347 for (auto *Address : CommonAddresses)
4348 Changed |=
4349 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4350 InvertPCond, InvertQCond, DTU, DL, TTI);
4351 return Changed;
4352}
4353
4354/// If the previous block ended with a widenable branch, determine if reusing
4355/// the target block is profitable and legal. This will have the effect of
4356/// "widening" PBI, but doesn't require us to reason about hosting safety.
4358 DomTreeUpdater *DTU) {
4359 // TODO: This can be generalized in two important ways:
4360 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4361 // values from the PBI edge.
4362 // 2) We can sink side effecting instructions into BI's fallthrough
4363 // successor provided they doesn't contribute to computation of
4364 // BI's condition.
4365 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4366 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4367 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4368 !BI->getParent()->getSinglePredecessor())
4369 return false;
4370 if (!IfFalseBB->phis().empty())
4371 return false; // TODO
4372 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4373 // may undo the transform done here.
4374 // TODO: There might be a more fine-grained solution to this.
4375 if (!llvm::succ_empty(IfFalseBB))
4376 return false;
4377 // Use lambda to lazily compute expensive condition after cheap ones.
4378 auto NoSideEffects = [](BasicBlock &BB) {
4379 return llvm::none_of(BB, [](const Instruction &I) {
4380 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4381 });
4382 };
4383 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4384 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4385 NoSideEffects(*BI->getParent())) {
4386 auto *OldSuccessor = BI->getSuccessor(1);
4387 OldSuccessor->removePredecessor(BI->getParent());
4388 BI->setSuccessor(1, IfFalseBB);
4389 if (DTU)
4390 DTU->applyUpdates(
4391 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4392 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4393 return true;
4394 }
4395 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4396 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4397 NoSideEffects(*BI->getParent())) {
4398 auto *OldSuccessor = BI->getSuccessor(0);
4399 OldSuccessor->removePredecessor(BI->getParent());
4400 BI->setSuccessor(0, IfFalseBB);
4401 if (DTU)
4402 DTU->applyUpdates(
4403 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4404 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4405 return true;
4406 }
4407 return false;
4408}
4409
4410/// If we have a conditional branch as a predecessor of another block,
4411/// this function tries to simplify it. We know
4412/// that PBI and BI are both conditional branches, and BI is in one of the
4413/// successor blocks of PBI - PBI branches to BI.
4415 DomTreeUpdater *DTU,
4416 const DataLayout &DL,
4417 const TargetTransformInfo &TTI) {
4418 assert(PBI->isConditional() && BI->isConditional());
4419 BasicBlock *BB = BI->getParent();
4420
4421 // If this block ends with a branch instruction, and if there is a
4422 // predecessor that ends on a branch of the same condition, make
4423 // this conditional branch redundant.
4424 if (PBI->getCondition() == BI->getCondition() &&
4425 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4426 // Okay, the outcome of this conditional branch is statically
4427 // knowable. If this block had a single pred, handle specially, otherwise
4428 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4429 if (BB->getSinglePredecessor()) {
4430 // Turn this into a branch on constant.
4431 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4432 BI->setCondition(
4433 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4434 return true; // Nuke the branch on constant.
4435 }
4436 }
4437
4438 // If the previous block ended with a widenable branch, determine if reusing
4439 // the target block is profitable and legal. This will have the effect of
4440 // "widening" PBI, but doesn't require us to reason about hosting safety.
4441 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4442 return true;
4443
4444 // If both branches are conditional and both contain stores to the same
4445 // address, remove the stores from the conditionals and create a conditional
4446 // merged store at the end.
4447 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4448 return true;
4449
4450 // If this is a conditional branch in an empty block, and if any
4451 // predecessors are a conditional branch to one of our destinations,
4452 // fold the conditions into logical ops and one cond br.
4453
4454 // Ignore dbg intrinsics.
4455 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4456 return false;
4457
4458 int PBIOp, BIOp;
4459 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4460 PBIOp = 0;
4461 BIOp = 0;
4462 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4463 PBIOp = 0;
4464 BIOp = 1;
4465 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4466 PBIOp = 1;
4467 BIOp = 0;
4468 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4469 PBIOp = 1;
4470 BIOp = 1;
4471 } else {
4472 return false;
4473 }
4474
4475 // Check to make sure that the other destination of this branch
4476 // isn't BB itself. If so, this is an infinite loop that will
4477 // keep getting unwound.
4478 if (PBI->getSuccessor(PBIOp) == BB)
4479 return false;
4480
4481 // If predecessor's branch probability to BB is too low don't merge branches.
4482 SmallVector<uint32_t, 2> PredWeights;
4483 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4484 extractBranchWeights(*PBI, PredWeights) &&
4485 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4486
4488 PredWeights[PBIOp],
4489 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4490
4492 if (CommonDestProb >= Likely)
4493 return false;
4494 }
4495
4496 // Do not perform this transformation if it would require
4497 // insertion of a large number of select instructions. For targets
4498 // without predication/cmovs, this is a big pessimization.
4499
4500 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4501 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4502 unsigned NumPhis = 0;
4503 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4504 ++II, ++NumPhis) {
4505 if (NumPhis > 2) // Disable this xform.
4506 return false;
4507 }
4508
4509 // Finally, if everything is ok, fold the branches to logical ops.
4510 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4511
4512 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4513 << "AND: " << *BI->getParent());
4514
4516
4517 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4518 // branch in it, where one edge (OtherDest) goes back to itself but the other
4519 // exits. We don't *know* that the program avoids the infinite loop
4520 // (even though that seems likely). If we do this xform naively, we'll end up
4521 // recursively unpeeling the loop. Since we know that (after the xform is
4522 // done) that the block *is* infinite if reached, we just make it an obviously
4523 // infinite loop with no cond branch.
4524 if (OtherDest == BB) {
4525 // Insert it at the end of the function, because it's either code,
4526 // or it won't matter if it's hot. :)
4527 BasicBlock *InfLoopBlock =
4528 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4529 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4530 if (DTU)
4531 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4532 OtherDest = InfLoopBlock;
4533 }
4534
4535 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4536
4537 // BI may have other predecessors. Because of this, we leave
4538 // it alone, but modify PBI.
4539
4540 // Make sure we get to CommonDest on True&True directions.
4541 Value *PBICond = PBI->getCondition();
4542 IRBuilder<NoFolder> Builder(PBI);
4543 if (PBIOp)
4544 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4545
4546 Value *BICond = BI->getCondition();
4547 if (BIOp)
4548 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4549
4550 // Merge the conditions.
4551 Value *Cond =
4552 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4553
4554 // Modify PBI to branch on the new condition to the new dests.
4555 PBI->setCondition(Cond);
4556 PBI->setSuccessor(0, CommonDest);
4557 PBI->setSuccessor(1, OtherDest);
4558
4559 if (DTU) {
4560 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4561 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4562
4563 DTU->applyUpdates(Updates);
4564 }
4565
4566 // Update branch weight for PBI.
4567 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4568 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4569 bool HasWeights =
4570 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4571 SuccTrueWeight, SuccFalseWeight);
4572 if (HasWeights) {
4573 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4574 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4575 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4576 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4577 // The weight to CommonDest should be PredCommon * SuccTotal +
4578 // PredOther * SuccCommon.
4579 // The weight to OtherDest should be PredOther * SuccOther.
4580 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4581 PredOther * SuccCommon,
4582 PredOther * SuccOther};
4583 // Halve the weights if any of them cannot fit in an uint32_t
4584 fitWeights(NewWeights);
4585
4586 setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
4587 }
4588
4589 // OtherDest may have phi nodes. If so, add an entry from PBI's
4590 // block that are identical to the entries for BI's block.
4591 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4592
4593 // We know that the CommonDest already had an edge from PBI to
4594 // it. If it has PHIs though, the PHIs may have different
4595 // entries for BB and PBI's BB. If so, insert a select to make
4596 // them agree.
4597 for (PHINode &PN : CommonDest->phis()) {
4598 Value *BIV = PN.getIncomingValueForBlock(BB);
4599 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4600 Value *PBIV = PN.getIncomingValue(PBBIdx);
4601 if (BIV != PBIV) {
4602 // Insert a select in PBI to pick the right value.
4603 SelectInst *NV = cast<SelectInst>(
4604 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4605 PN.setIncomingValue(PBBIdx, NV);
4606 // Although the select has the same condition as PBI, the original branch
4607 // weights for PBI do not apply to the new select because the select's
4608 // 'logical' edges are incoming edges of the phi that is eliminated, not
4609 // the outgoing edges of PBI.
4610 if (HasWeights) {
4611 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4612 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4613 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4614 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4615 // The weight to PredCommonDest should be PredCommon * SuccTotal.
4616 // The weight to PredOtherDest should be PredOther * SuccCommon.
4617 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4618 PredOther * SuccCommon};
4619
4620 fitWeights(NewWeights);
4621
4622 setBranchWeights(NV, NewWeights[0], NewWeights[1],
4623 /*IsExpected=*/false);
4624 }
4625 }
4626 }
4627
4628 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4629 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4630
4631 // This basic block is probably dead. We know it has at least
4632 // one fewer predecessor.
4633 return true;
4634}
4635
4636// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4637// true or to FalseBB if Cond is false.
4638// Takes care of updating the successors and removing the old terminator.
4639// Also makes sure not to introduce new successors by assuming that edges to
4640// non-successor TrueBBs and FalseBBs aren't reachable.
4641bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4642 Value *Cond, BasicBlock *TrueBB,
4643 BasicBlock *FalseBB,
4644 uint32_t TrueWeight,
4645 uint32_t FalseWeight) {
4646 auto *BB = OldTerm->getParent();
4647 // Remove any superfluous successor edges from the CFG.
4648 // First, figure out which successors to preserve.
4649 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4650 // successor.
4651 BasicBlock *KeepEdge1 = TrueBB;
4652 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4653
4654 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4655
4656 // Then remove the rest.
4657 for (BasicBlock *Succ : successors(OldTerm)) {
4658 // Make sure only to keep exactly one copy of each edge.
4659 if (Succ == KeepEdge1)
4660 KeepEdge1 = nullptr;
4661 else if (Succ == KeepEdge2)
4662 KeepEdge2 = nullptr;
4663 else {
4664 Succ->removePredecessor(BB,
4665 /*KeepOneInputPHIs=*/true);
4666
4667 if (Succ != TrueBB && Succ != FalseBB)
4668 RemovedSuccessors.insert(Succ);
4669 }
4670 }
4671
4672 IRBuilder<> Builder(OldTerm);
4673 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4674
4675 // Insert an appropriate new terminator.
4676 if (!KeepEdge1 && !KeepEdge2) {
4677 if (TrueBB == FalseBB) {
4678 // We were only looking for one successor, and it was present.
4679 // Create an unconditional branch to it.
4680 Builder.CreateBr(TrueBB);
4681 } else {
4682 // We found both of the successors we were looking for.
4683 // Create a conditional branch sharing the condition of the select.
4684 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4685 if (TrueWeight != FalseWeight)
4686 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
4687 }
4688 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4689 // Neither of the selected blocks were successors, so this
4690 // terminator must be unreachable.
4691 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4692 } else {
4693 // One of the selected values was a successor, but the other wasn't.
4694 // Insert an unconditional branch to the one that was found;
4695 // the edge to the one that wasn't must be unreachable.
4696 if (!KeepEdge1) {
4697 // Only TrueBB was found.
4698 Builder.CreateBr(TrueBB);
4699 } else {
4700 // Only FalseBB was found.
4701 Builder.CreateBr(FalseBB);
4702 }
4703 }
4704
4706
4707 if (DTU) {
4709 Updates.reserve(RemovedSuccessors.size());
4710 for (auto *RemovedSuccessor : RemovedSuccessors)
4711 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4712 DTU->applyUpdates(Updates);
4713 }
4714
4715 return true;
4716}
4717
4718// Replaces
4719// (switch (select cond, X, Y)) on constant X, Y
4720// with a branch - conditional if X and Y lead to distinct BBs,
4721// unconditional otherwise.
4722bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4723 SelectInst *Select) {
4724 // Check for constant integer values in the select.
4725 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4726 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4727 if (!TrueVal || !FalseVal)
4728 return false;
4729
4730 // Find the relevant condition and destinations.
4731 Value *Condition = Select->getCondition();
4732 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4733 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4734
4735 // Get weight for TrueBB and FalseBB.
4736 uint32_t TrueWeight = 0, FalseWeight = 0;
4738 bool HasWeights = hasBranchWeightMD(*SI);
4739 if (HasWeights) {
4740 getBranchWeights(SI, Weights);
4741 if (Weights.size() == 1 + SI->getNumCases()) {
4742 TrueWeight =
4743 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4744 FalseWeight =
4745 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4746 }
4747 }
4748
4749 // Perform the actual simplification.
4750 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4751 FalseWeight);
4752}
4753
4754// Replaces
4755// (indirectbr (select cond, blockaddress(@fn, BlockA),
4756// blockaddress(@fn, BlockB)))
4757// with
4758// (br cond, BlockA, BlockB).
4759bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4760 SelectInst *SI) {
4761 // Check that both operands of the select are block addresses.
4762 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4763 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4764 if (!TBA || !FBA)
4765 return false;
4766
4767 // Extract the actual blocks.
4768 BasicBlock *TrueBB = TBA->getBasicBlock();
4769 BasicBlock *FalseBB = FBA->getBasicBlock();
4770
4771 // Perform the actual simplification.
4772 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4773 0);
4774}
4775
4776/// This is called when we find an icmp instruction
4777/// (a seteq/setne with a constant) as the only instruction in a
4778/// block that ends with an uncond branch. We are looking for a very specific
4779/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4780/// this case, we merge the first two "or's of icmp" into a switch, but then the
4781/// default value goes to an uncond block with a seteq in it, we get something
4782/// like:
4783///
4784/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4785/// DEFAULT:
4786/// %tmp = icmp eq i8 %A, 92
4787/// br label %end
4788/// end:
4789/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4790///
4791/// We prefer to split the edge to 'end' so that there is a true/false entry to
4792/// the PHI, merging the third icmp into the switch.
4793bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4794 ICmpInst *ICI, IRBuilder<> &Builder) {
4795 BasicBlock *BB = ICI->getParent();
4796
4797 // If the block has any PHIs in it or the icmp has multiple uses, it is too
4798 // complex.
4799 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
4800 return false;
4801
4802 Value *V = ICI->getOperand(0);
4803 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
4804
4805 // The pattern we're looking for is where our only predecessor is a switch on
4806 // 'V' and this block is the default case for the switch. In this case we can
4807 // fold the compared value into the switch to simplify things.
4808 BasicBlock *Pred = BB->getSinglePredecessor();
4809 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
4810 return false;
4811
4812 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
4813 if (SI->getCondition() != V)
4814 return false;
4815
4816 // If BB is reachable on a non-default case, then we simply know the value of
4817 // V in this block. Substitute it and constant fold the icmp instruction
4818 // away.
4819 if (SI->getDefaultDest() != BB) {
4820 ConstantInt *VVal = SI->findCaseDest(BB);
4821 assert(VVal && "Should have a unique destination value");
4822 ICI->setOperand(0, VVal);
4823
4824 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
4825 ICI->replaceAllUsesWith(V);
4826 ICI->eraseFromParent();
4827 }
4828 // BB is now empty, so it is likely to simplify away.
4829 return requestResimplify();
4830 }
4831
4832 // Ok, the block is reachable from the default dest. If the constant we're
4833 // comparing exists in one of the other edges, then we can constant fold ICI
4834 // and zap it.
4835 if (SI->findCaseValue(Cst) != SI->case_default()) {
4836 Value *V;
4837 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4839 else
4841
4842 ICI->replaceAllUsesWith(V);
4843 ICI->eraseFromParent();
4844 // BB is now empty, so it is likely to simplify away.
4845 return requestResimplify();
4846 }
4847
4848 // The use of the icmp has to be in the 'end' block, by the only PHI node in
4849 // the block.
4850 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
4851 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
4852 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
4853 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
4854 return false;
4855
4856 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
4857 // true in the PHI.
4858 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
4859 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
4860
4861 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4862 std::swap(DefaultCst, NewCst);
4863
4864 // Replace ICI (which is used by the PHI for the default value) with true or
4865 // false depending on if it is EQ or NE.
4866 ICI->replaceAllUsesWith(DefaultCst);
4867 ICI->eraseFromParent();
4868
4870
4871 // Okay, the switch goes to this block on a default value. Add an edge from
4872 // the switch to the merge point on the compared value.
4873 BasicBlock *NewBB =
4874 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
4875 {
4877 auto W0 = SIW.getSuccessorWeight(0);
4879 if (W0) {
4880 NewW = ((uint64_t(*W0) + 1) >> 1);
4881 SIW.setSuccessorWeight(0, *NewW);
4882 }
4883 SIW.addCase(Cst, NewBB, NewW);
4884 if (DTU)
4885 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
4886 }
4887
4888 // NewBB branches to the phi block, add the uncond branch and the phi entry.
4889 Builder.SetInsertPoint(NewBB);
4890 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
4891 Builder.CreateBr(SuccBlock);
4892 PHIUse->addIncoming(NewCst, NewBB);
4893 if (DTU) {
4894 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
4895 DTU->applyUpdates(Updates);
4896 }
4897 return true;
4898}
4899
4900/// The specified branch is a conditional branch.
4901/// Check to see if it is branching on an or/and chain of icmp instructions, and
4902/// fold it into a switch instruction if so.
4903bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
4904 IRBuilder<> &Builder,
4905 const DataLayout &DL) {
4906 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4907 if (!Cond)
4908 return false;
4909
4910 // Change br (X == 0 | X == 1), T, F into a switch instruction.
4911 // If this is a bunch of seteq's or'd together, or if it's a bunch of
4912 // 'setne's and'ed together, collect them.
4913
4914 // Try to gather values from a chain of and/or to be turned into a switch
4915 ConstantComparesGatherer ConstantCompare(Cond, DL);
4916 // Unpack the result
4917 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
4918 Value *CompVal = ConstantCompare.CompValue;
4919 unsigned UsedICmps = ConstantCompare.UsedICmps;
4920 Value *ExtraCase = ConstantCompare.Extra;
4921
4922 // If we didn't have a multiply compared value, fail.
4923 if (!CompVal)
4924 return false;
4925
4926 // Avoid turning single icmps into a switch.
4927 if (UsedICmps <= 1)
4928 return false;
4929
4930 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
4931
4932 // There might be duplicate constants in the list, which the switch
4933 // instruction can't handle, remove them now.
4934 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
4935 Values.erase(llvm::unique(Values), Values.end());
4936
4937 // If Extra was used, we require at least two switch values to do the
4938 // transformation. A switch with one value is just a conditional branch.
4939 if (ExtraCase && Values.size() < 2)
4940 return false;
4941
4942 // TODO: Preserve branch weight metadata, similarly to how
4943 // foldValueComparisonIntoPredecessors preserves it.
4944
4945 // Figure out which block is which destination.
4946 BasicBlock *DefaultBB = BI->getSuccessor(1);
4947 BasicBlock *EdgeBB = BI->getSuccessor(0);
4948 if (!TrueWhenEqual)
4949 std::swap(DefaultBB, EdgeBB);
4950
4951 BasicBlock *BB = BI->getParent();
4952
4953 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
4954 << " cases into SWITCH. BB is:\n"
4955 << *BB);
4956
4958
4959 // If there are any extra values that couldn't be folded into the switch
4960 // then we evaluate them with an explicit branch first. Split the block
4961 // right before the condbr to handle it.
4962 if (ExtraCase) {
4963 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
4964 /*MSSAU=*/nullptr, "switch.early.test");
4965
4966 // Remove the uncond branch added to the old block.
4967 Instruction *OldTI = BB->getTerminator();
4968 Builder.SetInsertPoint(OldTI);
4969
4970 // There can be an unintended UB if extra values are Poison. Before the
4971 // transformation, extra values may not be evaluated according to the
4972 // condition, and it will not raise UB. But after transformation, we are
4973 // evaluating extra values before checking the condition, and it will raise
4974 // UB. It can be solved by adding freeze instruction to extra values.
4975 AssumptionCache *AC = Options.AC;
4976
4977 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
4978 ExtraCase = Builder.CreateFreeze(ExtraCase);
4979
4980 if (TrueWhenEqual)
4981 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
4982 else
4983 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
4984
4985 OldTI->eraseFromParent();
4986
4987 if (DTU)
4988 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
4989
4990 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
4991 // for the edge we just added.
4992 addPredecessorToBlock(EdgeBB, BB, NewBB);
4993
4994 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
4995 << "\nEXTRABB = " << *BB);
4996 BB = NewBB;
4997 }
4998
4999 Builder.SetInsertPoint(BI);
5000 // Convert pointer to int before we switch.
5001 if (CompVal->getType()->isPointerTy()) {
5002 CompVal = Builder.CreatePtrToInt(
5003 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5004 }
5005
5006 // Create the new switch instruction now.
5007 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5008
5009 // Add all of the 'cases' to the switch instruction.
5010 for (unsigned i = 0, e = Values.size(); i != e; ++i)
5011 New->addCase(Values[i], EdgeBB);
5012
5013 // We added edges from PI to the EdgeBB. As such, if there were any
5014 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5015 // the number of edges added.
5016 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5017 PHINode *PN = cast<PHINode>(BBI);
5018 Value *InVal = PN->getIncomingValueForBlock(BB);
5019 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5020 PN->addIncoming(InVal, BB);
5021 }
5022
5023 // Erase the old branch instruction.
5025 if (DTU)
5026 DTU->applyUpdates(Updates);
5027
5028 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5029 return true;
5030}
5031
5032bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5033 if (isa<PHINode>(RI->getValue()))
5034 return simplifyCommonResume(RI);
5035 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
5036 RI->getValue() == RI->getParent()->getFirstNonPHI())
5037 // The resume must unwind the exception that caused control to branch here.
5038 return simplifySingleResume(RI);
5039
5040 return false;
5041}
5042
5043// Check if cleanup block is empty
5045 for (Instruction &I : R) {
5046 auto *II = dyn_cast<IntrinsicInst>(&I);
5047 if (!II)
5048 return false;
5049
5050 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5051 switch (IntrinsicID) {
5052 case Intrinsic::dbg_declare:
5053 case Intrinsic::dbg_value:
5054 case Intrinsic::dbg_label:
5055 case Intrinsic::lifetime_end:
5056 break;
5057 default:
5058 return false;
5059 }
5060 }
5061 return true;
5062}
5063
5064// Simplify resume that is shared by several landing pads (phi of landing pad).
5065bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5066 BasicBlock *BB = RI->getParent();
5067
5068 // Check that there are no other instructions except for debug and lifetime
5069 // intrinsics between the phi's and resume instruction.
5071 make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
5072 return false;
5073
5074 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5075 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5076
5077 // Check incoming blocks to see if any of them are trivial.
5078 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5079 Idx++) {
5080 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5081 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5082
5083 // If the block has other successors, we can not delete it because
5084 // it has other dependents.
5085 if (IncomingBB->getUniqueSuccessor() != BB)
5086 continue;
5087
5088 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
5089 // Not the landing pad that caused the control to branch here.
5090 if (IncomingValue != LandingPad)
5091 continue;
5092
5094 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5095 TrivialUnwindBlocks.insert(IncomingBB);
5096 }
5097
5098 // If no trivial unwind blocks, don't do any simplifications.
5099 if (TrivialUnwindBlocks.empty())
5100 return false;
5101
5102 // Turn all invokes that unwind here into calls.
5103 for (auto *TrivialBB : TrivialUnwindBlocks) {
5104 // Blocks that will be simplified should be removed from the phi node.
5105 // Note there could be multiple edges to the resume block, and we need
5106 // to remove them all.
5107 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5108 BB->removePredecessor(TrivialBB, true);
5109
5110 for (BasicBlock *Pred :
5112 removeUnwindEdge(Pred, DTU);
5113 ++NumInvokes;
5114 }
5115
5116 // In each SimplifyCFG run, only the current processed block can be erased.
5117 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5118 // of erasing TrivialBB, we only remove the branch to the common resume
5119 // block so that we can later erase the resume block since it has no
5120 // predecessors.
5121 TrivialBB->getTerminator()->eraseFromParent();
5122 new UnreachableInst(RI->getContext(), TrivialBB);
5123 if (DTU)
5124 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5125 }
5126
5127 // Delete the resume block if all its predecessors have been removed.
5128 if (pred_empty(BB))
5129 DeleteDeadBlock(BB, DTU);
5130
5131 return !TrivialUnwindBlocks.empty();
5132}
5133
5134// Simplify resume that is only used by a single (non-phi) landing pad.
5135bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5136 BasicBlock *BB = RI->getParent();
5137 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
5138 assert(RI->getValue() == LPInst &&
5139 "Resume must unwind the exception that caused control to here");
5140
5141 // Check that there are no other instructions except for debug intrinsics.
5143 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5144 return false;
5145
5146 // Turn all invokes that unwind here into calls and delete the basic block.
5148 removeUnwindEdge(Pred, DTU);
5149 ++NumInvokes;
5150 }
5151
5152 // The landingpad is now unreachable. Zap it.
5153 DeleteDeadBlock(BB, DTU);
5154 return true;
5155}
5156
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
          make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      // If the value flows through a PHI in BB we must translate it
      // per-predecessor; otherwise every predecessor contributes SrcVal.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    Instruction *InsertPt = DestEHPad;
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
    if (UnwindDest == nullptr) {
      if (DTU) {
        // Flush queued updates first: removeUnwindEdge does its own CFG/DT
        // maintenance and must see a consistent tree.
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      // Retarget the predecessor's terminator straight at UnwindDest,
      // bypassing the empty cleanup block.
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5273
// Try to merge two cleanuppads together.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanuppad with the predecessor pad.
  // The only cleanuppad uses should be this cleanupret, its cleanupret, and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.  (The new branch is appended to RI's parent block; RI itself
  // is erased immediately after, leaving the block well-terminated.)
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5306
5307bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5308 // It is possible to transiantly have an undef cleanuppad operand because we
5309 // have deleted some, but not all, dead blocks.
5310 // Eventually, this block will be deleted.
5311 if (isa<UndefValue>(RI->getOperand(0)))
5312 return false;
5313
5314 if (mergeCleanupPad(RI))
5315 return true;
5316
5317 if (removeEmptyCleanup(RI, DTU))
5318 return true;
5319
5320 return false;
5321}
5322
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    --BBI;

      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // NOTE(review): Preds appears to be a snapshot of BB's predecessor list
  // taken before this loop mutates them (declaration elided in this view).
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        // Every successor is unreachable, so the branch itself is too.
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        // The edge into BB is never taken; record that fact as an assumption
        // and fold the conditional branch to the live successor.
        CallInst *Assumption;
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      // Drop every case whose destination is the unreachable block.
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        if (DTU) {
          // Flush pending updates before removeUnwindEdge mutates the CFG.
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        // The invoke can never unwind here, so turn it into a call and mark
        // it nothrow.
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // Remove every handler that points at the unreachable block.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5506
  assert(Cases.size() >= 1);

  // Walk adjacent pairs: each value must be exactly one greater than the
  // value that follows it, so the whole list forms a contiguous descending
  // run.  NOTE(review): this only proves contiguity if Cases has been sorted
  // in descending order beforehand — confirm the (elided) preceding code.
  for (size_t I = 1, E = Cases.size(); I != E; ++I) {
    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
      return false;
  }
  return true;
}
5517
                                           DomTreeUpdater *DTU,
                                           bool RemoveOrigDefaultBlock = true) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(BB);
  // Create a fresh block containing only 'unreachable' and make it the
  // switch's new default destination.
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only delete the old edge if no remaining case still targets the
    // original default block.
    if (RemoveOrigDefaultBlock &&
        !is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5540
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  // A reachable default block counts as an extra destination.
  bool HasDefault =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());

  auto *BB = SI->getParent();

  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
  BasicBlock *ContiguousDest = nullptr;
  BasicBlock *OtherDest = nullptr;
  if (!CasesA.empty() && casesAreContiguous(CasesA)) {
    ContiguousCases = &CasesA;
    ContiguousDest = DestA;
    OtherDest = DestB;
  } else if (casesAreContiguous(CasesB)) {
    ContiguousCases = &CasesB;
    ContiguousDest = DestB;
    OtherDest = DestA;
  } else
    return false;

  // Start building the compare and branch.

  // Bias the condition so the contiguous range starts at zero; then a single
  // unsigned-less-than compare tests membership in the entire range.
  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
  Constant *NumCases =
      ConstantInt::get(Offset->getType(), ContiguousCases->size());

  Value *Sub = SI->getCondition();
  if (!Offset->isNullValue())
    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");

  Value *Cmp;
  // If NumCases overflowed, then all possible values jump to the successor.
  if (NumCases->isNullValue() && !ContiguousCases->empty())
    Cmp = ConstantInt::getTrue(SI->getContext());
  else
    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI)) {
    getBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == ContiguousDest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Scale both weights down together until each fits in 32 bits, keeping
      // their ratio intact.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  // The new branch contributes exactly one edge per successor, so remove all
  // but one of the switch's old incoming entries.
  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = ContiguousCases->size();
    if (ContiguousDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }
  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
5670
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =
      ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);

  // Gather dead cases.
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      // Track per-successor case counts so we can tell when the last edge to
      // a successor disappears and its DT edge must be deleted.
      if (!NumPerSuccessorCases.count(Successor))
        UniqueSuccessors.push_back(Successor);
      ++NumPerSuccessorCases[Successor];
    }
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    // A case is dead if it sets a known-zero bit, misses a known-one bit, or
    // needs more significant bits than the condition can carry.
    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
      DeadCases.push_back(Case.getCaseValue());
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    }
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty() &&
      NumUnknownBits < 64 /* avoid overflow */) {
    uint64_t AllNumCases = 1ULL << NumUnknownBits;
    if (SI->getNumCases() == AllNumCases) {
      return true;
    }
    // When only one case value is missing, replace default with that case.
    // Eliminating the default branch will provide more opportunities for
    // optimization, such as lookup tables.
    if (SI->getNumCases() == AllNumCases - 1) {
      assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
      IntegerType *CondTy = cast<IntegerType>(Cond->getType());
      if (CondTy->getIntegerBitWidth() > 64 ||
          !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
        return false;

      // With more than one unknown bit, the XOR of all possible condition
      // values is zero, so XOR-ing the present case values leaves exactly the
      // single missing value.
      uint64_t MissingCaseVal = 0;
      for (const auto &Case : SI->cases())
        MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
      auto *MissingCase =
          cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
      SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
      createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
      SIW.setSuccessorWeight(0, 0);
      return true;
    }
  }

  if (DeadCases.empty())
    return false;

  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SIW.removeCase(CaseI);
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
5769
/// If BB would be eligible for simplification by
/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
/// by an unconditional branch), look at the phi node for BB in the successor
/// block and see if the incoming value is equal to CaseValue. If so, return
/// the phi node, and set PhiIndex to BB's index in the phi node.
                                             BasicBlock *BB, int *PhiIndex) {
  if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
    return nullptr; // BB must be empty to be a candidate for simplification.
  if (!BB->getSinglePredecessor())
    return nullptr; // BB must be dominated by the switch.

  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
  if (!Branch || !Branch->isUnconditional())
    return nullptr; // Terminator must be unconditional branch.

  BasicBlock *Succ = Branch->getSuccessor(0);

  // Scan the successor's PHIs for one whose incoming value along the BB edge
  // is exactly CaseValue; the first match wins.
  for (PHINode &PHI : Succ->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB);
    assert(Idx >= 0 && "PHI has no entry for predecessor?");

    Value *InValue = PHI.getIncomingValue(Idx);
    if (InValue != CaseValue)
      continue;

    *PhiIndex = Idx;
    return &PHI;
  }

  return nullptr;
}
5802
/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
/// blocks of the switch can be folded away. Return true if a change is made.
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;

  ForwardingNodesMap ForwardingNodes;
  BasicBlock *SwitchBlock = SI->getParent();
  bool Changed = false;
  for (const auto &Case : SI->cases()) {
    ConstantInt *CaseValue = Case.getCaseValue();
    BasicBlock *CaseDest = Case.getCaseSuccessor();

    // Replace phi operands in successor blocks that are using the constant case
    // value rather than the switch condition variable:
    // switchbb:
    // switch i32 %x, label %default [
    //   i32 17, label %succ
    // ...
    // succ:
    //   %r = phi i32 ... [ 17, %switchbb ] ...
    // -->
    //   %r = phi i32 ... [ %x, %switchbb ] ...

    for (PHINode &Phi : CaseDest->phis()) {
      // This only works if there is exactly 1 incoming edge from the switch to
      // a phi. If there is >1, that means multiple cases of the switch map to 1
      // value in the phi, and that phi value is not the switch condition. Thus,
      // this transform would not make sense (the phi would be invalid because
      // a phi can't have different incoming values from the same block).
      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
          count(Phi.blocks(), SwitchBlock) == 1) {
        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
        Changed = true;
      }
    }

    // Collect phi nodes that are indirectly using this switch's case constants.
    int PhiIdx;
    if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
      ForwardingNodes[Phi].push_back(PhiIdx);
  }

  for (auto &ForwardingNode : ForwardingNodes) {
    PHINode *Phi = ForwardingNode.first;
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
    // Check if it helps to fold PHI.
    // Forwarding pays off when at least two entries would fold, or when the
    // phi already has the switch condition as an incoming value.
    if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
      continue;

    for (int Index : Indexes)
      Phi->setIncomingValue(Index, SI->getCondition());
    Changed = true;
  }

  return Changed;
}
5861
/// Return true if the backend will be able to handle
/// initializing an array of constants like C.
  if (C->isThreadDependent())
    return false;
  if (C->isDLLImportDependent())
    return false;

  // Only simple scalar-like constants can be baked into a static table.
  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
      !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
      !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
    return false;

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
    // Pointer casts and in-bounds GEPs will not prohibit the backend from
    // materializing the array of constants.
    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
    if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
      return false;
  }

    return false;

  return true;
}
5888
/// If V is a Constant, return it. Otherwise, try to look up
/// its constant value in ConstantPool, returning 0 if it's not there.
static Constant *
  if (Constant *C = dyn_cast<Constant>(V))
    return C;
  // A map miss yields the value type's default, i.e. a null Constant*.
  return ConstantPool.lookup(V);
}
5898
/// Try to fold instruction I into a constant. This works for
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
static Constant *
  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
    Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
    if (!A)
      return nullptr;
    // A select folds to its true arm when the condition is all-ones and to
    // its false arm when the condition is zero; anything else gives up.
    if (A->isAllOnesValue())
      return lookupConstant(Select->getTrueValue(), ConstantPool);
    if (A->isNullValue())
      return lookupConstant(Select->getFalseValue(), ConstantPool);
    return nullptr;
  }

  // Every operand must be (or map to) a constant for folding to proceed.
  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
    if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
      COps.push_back(A);
    else
      return nullptr;
  }

  return ConstantFoldInstOperands(I, COps, DL);
}
5927
/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
               BasicBlock **CommonDest,
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
               const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(0);
    } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would then
      // no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (Instruction *I = dyn_cast<Instruction>(User))
          if (I->getParent() == CaseDest)
            continue;
        if (PHINode *Phi = dyn_cast<PHINode>(User))
          if (Phi->getIncomingBlock(Use) == CaseDest)
            continue;
        return false;
      }

      ConstantPool.insert(std::make_pair(&I, C));
    } else {
      // Neither a terminator nor constant-foldable: stop scanning here.
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!validLookupTableConstant(ConstVal, TTI))
      return false;

    Res.push_back(std::make_pair(&PHI, ConstVal));
  }

  return Res.size() > 0;
}
6001
6002// Helper function used to add CaseVal to the list of cases that generate
6003// Result. Returns the updated number of cases that generate this result.
6004static size_t mapCaseToResult(ConstantInt *CaseVal,
6005 SwitchCaseResultVectorTy &UniqueResults,
6006 Constant *Result) {
6007 for (auto &I : UniqueResults) {
6008 if (I.first == Result) {
6009 I.second.push_back(CaseVal);
6010 return I.second.size();
6011 }
6012 }
6013 UniqueResults.push_back(
6014 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6015 return 1;
6016}
6017
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  // Every case must reach the same CommonDest and feed the same single PHI.
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value; a null case value denotes the default.
  BasicBlock *DefaultDest = SI->getDefaultDest();
  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
                 DL, TTI);
  // If the default value is not found abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
  if ((!DefaultResult &&
       !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
    return false;

  return true;
}
6075
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// TODO: Handle switches with more than 2 cases that map to the same result.
//
// Returns the select (chain) computing the same value as the switch's PHI, or
// nullptr if no transformation applies. All new instructions are emitted via
// Builder at its current insertion point.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) { %0 = icmp eq i32 %a, 10
  // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
  // default: return 4; %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    // With a reachable default, the inner select chooses between the second
    // case's result and the default; otherwise the second case's result is
    // used directly as the outer select's false arm.
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                SelectValue, "switch.select");
  }

  // Handle the degenerate case where multiple cases map to one result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // Find minimal value.
      for (auto *Case : CaseValues)
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        // The rebased case values enumerate every combination of the touched
        // bits, so "no bit outside BitMask set" is an exact membership test.
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      }
    }

    // Handle the degenerate case where two cases have the same value.
    if (CaseValues.size() == 2) {
      // cond == case0 || cond == case1 ? result : default
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
    }
  }

  // No applicable pattern.
  return nullptr;
}
6152
6153// Helper function to cleanup a switch instruction that has been converted into
6154// a select, fixing up PHI nodes and basic blocks.
6156 Value *SelectValue,
6157 IRBuilder<> &Builder,
6158 DomTreeUpdater *DTU) {
6159 std::vector<DominatorTree::UpdateType> Updates;
6160
6161 BasicBlock *SelectBB = SI->getParent();
6162 BasicBlock *DestBB = PHI->getParent();
6163
6164 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6165 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6166 Builder.CreateBr(DestBB);
6167
6168 // Remove the switch.
6169
6170 PHI->removeIncomingValueIf(
6171 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6172 PHI->addIncoming(SelectValue, SelectBB);
6173
6174 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6175 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6176 BasicBlock *Succ = SI->getSuccessor(i);
6177
6178 if (Succ == DestBB)
6179 continue;
6180 Succ->removePredecessor(SelectBB);
6181 if (DTU && RemovedSuccessors.insert(Succ).second)
6182 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6183 }
6184 SI->eraseFromParent();
6185 if (DTU)
6186 DTU->applyUpdates(Updates);
6187}
6188
6189/// If a switch is only used to initialize one or more phi nodes in a common
6190/// successor block with only two different constant values, try to replace the
6191/// switch with a select. Returns true if the fold was made.
6192static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6193 DomTreeUpdater *DTU, const DataLayout &DL,
6194 const TargetTransformInfo &TTI) {
6195 Value *const Cond = SI->getCondition();
6196 PHINode *PHI = nullptr;
6197 BasicBlock *CommonDest = nullptr;
6198 Constant *DefaultResult;
6199 SwitchCaseResultVectorTy UniqueResults;
6200 // Collect all the cases that will deliver the same value from the switch.
6201 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6202 DL, TTI, /*MaxUniqueResults*/ 2))
6203 return false;
6204
6205 assert(PHI != nullptr && "PHI for value select not found");
6206 Builder.SetInsertPoint(SI);
6207 Value *SelectValue =
6208 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6209 if (!SelectValue)
6210 return false;
6211
6212 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6213 return true;
6214}
6215
namespace {

/// This class represents a lookup table that can be used to replace a switch.
/// The representation (Kind) is chosen by the constructor based on the table's
/// contents; buildLookup emits the matching retrieval code.
class SwitchLookupTable {
public:
  /// Create a lookup table to use as a switch replacement with the contents
  /// of Values, using DefaultValue to fill any holes in the table.
  SwitchLookupTable(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve the value at
  /// the position given by Index in the lookup table.
  Value *buildLookup(Value *Index, IRBuilder<> &Builder);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

private:
  // Depending on the contents of the table, it can be represented in
  // different ways.
  enum {
    // For tables where each element contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For tables where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    ArrayKind
  } Kind;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  // result = LinearOffset + Index * LinearMultiplier.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True if the linear map may wrap; suppresses the nsw flags on the emitted
  // mul/add.
  bool LinearMapValWrapped = false;

  // For ArrayKind, this is the array.
  GlobalVariable *Array = nullptr;
};

} // end anonymous namespace
6277
6278SwitchLookupTable::SwitchLookupTable(
6279 Module &M, uint64_t TableSize, ConstantInt *Offset,
6280 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6281 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6282 assert(Values.size() && "Can't build lookup table without values!");
6283 assert(TableSize >= Values.size() && "Can't fit values in table!");
6284
6285 // If all values in the table are equal, this is that value.
6286 SingleValue = Values.begin()->second;
6287
6288 Type *ValueType = Values.begin()->second->getType();
6289
6290 // Build up the table contents.
6291 SmallVector<Constant *, 64> TableContents(TableSize);
6292 for (size_t I = 0, E = Values.size(); I != E; ++I) {
6293 ConstantInt *CaseVal = Values[I].first;
6294 Constant *CaseRes = Values[I].second;
6295 assert(CaseRes->getType() == ValueType);
6296
6297 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6298 TableContents[Idx] = CaseRes;
6299
6300 if (CaseRes != SingleValue)
6301 SingleValue = nullptr;
6302 }
6303
6304 // Fill in any holes in the table with the default result.
6305 if (Values.size() < TableSize) {
6306 assert(DefaultValue &&
6307 "Need a default value to fill the lookup table holes.");
6308 assert(DefaultValue->getType() == ValueType);
6309 for (uint64_t I = 0; I < TableSize; ++I) {
6310 if (!TableContents[I])
6311 TableContents[I] = DefaultValue;
6312 }
6313
6314 if (DefaultValue != SingleValue)
6315 SingleValue = nullptr;
6316 }
6317
6318 // If each element in the table contains the same value, we only need to store
6319 // that single value.
6320 if (SingleValue) {
6321 Kind = SingleValueKind;
6322 return;
6323 }
6324
6325 // Check if we can derive the value with a linear transformation from the
6326 // table index.
6327 if (isa<IntegerType>(ValueType)) {
6328 bool LinearMappingPossible = true;
6329 APInt PrevVal;
6330 APInt DistToPrev;
6331 // When linear map is monotonic and signed overflow doesn't happen on
6332 // maximum index, we can attach nsw on Add and Mul.
6333 bool NonMonotonic = false;
6334 assert(TableSize >= 2 && "Should be a SingleValue table.");
6335 // Check if there is the same distance between two consecutive values.
6336 for (uint64_t I = 0; I < TableSize; ++I) {
6337 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6338 if (!ConstVal) {
6339 // This is an undef. We could deal with it, but undefs in lookup tables
6340 // are very seldom. It's probably not worth the additional complexity.
6341 LinearMappingPossible = false;
6342 break;
6343 }
6344 const APInt &Val = ConstVal->getValue();
6345 if (I != 0) {
6346 APInt Dist = Val - PrevVal;
6347 if (I == 1) {
6348 DistToPrev = Dist;
6349 } else if (Dist != DistToPrev) {
6350 LinearMappingPossible = false;
6351 break;
6352 }
6353 NonMonotonic |=
6354 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6355 }
6356 PrevVal = Val;
6357 }
6358 if (LinearMappingPossible) {
6359 LinearOffset = cast<ConstantInt>(TableContents[0]);
6360 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6361 bool MayWrap = false;
6362 APInt M = LinearMultiplier->getValue();
6363 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6364 LinearMapValWrapped = NonMonotonic || MayWrap;
6365 Kind = LinearMapKind;
6366 ++NumLinearMaps;
6367 return;
6368 }
6369 }
6370
6371 // If the type is integer and the table fits in a register, build a bitmap.
6372 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6373 IntegerType *IT = cast<IntegerType>(ValueType);
6374 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6375 for (uint64_t I = TableSize; I > 0; --I) {
6376 TableInt <<= IT->getBitWidth();
6377 // Insert values into the bitmap. Undef values are set to zero.
6378 if (!isa<UndefValue>(TableContents[I - 1])) {
6379 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6380 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6381 }
6382 }
6383 BitMap = ConstantInt::get(M.getContext(), TableInt);
6384 BitMapElementTy = IT;
6385 Kind = BitMapKind;
6386 ++NumBitMaps;
6387 return;
6388 }
6389
6390 // Store the table in an array.
6391 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
6392 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
6393
6394 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6395 GlobalVariable::PrivateLinkage, Initializer,
6396 "switch.table." + FuncName);
6397 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6398 // Set the alignment to that of an array items. We will be only loading one
6399 // value out of it.
6400 Array->setAlignment(DL.getPrefTypeAlign(ValueType));
6401 Kind = ArrayKind;
6402}
6403
// Emit the instructions that retrieve the table value at position Index,
// according to the representation chosen by the constructor (Kind).
Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder) {
  switch (Kind) {
  case SingleValueKind:
    // Every slot holds the same constant; no instructions needed.
    return SingleValue;
  case LinearMapKind: {
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case ArrayKind: {
    // Make sure the table index will not overflow when treated as signed.
    IntegerType *IT = cast<IntegerType>(Index->getType());
    uint64_t TableSize =
        Array->getInitializer()->getType()->getArrayNumElements();
    // Widen by one bit if the index could be negative when reinterpreted as
    // signed (inbounds GEP indices are sign-extended).
    if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
      Index = Builder.CreateZExt(
          Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
          "switch.tableidx.zext");

    // GEP into the global array and load the element.
    Value *GEPIndices[] = {Builder.getInt32(0), Index};
    Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
                                           GEPIndices, "switch.gep");
    return Builder.CreateLoad(
        cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
        "switch.load");
  }
  }
  llvm_unreachable("Unknown lookup table kind!");
}
6465
6466bool SwitchLookupTable::wouldFitInRegister(const DataLayout &DL,
6467 uint64_t TableSize,
6468 Type *ElementType) {
6469 auto *IT = dyn_cast<IntegerType>(ElementType);
6470 if (!IT)
6471 return false;
6472 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6473 // are <= 15, we could try to narrow the type.
6474
6475 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6476 if (TableSize >= UINT_MAX / IT->getBitWidth())
6477 return false;
6478 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6479}
6480
6482 const DataLayout &DL) {
6483 // Allow any legal type.
6484 if (TTI.isTypeLegal(Ty))
6485 return true;
6486
6487 auto *IT = dyn_cast<IntegerType>(Ty);
6488 if (!IT)
6489 return false;
6490
6491 // Also allow power of 2 integer types that have at least 8 bits and fit in
6492 // a register. These types are common in frontend languages and targets
6493 // usually support loads of these types.
6494 // TODO: We could relax this to any integer that fits in a register and rely
6495 // on ABI alignment and padding in the table to allow the load to be widened.
6496 // Or we could widen the constants and truncate the load.
6497 unsigned BitWidth = IT->getBitWidth();
6498 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6499 DL.fitsInLegalInteger(IT->getBitWidth());
6500}
6501
// Return true if NumCases cases spread over a range of CaseRange values is
// dense enough to be worth a table (at least MinDensity percent occupancy).
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  // Guard the multiplications below against overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Compare NumCases/CaseRange >= MinDensity/100 without division.
  return NumCases * 100 >= CaseRange * MinDensity;
}
6513
6515 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6516 uint64_t Range = Diff + 1;
6517 if (Range < Diff)
6518 return false; // Overflow.
6519
6520 return isSwitchDense(Values.size(), Range);
6521}
6522
6523/// Determine whether a lookup table should be built for this switch, based on
6524/// the number of cases, size of the table, and the types of the results.
6525// TODO: We could support larger than legal types by limiting based on the
6526// number of loads required and/or table size. If the constants are small we
6527// could use smaller table entries and extend after the load.
6528static bool
6530 const TargetTransformInfo &TTI, const DataLayout &DL,
6531 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6532 if (SI->getNumCases() > TableSize)
6533 return false; // TableSize overflowed.
6534
6535 bool AllTablesFitInRegister = true;
6536 bool HasIllegalType = false;
6537 for (const auto &I : ResultTypes) {
6538 Type *Ty = I.second;
6539
6540 // Saturate this flag to true.
6541 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6542
6543 // Saturate this flag to false.
6544 AllTablesFitInRegister =
6545 AllTablesFitInRegister &&
6546 SwitchLookupTable::wouldFitInRegister(DL, TableSize, Ty);
6547
6548 // If both flags saturate, we're done. NOTE: This *only* works with
6549 // saturating flags, and all flags have to saturate first due to the
6550 // non-deterministic behavior of iterating over a dense map.
6551 if (HasIllegalType && !AllTablesFitInRegister)
6552 break;
6553 }
6554
6555 // If each table would fit in a register, we should build it anyway.
6556 if (AllTablesFitInRegister)
6557 return true;
6558
6559 // Don't build a table that doesn't fit in-register if it has illegal types.
6560 if (HasIllegalType)
6561 return false;
6562
6563 return isSwitchDense(SI->getNumCases(), TableSize);
6564}
6565
6567 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6568 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6569 const DataLayout &DL, const TargetTransformInfo &TTI) {
6570 if (MinCaseVal.isNullValue())
6571 return true;
6572 if (MinCaseVal.isNegative() ||
6573 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6574 !HasDefaultResults)
6575 return false;
6576 return all_of(ResultTypes, [&](const auto &KV) {
6577 return SwitchLookupTable::wouldFitInRegister(
6578 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6579 KV.second /* ResultType */);
6580 });
6581}
6582
6583/// Try to reuse the switch table index compare. Following pattern:
6584/// \code
6585/// if (idx < tablesize)
6586/// r = table[idx]; // table does not contain default_value
6587/// else
6588/// r = default_value;
6589/// if (r != default_value)
6590/// ...
6591/// \endcode
6592/// Is optimized to:
6593/// \code
6594/// cond = idx < tablesize;
6595/// if (cond)
6596/// r = table[idx];
6597/// else
6598/// r = default_value;
6599/// if (cond)
6600/// ...
6601/// \endcode
6602/// Jump threading will then eliminate the second if(cond).
6604 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6605 Constant *DefaultValue,
6606 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6607 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6608 if (!CmpInst)
6609 return;
6610
6611 // We require that the compare is in the same block as the phi so that jump
6612 // threading can do its work afterwards.
6613 if (CmpInst->getParent() != PhiBlock)
6614 return;
6615
6616 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6617 if (!CmpOp1)
6618 return;
6619
6620 Value *RangeCmp = RangeCheckBranch->getCondition();
6621 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6622 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6623
6624 // Check if the compare with the default value is constant true or false.
6625 const DataLayout &DL = PhiBlock->getDataLayout();
6627 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6628 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6629 return;
6630
6631 // Check if the compare with the case values is distinct from the default
6632 // compare result.
6633 for (auto ValuePair : Values) {
6635 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
6636 if (!CaseConst || CaseConst == DefaultConst ||
6637 (CaseConst != TrueConst && CaseConst != FalseConst))
6638 return;
6639 }
6640
6641 // Check if the branch instruction dominates the phi node. It's a simple
6642 // dominance check, but sufficient for our needs.
6643 // Although this check is invariant in the calling loops, it's better to do it
6644 // at this late stage. Practically we do it at most once for a switch.
6645 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6646 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6647 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6648 return;
6649 }
6650
6651 if (DefaultConst == FalseConst) {
6652 // The compare yields the same result. We can replace it.
6653 CmpInst->replaceAllUsesWith(RangeCmp);
6654 ++NumTableCmpReuses;
6655 } else {
6656 // The compare yields the same result, just inverted. We can replace it.
6657 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6658 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6659 RangeCheckBranch->getIterator());
6660 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6661 ++NumTableCmpReuses;
6662 }
6663}
6664
6665/// If the switch is only used to initialize one or more phi nodes in a common
6666/// successor block with different constant values, replace the switch with
6667/// lookup tables.
6669 DomTreeUpdater *DTU, const DataLayout &DL,
6670 const TargetTransformInfo &TTI) {
6671 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6672
6673 BasicBlock *BB = SI->getParent();
6674 Function *Fn = BB->getParent();
6675 // Only build lookup table when we have a target that supports it or the
6676 // attribute is not set.
6678 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6679 return false;
6680
6681 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6682 // split off a dense part and build a lookup table for that.
6683
6684 // FIXME: This creates arrays of GEPs to constant strings, which means each
6685 // GEP needs a runtime relocation in PIC code. We should just build one big
6686 // string and lookup indices into that.
6687
6688 // Ignore switches with less than three cases. Lookup tables will not make
6689 // them faster, so we don't analyze them.
6690 if (SI->getNumCases() < 3)
6691 return false;
6692
6693 // Figure out the corresponding result for each case value and phi node in the
6694 // common destination, as well as the min and max case values.
6695 assert(!SI->cases().empty());
6696 SwitchInst::CaseIt CI = SI->case_begin();
6697 ConstantInt *MinCaseVal = CI->getCaseValue();
6698 ConstantInt *MaxCaseVal = CI->getCaseValue();
6699
6700 BasicBlock *CommonDest = nullptr;
6701
6702 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6704
6708
6709 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6710 ConstantInt *CaseVal = CI->getCaseValue();
6711 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6712 MinCaseVal = CaseVal;
6713 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6714 MaxCaseVal = CaseVal;
6715
6716 // Resulting value at phi nodes for this case value.
6718 ResultsTy Results;
6719 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6720 Results, DL, TTI))
6721 return false;
6722
6723 // Append the result from this case to the list for each phi.
6724 for (const auto &I : Results) {
6725 PHINode *PHI = I.first;
6726 Constant *Value = I.second;
6727 if (!ResultLists.count(PHI))
6728 PHIs.push_back(PHI);
6729 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
6730 }
6731 }
6732
6733 // Keep track of the result types.
6734 for (PHINode *PHI : PHIs) {
6735 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6736 }
6737
6738 uint64_t NumResults = ResultLists[PHIs[0]].size();
6739
6740 // If the table has holes, we need a constant result for the default case
6741 // or a bitmask that fits in a register.
6742 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6743 bool HasDefaultResults =
6744 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6745 DefaultResultsList, DL, TTI);
6746
6747 for (const auto &I : DefaultResultsList) {
6748 PHINode *PHI = I.first;
6749 Constant *Result = I.second;
6750 DefaultResults[PHI] = Result;
6751 }
6752
6753 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
6754 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6755 uint64_t TableSize;
6756 if (UseSwitchConditionAsTableIndex)
6757 TableSize = MaxCaseVal->getLimitedValue() + 1;
6758 else
6759 TableSize =
6760 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
6761
6762 // If the default destination is unreachable, or if the lookup table covers
6763 // all values of the conditional variable, branch directly to the lookup table
6764 // BB. Otherwise, check that the condition is within the case range.
6765 bool DefaultIsReachable = !SI->defaultDestUndefined();
6766
6767 bool TableHasHoles = (NumResults < TableSize);
6768
6769 // If the table has holes but the default destination doesn't produce any
6770 // constant results, the lookup table entries corresponding to the holes will
6771 // contain undefined values.
6772 bool AllHolesAreUndefined = TableHasHoles && !HasDefaultResults;
6773
6774 // If the default destination doesn't produce a constant result but is still
6775 // reachable, and the lookup table has holes, we need to use a mask to
6776 // determine if the current index should load from the lookup table or jump
6777 // to the default case.
6778 // The mask is unnecessary if the table has holes but the default destination
6779 // is unreachable, as in that case the holes must also be unreachable.
6780 bool NeedMask = AllHolesAreUndefined && DefaultIsReachable;
6781 if (NeedMask) {
6782 // As an extra penalty for the validity test we require more cases.
6783 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
6784 return false;
6785 if (!DL.fitsInLegalInteger(TableSize))
6786 return false;
6787 }
6788
6789 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
6790 return false;
6791
6792 std::vector<DominatorTree::UpdateType> Updates;
6793
6794 // Compute the maximum table size representable by the integer type we are
6795 // switching upon.
6796 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
6797 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
6798 assert(MaxTableSize >= TableSize &&
6799 "It is impossible for a switch to have more entries than the max "
6800 "representable value of its input integer type's size.");
6801
6802 // Create the BB that does the lookups.
6803 Module &Mod = *CommonDest->getParent()->getParent();
6804 BasicBlock *LookupBB = BasicBlock::Create(
6805 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
6806
6807 // Compute the table index value.
6808 Builder.SetInsertPoint(SI);
6809 Value *TableIndex;
6810 ConstantInt *TableIndexOffset;
6811 if (UseSwitchConditionAsTableIndex) {
6812 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
6813 TableIndex = SI->getCondition();
6814 } else {
6815 TableIndexOffset = MinCaseVal;
6816 // If the default is unreachable, all case values are s>= MinCaseVal. Then
6817 // we can try to attach nsw.
6818 bool MayWrap = true;
6819 if (!DefaultIsReachable) {
6820 APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
6821 (void)Res;
6822 }
6823
6824 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
6825 "switch.tableidx", /*HasNUW =*/false,
6826 /*HasNSW =*/!MayWrap);
6827 }
6828
6829 BranchInst *RangeCheckBranch = nullptr;
6830
6831 // Grow the table to cover all possible index values to avoid the range check.
6832 // It will use the default result to fill in the table hole later, so make
6833 // sure it exist.
6834 if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
6835 ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
6836 // Grow the table shouldn't have any size impact by checking
6837 // wouldFitInRegister.
6838 // TODO: Consider growing the table also when it doesn't fit in a register
6839 // if no optsize is specified.
6840 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
6841 if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
6842 return SwitchLookupTable::wouldFitInRegister(
6843 DL, UpperBound, KV.second /* ResultType */);
6844 })) {
6845 // There may be some case index larger than the UpperBound (unreachable
6846 // case), so make sure the table size does not get smaller.
6847 TableSize = std::max(UpperBound, TableSize);
6848 // The default branch is unreachable after we enlarge the lookup table.
6849 // Adjust DefaultIsReachable to reuse code path.
6850 DefaultIsReachable = false;
6851 }
6852 }
6853
6854 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
6855 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6856 Builder.CreateBr(LookupBB);
6857 if (DTU)
6858 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6859 // Note: We call removeProdecessor later since we need to be able to get the
6860 // PHI value for the default case in case we're using a bit mask.
6861 } else {
6862 Value *Cmp = Builder.CreateICmpULT(
6863 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
6864 RangeCheckBranch =
6865 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
6866 if (DTU)
6867 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6868 }
6869
6870 // Populate the BB that does the lookups.
6871 Builder.SetInsertPoint(LookupBB);
6872
6873 if (NeedMask) {
6874 // Before doing the lookup, we do the hole check. The LookupBB is therefore
6875 // re-purposed to do the hole check, and we create a new LookupBB.
6876 BasicBlock *MaskBB = LookupBB;
6877 MaskBB->setName("switch.hole_check");
6878 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
6879 CommonDest->getParent(), CommonDest);
6880
6881 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
6882 // unnecessary illegal types.
6883 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
6884 APInt MaskInt(TableSizePowOf2, 0);
6885 APInt One(TableSizePowOf2, 1);
6886 // Build bitmask; fill in a 1 bit for every case.
6887 const ResultListTy &ResultList = ResultLists[PHIs[0]];
6888 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
6889 uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
6890 .getLimitedValue();
6891 MaskInt |= One << Idx;
6892 }
6893 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
6894
6895 // Get the TableIndex'th bit of the bitmask.
6896 // If this bit is 0 (meaning hole) jump to the default destination,
6897 // else continue with table lookup.
6898 IntegerType *MapTy = TableMask->getIntegerType();
6899 Value *MaskIndex =
6900 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
6901 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
6902 Value *LoBit = Builder.CreateTrunc(
6903 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
6904 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
6905 if (DTU) {
6906 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
6907 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
6908 }
6909 Builder.SetInsertPoint(LookupBB);
6910 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
6911 }
6912
6913 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6914 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
6915 // do not delete PHINodes here.
6916 SI->getDefaultDest()->removePredecessor(BB,
6917 /*KeepOneInputPHIs=*/true);
6918 if (DTU)
6919 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
6920 }
6921
6922 for (PHINode *PHI : PHIs) {
6923 const ResultListTy &ResultList = ResultLists[PHI];
6924
6925 // Use any value to fill the lookup table holes.
6926 Constant *DV =
6927 AllHolesAreUndefined ? ResultLists[PHI][0].second : DefaultResults[PHI];
6928 StringRef FuncName = Fn->getName();
6929 SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
6930 DL, FuncName);
6931
6932 Value *Result = Table.buildLookup(TableIndex, Builder);
6933
6934 // Do a small peephole optimization: re-use the switch table compare if
6935 // possible.
6936 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
6937 BasicBlock *PhiBlock = PHI->getParent();
6938 // Search for compare instructions which use the phi.
6939 for (auto *User : PHI->users()) {
6940 reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
6941 }
6942 }
6943
6944 PHI->addIncoming(Result, LookupBB);
6945 }
6946
6947 Builder.CreateBr(CommonDest);
6948 if (DTU)
6949 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
6950
6951 // Remove the switch.
6952 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
6953 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6954 BasicBlock *Succ = SI->getSuccessor(i);
6955
6956 if (Succ == SI->getDefaultDest())
6957 continue;
6958 Succ->removePredecessor(BB);
6959 if (DTU && RemovedSuccessors.insert(Succ).second)
6960 Updates.push_back({DominatorTree::Delete, BB, Succ});
6961 }
6962 SI->eraseFromParent();
6963
6964 if (DTU)
6965 DTU->applyUpdates(Updates);
6966
6967 ++NumLookupTables;
6968 if (NeedMask)
6969 ++NumLookupTablesHoles;
6970 return true;
6971}
6972
6973/// Try to transform a switch that has "holes" in it to a contiguous sequence
6974/// of cases.
6975///
6976/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
6977/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
6978///
6979/// This converts a sparse switch into a dense switch which allows better
6980/// lowering and could also allow transforming into a lookup table.
///
/// Returns true if the switch was rewritten in place: the condition is
/// replaced with a subtract + rotate and every case value is remapped.
6981static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
6982 const DataLayout &DL,
6983 const TargetTransformInfo &TTI) {
// The arithmetic below is done in 64-bit integers, so give up on condition
// types wider than 64 bits or not legal for the target.
6984 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
6985 if (CondTy->getIntegerBitWidth() > 64 ||
6986 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6987 return false;
6988 // Only bother with this optimization if there are more than 3 switch cases;
6989 // SDAG will only bother creating jump tables for 4 or more cases.
6990 if (SI->getNumCases() < 4)
6991 return false;
6992
6993 // This transform is agnostic to the signedness of the input or case values. We
6994 // can treat the case values as signed or unsigned. We can optimize more common
6995 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
6996 // as signed.
// Collect all case values, sign-extended to int64_t. NOTE(review): the
// declaration of the 'Values' container (original line 6997) is elided from
// this excerpt.
6998 for (const auto &C : SI->cases())
6999 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7000 llvm::sort(Values);
7001
7002 // If the switch is already dense, there's nothing useful to do here.
7003 if (isSwitchDense(Values))
7004 return false;
7005
7006 // First, transform the values such that they start at zero and ascend.
7007 int64_t Base = Values[0];
7008 for (auto &V : Values)
// Subtract in the unsigned domain to avoid signed-overflow UB.
7009 V -= (uint64_t)(Base);
7010
7011 // Now we have signed numbers that have been shifted so that, given enough
7012 // precision, there are no negative values. Since the rest of the transform
7013 // is bitwise only, we switch now to an unsigned representation.
7014
7015 // This transform can be done speculatively because it is so cheap - it
7016 // results in a single rotate operation being inserted.
7017
7018 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7019 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7020 // less than 64.
// Shift becomes the number of trailing zero bits common to all rebased
// case values, i.e. the largest power-of-two stride that divides them all.
7021 unsigned Shift = 64;
7022 for (auto &V : Values)
7023 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7024 assert(Shift < 64);
7025 if (Shift > 0)
7026 for (auto &V : Values)
7027 V = (int64_t)((uint64_t)V >> Shift);
7028
7029 if (!isSwitchDense(Values))
7030 // Transform didn't create a dense switch.
7031 return false;
7032
7033 // The obvious transform is to shift the switch condition right and emit a
7034 // check that the condition actually cleanly divided by GCD, i.e.
7035 // C & (1 << Shift - 1) == 0
7036 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7037 //
7038 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7039 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7040 // are nonzero then the switch condition will be very large and will hit the
7041 // default case.
7042
7043 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7044 Builder.SetInsertPoint(SI);
7045 Value *Sub =
7046 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
// fshl(x, x, BitWidth - Shift) is a rotate-left by BitWidth-Shift, which is
// exactly the ROTR(C, Shift) described above.
7047 Value *Rot = Builder.CreateIntrinsic(
7048 Ty, Intrinsic::fshl,
7049 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7050 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7051
// Remap every case value to its rebased, shifted form so the cases match
// the rewritten condition.
7052 for (auto Case : SI->cases()) {
7053 auto *Orig = Case.getCaseValue();
7054 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
7055 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7056 }
7057 return true;
7058}
7059
7060/// Tries to transform switch of powers of two to reduce switch range.
7061/// For example, switch like:
7062/// switch (C) { case 1: case 2: case 64: case 128: }
7063/// will be transformed to:
7064/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7065///
7066/// This transformation allows better lowering and could allow transforming into
7067/// a lookup table.
// NOTE(review): the first line of this function's signature (original line
// 7068, naming it and taking 'SwitchInst *SI, IRBuilder<> &Builder, ...') is
// elided from this excerpt; the lines below are the trailing parameters.
7069 const DataLayout &DL,
7070 const TargetTransformInfo &TTI) {
7071 Value *Condition = SI->getCondition();
7072 LLVMContext &Context = SI->getContext();
7073 auto *CondTy = cast<IntegerType>(Condition->getType());
7074
// Case values are handled as uint64_t below; reject condition types that
// are wider than 64 bits or not legal for the target.
7075 if (CondTy->getIntegerBitWidth() > 64 ||
7076 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7077 return false;
7078
// The rewrite inserts a cttz intrinsic on the condition, so it is only
// worthwhile when the target considers that cheap.
7079 const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7080 IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7081 {Condition, ConstantInt::getTrue(Context)}),
// NOTE(review): the cost-kind argument (elided original line 7082/7083) is
// not visible in this excerpt.
7084 if (CttzIntrinsicCost > TTI::TCC_Basic)
7085 // Inserting intrinsic is too expensive.
7086 return false;
7087
7088 // Only bother with this optimization if there are more than 3 switch cases.
7089 // SDAG will only bother creating jump tables for 4 or more cases.
7090 if (SI->getNumCases() < 4)
7091 return false;
7092
7093 // We perform this optimization only for switches with
7094 // unreachable default case.
7095 // This assumption will save us from checking if `Condition` is a power of two.
7096 if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7097 return false;
7098
7099 // Check that switch cases are powers of two.
// NOTE(review): the declaration of the 'Values' container (original line
// 7100) is elided from this excerpt.
7101 for (const auto &Case : SI->cases()) {
7102 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7103 if (llvm::has_single_bit(CaseValue))
7104 Values.push_back(CaseValue);
7105 else
7106 return false;
7107 }
7108
7109 // isSwitchDense requires case values to be sorted.
7110 llvm::sort(Values);
// After replacing each case by its log2 (trailing-zero count), the switch
// is only profitable if those exponents form a dense range.
7111 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7112 llvm::countr_zero(Values.front()) + 1))
7113 // Transform is unable to generate dense switch.
7114 return false;
7115
7116 Builder.SetInsertPoint(SI);
7117
7118 // Replace each case with its trailing zeros number.
7119 for (auto &Case : SI->cases()) {
7120 auto *OrigValue = Case.getCaseValue();
7121 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7122 OrigValue->getValue().countr_zero()));
7123 }
7124
7125 // Replace condition with its trailing zeros number.
// The 'true' second operand is cttz's is_zero_poison flag; it is justified
// by the unreachable-default check above (zero would hit the default).
7126 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7127 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7128
7129 SI->setCondition(ConditionTrailingZeros);
7130
7131 return true;
7132}
7133
/// Driver that applies, in order, the switch simplifications implemented in
/// this file. Any transform that fires requests a fresh re-simplification
/// pass rather than continuing against a now-stale switch.
7134bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7135 BasicBlock *BB = SI->getParent();
7136
7137 if (isValueEqualityComparison(SI)) {
7138 // If we only have one predecessor, and if it is a branch on this value,
7139 // see if that predecessor totally determines the outcome of this switch.
7140 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7141 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7142 return requestResimplify();
7143
// A switch whose condition is a select can be simplified against the
// select's two arms.
7144 Value *Cond = SI->getCondition();
7145 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7146 if (simplifySwitchOnSelect(SI, Select))
7147 return requestResimplify();
7148
7149 // If the block only contains the switch, see if we can fold the block
7150 // away into any preds.
7151 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7152 if (foldValueComparisonIntoPredecessors(SI, Builder))
7153 return requestResimplify();
7154 }
7155
7156 // Try to transform the switch into an icmp and a branch.
7157 // The conversion from switch to comparison may lose information on
7158 // impossible switch values, so disable it early in the pipeline.
7159 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7160 return requestResimplify();
7161
7162 // Remove unreachable cases.
7163 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7164 return requestResimplify();
7165
7166 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7167 return requestResimplify();
7168
7169 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
7170 return requestResimplify();
7171
7172 // The conversion from switch to lookup tables results in difficult-to-analyze
7173 // code and makes pruning branches much harder. This is a problem if the
7174 // switch expression itself can still be restricted as a result of inlining or
7175 // CVP. Therefore, only apply this transformation during late stages of the
7176 // optimisation pipeline.
7177 if (Options.ConvertSwitchToLookupTable &&
7178 switchToLookupTable(SI, Builder, DTU, DL, TTI))
7179 return requestResimplify();
7180
// Range-reduction transforms: rewrite the condition so sparse case sets
// become dense (see simplifySwitchOfPowersOfTwo / reduceSwitchRange above).
7181 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7182 return requestResimplify();
7183
7184 if (reduceSwitchRange(SI, Builder, DL, TTI))
7185 return requestResimplify();
7186
// Finally, try hoisting code common to all successors into this block.
7187 if (HoistCommon &&
7188 hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
7189 return requestResimplify();
7190
7191 return false;
7192}
7193
/// Simplify an indirectbr terminator: drop duplicate or no-longer
/// address-taken destinations, degrade to unreachable / direct branch when
/// zero or one destination remains, and look through a select address.
7194bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7195 BasicBlock *BB = IBI->getParent();
7196 bool Changed = false;
7197
7198 // Eliminate redundant destinations.
// NOTE(review): the declarations of the 'Succs' and 'RemovedSuccs' sets
// (original lines 7199-7200) are elided from this excerpt.
7201 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7202 BasicBlock *Dest = IBI->getDestination(i);
// A destination is redundant if its address is no longer taken anywhere,
// or if an edge to it was already kept (the 'Succs' insert fails).
7203 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7204 if (!Dest->hasAddressTaken())
7205 RemovedSuccs.insert(Dest);
7206 Dest->removePredecessor(BB);
// Compensate the loop index and bound for the in-place removal.
7207 IBI->removeDestination(i);
7208 --i;
7209 --e;
7210 Changed = true;
7211 }
7212 }
7213
7214 if (DTU) {
7215 std::vector<DominatorTree::UpdateType> Updates;
7216 Updates.reserve(RemovedSuccs.size());
7217 for (auto *RemovedSucc : RemovedSuccs)
7218 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7219 DTU->applyUpdates(Updates);
7220 }
7221
7222 if (IBI->getNumDestinations() == 0) {
7223 // If the indirectbr has no successors, change it to unreachable.
7224 new UnreachableInst(IBI->getContext(), IBI->getIterator());
// NOTE(review): original line 7225 — presumably erasing the indirectbr —
// is elided from this excerpt.
7226 return true;
7227 }
7228
7229 if (IBI->getNumDestinations() == 1) {
7230 // If the indirectbr has one successor, change it to a direct branch.
// NOTE(review): original lines 7231-7232 — presumably creating the branch
// and erasing the indirectbr — are elided from this excerpt.
7233 return true;
7234 }
7235
// An indirectbr on a select of two block addresses can become a
// conditional branch.
7236 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7237 if (simplifyIndirectBrOnSelect(IBI, SI))
7238 return requestResimplify();
7239 }
7240 return Changed;
7241}
7242
7243/// Given an block with only a single landing pad and a unconditional branch
7244/// try to find another basic block which this one can be merged with. This
7245/// handles cases where we have multiple invokes with unique landing pads, but
7246/// a shared handler.
7247///
7248/// We specifically choose to not worry about merging non-empty blocks
7249/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7250/// practice, the optimizer produces empty landing pad blocks quite frequently
7251/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7252/// sinking in this file)
7253///
7254/// This is primarily a code size optimization. We need to avoid performing
7255/// any transform which might inhibit optimization (such as our ability to
7256/// specialize a particular handler via tail commoning). We do this by not
7257/// merging any blocks which require us to introduce a phi. Since the same
7258/// values are flowing through both blocks, we don't lose any ability to
7259/// specialize. If anything, we make such specialization more likely.
7260///
7261/// TODO - This transformation could remove entries from a phi in the target
7262/// block when the inputs in the phi are the same for the two blocks being
7263/// merged. In some cases, this could result in removal of the PHI entirely.
// NOTE(review): the first line of this function's signature (original line
// 7264, 'static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst
// *BI, ...') is elided from this excerpt.
7265 BasicBlock *BB, DomTreeUpdater *DTU) {
7266 auto Succ = BB->getUniqueSuccessor();
7267 assert(Succ);
7268 // If there's a phi in the successor block, we'd likely have to introduce
7269 // a phi into the merged landing pad block.
7270 if (isa<PHINode>(*Succ->begin()))
7271 return false;
7272
// Scan the successor's other predecessors for a twin of BB: identical
// landing pad followed (modulo debug intrinsics) by an identical branch.
7273 for (BasicBlock *OtherPred : predecessors(Succ)) {
7274 if (BB == OtherPred)
7275 continue;
7276 BasicBlock::iterator I = OtherPred->begin();
7277 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7278 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7279 continue;
// Skip debug intrinsics between the landing pad and the terminator.
7280 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7281 ;
7282 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7283 if (!BI2 || !BI2->isIdenticalTo(BI))
7284 continue;
7285
7286 std::vector<DominatorTree::UpdateType> Updates;
7287
7288 // We've found an identical block. Update our predecessors to take that
7289 // path instead and make ourselves dead.
// NOTE(review): the declaration of 'UniquePreds' (original line 7290) is
// elided from this excerpt; presumably it collects BB's predecessors.
7291 for (BasicBlock *Pred : UniquePreds) {
// Every predecessor of this landing-pad block is an invoke unwinding to
// it (enforced by the cast and assert); retarget that unwind edge.
7292 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7293 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7294 "unexpected successor");
7295 II->setUnwindDest(OtherPred);
7296 if (DTU) {
7297 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7298 Updates.push_back({DominatorTree::Delete, Pred, BB});
7299 }
7300 }
7301
7302 // The debug info in OtherPred doesn't cover the merged control flow that
7303 // used to go through BB. We need to delete it or update it.
7304 for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7305 if (isa<DbgInfoIntrinsic>(Inst))
7306 Inst.eraseFromParent();
7307
// NOTE(review): the declaration of 'UniqueSuccs' (original line 7308) is
// elided from this excerpt; presumably it collects BB's successors.
7309 for (BasicBlock *Succ : UniqueSuccs) {
7310 Succ->removePredecessor(BB);
7311 if (DTU)
7312 Updates.push_back({DominatorTree::Delete, BB, Succ});
7313 }
7314
// BB is now dead: replace its branch with an unreachable terminator.
7315 IRBuilder<> Builder(BI);
7316 Builder.CreateUnreachable();
7317 BI->eraseFromParent();
7318 if (DTU)
7319 DTU->applyUpdates(Updates);
7320 return true;
7321 }
7322 return false;
7323}
7324
7325bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7326 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7327 : simplifyCondBranch(Branch, Builder);
7328}
7329
/// Simplify an unconditional branch. Tries, in order: eliminating a
/// (near-)empty block, folding a lone icmp+branch block, merging an empty
/// landing-pad block with an identical one, and folding the branch into a
/// predecessor that reaches both this block and its successor.
7330bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7331 IRBuilder<> &Builder) {
7332 BasicBlock *BB = BI->getParent();
7333 BasicBlock *Succ = BI->getSuccessor(0);
7334
7335 // If the Terminator is the only non-phi instruction, simplify the block.
7336 // If LoopHeader is provided, check if the block or its successor is a loop
7337 // header. (This is for early invocations before loop simplify and
7338 // vectorization to keep canonical loop forms for nested loops. These blocks
7339 // can be eliminated when the pass is invoked later in the back-end.)
7340 // Note that if BB has only one predecessor then we do not introduce new
7341 // backedge, so we can eliminate BB.
7342 bool NeedCanonicalLoop =
7343 Options.NeedCanonicalLoop &&
7344 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7345 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
// NOTE(review): the initialization of iterator 'I' (original line 7346) is
// elided from this excerpt; presumably it points at BB's first interesting
// (non-PHI/debug) instruction.
7347 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7348 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7349 return true;
7350
7351 // If the only instruction in the block is a seteq/setne comparison against a
7352 // constant, try to simplify the block.
7353 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7354 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
// Skip debug intrinsics: the icmp must be followed directly by the
// terminator for the block to qualify.
7355 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7356 ;
7357 if (I->isTerminator() &&
7358 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7359 return true;
7360 }
7361
7362 // See if we can merge an empty landing pad block with another which is
7363 // equivalent.
7364 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
// Again require that only debug intrinsics separate the landing pad from
// the terminator, i.e. the block is effectively empty.
7365 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7366 ;
7367 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
7368 return true;
7369 }
7370
7371 // If this basic block is ONLY a compare and a branch, and if a predecessor
7372 // branches to us and our successor, fold the comparison into the
7373 // predecessor and use logical operations to update the incoming value
7374 // for PHI nodes in common successor.
7375 if (Options.SpeculateBlocks &&
7376 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7377 Options.BonusInstThreshold))
7378 return requestResimplify();
7379 return false;
7380}
7381
// NOTE(review): this is the body of a static helper whose signature
// (original line 7382) is elided from this excerpt. From the body: it
// returns the unique block that is the single predecessor of every
// predecessor of BB, or null if no such common source exists.
7383 BasicBlock *PredPred = nullptr;
7384 for (auto *P : predecessors(BB)) {
// Each predecessor must itself have exactly one predecessor, and all of
// them must agree on that block.
7385 BasicBlock *PPred = P->getSinglePredecessor();
7386 if (!PPred || (PredPred && PredPred != PPred))
7387 return nullptr;
7388 PredPred = PPred;
7389 }
7390 return PredPred;
7391}
7392
7393/// Fold the following pattern:
7394/// bb0:
7395/// br i1 %cond1, label %bb1, label %bb2
7396/// bb1:
7397/// br i1 %cond2, label %bb3, label %bb4
7398/// bb2:
7399/// br i1 %cond2, label %bb4, label %bb3
7400/// bb3:
7401/// ...
7402/// bb4:
7403/// ...
7404/// into
7405/// bb0:
7406/// %cond = xor i1 %cond1, %cond2
7407/// br i1 %cond, label %bb4, label %bb3
7408/// bb3:
7409/// ...
7410/// bb4:
7411/// ...
7412/// NOTE: %cond2 always dominates the terminator of bb0.
// NOTE(review): the signature line of this function (original line 7413) is
// elided from this excerpt.
7414 BasicBlock *BB = BI->getParent();
7415 BasicBlock *BB1 = BI->getSuccessor(0);
7416 BasicBlock *BB2 = BI->getSuccessor(1);
// A successor qualifies if it consists solely of a conditional branch whose
// targets avoid self loops and carry no PHIs (so redirecting edges needs no
// incoming-value fixup).
7417 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
7418 if (Succ == BB)
7419 return false;
7420 if (&Succ->front() != Succ->getTerminator())
7421 return false;
7422 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
7423 if (!SuccBI || !SuccBI->isConditional())
7424 return false;
7425 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
7426 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
7427 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
7428 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
7429 };
7430 BranchInst *BB1BI, *BB2BI;
7431 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
7432 return false;
7433
// Both inner branches must test the same condition with swapped targets;
// that symmetry is what makes XOR the correct combined condition.
7434 if (BB1BI->getCondition() != BB2BI->getCondition() ||
7435 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
7436 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
7437 return false;
7438
7439 BasicBlock *BB3 = BB1BI->getSuccessor(0);
7440 BasicBlock *BB4 = BB1BI->getSuccessor(1);
7441 IRBuilder<> Builder(BI);
7442 BI->setCondition(
7443 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
// Reroute BB straight to BB4/BB3, bypassing the now-dead BB1 and BB2.
7444 BB1->removePredecessor(BB);
7445 BI->setSuccessor(0, BB4);
7446 BB2->removePredecessor(BB);
7447 BI->setSuccessor(1, BB3);
7448 if (DTU) {
// NOTE(review): the declaration of 'Updates' (original line 7449) is
// elided from this excerpt.
7450 Updates.push_back({DominatorTree::Delete, BB, BB1});
7451 Updates.push_back({DominatorTree::Insert, BB, BB4});
7452 Updates.push_back({DominatorTree::Delete, BB, BB2});
7453 Updates.push_back({DominatorTree::Insert, BB, BB3});
7454
7455 DTU->applyUpdates(Updates);
7456 }
// Recombine branch weights for the new edges, defaulting missing weights to
// 1; metadata is only written back if at least one of the three branches
// actually carried profile weights.
7457 bool HasWeight = false;
7458 uint64_t BBTWeight, BBFWeight;
7459 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
7460 HasWeight = true;
7461 else
7462 BBTWeight = BBFWeight = 1;
7463 uint64_t BB1TWeight, BB1FWeight;
7464 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
7465 HasWeight = true;
7466 else
7467 BB1TWeight = BB1FWeight = 1;
7468 uint64_t BB2TWeight, BB2FWeight;
7469 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
7470 HasWeight = true;
7471 else
7472 BB2TWeight = BB2FWeight = 1;
7473 if (HasWeight) {
// New true edge (BB4) is taken via BB1-false or BB2-true; new false edge
// (BB3) via BB1-true or BB2-false.
7474 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
7475 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
// Scale the combined products down to fit the branch-weight encoding.
7476 fitWeights(Weights);
7477 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
7478 }
7479 return true;
7480}
7481
/// Simplify a two-way conditional branch by trying, in order, the suite of
/// conditional-branch transforms implemented in this file. Any transform
/// that fires requests a fresh re-simplification pass.
7482bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
7483 assert(
7484 !isa<ConstantInt>(BI->getCondition()) &&
7485 BI->getSuccessor(0) != BI->getSuccessor(1) &&
7486 "Tautological conditional branch should have been eliminated already.");
7487
7488 BasicBlock *BB = BI->getParent();
7489 if (!Options.SimplifyCondBranch ||
7490 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
7491 return false;
7492
7493 // Conditional branch
7494 if (isValueEqualityComparison(BI)) {
7495 // If we only have one predecessor, and if it is a branch on this value,
7496 // see if that predecessor totally determines the outcome of this
7497 // switch.
7498 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7499 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
7500 return requestResimplify();
7501
7502 // This block must be empty, except for the setcond inst, if it exists.
7503 // Ignore dbg and pseudo intrinsics.
7504 auto I = BB->instructionsWithoutDebug(true).begin();
7505 if (&*I == BI) {
7506 if (foldValueComparisonIntoPredecessors(BI, Builder))
7507 return requestResimplify();
7508 } else if (&*I == cast<Instruction>(BI->getCondition())) {
// The only non-branch instruction is the comparison itself; it must
// immediately precede the branch for the fold to apply.
7509 ++I;
7510 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
7511 return requestResimplify();
7512 }
7513 }
7514
7515 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
7516 if (simplifyBranchOnICmpChain(BI, Builder, DL))
7517 return true;
7518
7519 // If this basic block has dominating predecessor blocks and the dominating
7520 // blocks' conditions imply BI's condition, we know the direction of BI.
7521 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
7522 if (Imp) {
7523 // Turn this into a branch on constant.
7524 auto *OldCond = BI->getCondition();
7525 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
7526 : ConstantInt::getFalse(BB->getContext());
7527 BI->setCondition(TorF);
// NOTE(review): original line 7528 — presumably cleaning up the now
// unused 'OldCond' — is elided from this excerpt.
7529 return requestResimplify();
7530 }
7531
7532 // If this basic block is ONLY a compare and a branch, and if a predecessor
7533 // branches to us and one of our successors, fold the comparison into the
7534 // predecessor and use logical operations to pick the right destination.
7535 if (Options.SpeculateBlocks &&
7536 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7537 Options.BonusInstThreshold))
7538 return requestResimplify();
7539
7540 // We have a conditional branch to two blocks that are only reachable
7541 // from BI. We know that the condbr dominates the two blocks, so see if
7542 // there is any identical code in the "then" and "else" blocks. If so, we
7543 // can hoist it up to the branching block.
7544 if (BI->getSuccessor(0)->getSinglePredecessor()) {
7545 if (BI->getSuccessor(1)->getSinglePredecessor()) {
7546 if (HoistCommon && hoistCommonCodeFromSuccessors(
7547 BI->getParent(), !Options.HoistCommonInsts))
7548 return requestResimplify();
7549 } else {
7550 // If Successor #1 has multiple preds, we may be able to conditionally
7551 // execute Successor #0 if it branches to Successor #1.
7552 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
7553 if (Succ0TI->getNumSuccessors() == 1 &&
7554 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
7555 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
7556 return requestResimplify();
7557 }
7558 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
7559 // If Successor #0 has multiple preds, we may be able to conditionally
7560 // execute Successor #1 if it branches to Successor #0.
7561 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
7562 if (Succ1TI->getNumSuccessors() == 1 &&
7563 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
7564 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
7565 return requestResimplify();
7566 }
7567
7568 // If this is a branch on something for which we know the constant value in
7569 // predecessors (e.g. a phi node in the current block), thread control
7570 // through this block.
// NOTE(review): the guarding condition (elided original line 7571) is not
// visible in this excerpt; this 'return' is conditional on it.
7572 return requestResimplify();
7573
7574 // Scan predecessor blocks for conditional branches.
7575 for (BasicBlock *Pred : predecessors(BB))
7576 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
7577 if (PBI != BI && PBI->isConditional())
7578 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
7579 return requestResimplify();
7580
7581 // Look for diamond patterns.
7582 if (MergeCondStores)
// NOTE(review): the elided original line 7583 presumably initializes
// 'PrevBB' (the common single predecessor of all of BB's predecessors).
7584 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
7585 if (PBI != BI && PBI->isConditional())
7586 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
7587 return requestResimplify();
7588
7589 // Look for nested conditional branches.
7590 if (mergeNestedCondBranch(BI, DTU))
7591 return requestResimplify();
7592
7593 return false;
7594}
7595
7596/// Check if passing a value to an instruction will cause undefined behavior.
///
/// \param V the value being passed; only constant null / undef are acted on.
/// \param I the instruction that produces or forwards V to its users.
/// \param PtrValueMayBeModified true if the pointer value may have been
///        altered (e.g. offset by a GEP) on the way to the examined use,
///        which weakens null-based conclusions.
7597static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
7598 Constant *C = dyn_cast<Constant>(V);
7599 if (!C)
7600 return false;
7601
7602 if (I->use_empty())
7603 return false;
7604
7605 if (C->isNullValue() || isa<UndefValue>(C)) {
7606 // Only look at the first use we can handle, avoid hurting compile time with
7607 // long uselists
7608 auto FindUse = llvm::find_if(I->users(), [](auto *U) {
7609 auto *Use = cast<Instruction>(U);
7610 // Change this list when we want to add new instructions.
7611 switch (Use->getOpcode()) {
7612 default:
7613 return false;
7614 case Instruction::GetElementPtr:
7615 case Instruction::Ret:
7616 case Instruction::BitCast:
7617 case Instruction::Load:
7618 case Instruction::Store:
7619 case Instruction::Call:
7620 case Instruction::CallBr:
7621 case Instruction::Invoke:
7622 return true;
7623 }
7624 });
7625 if (FindUse == I->user_end())
7626 return false;
7627 auto *Use = cast<Instruction>(*FindUse);
7628 // Bail out if Use is not in the same BB as I or Use == I or Use comes
7629 // before I in the block. The latter two can be the case if Use is a
7630 // PHI node.
7631 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
7632 return false;
7633
7634 // Now make sure that there are no instructions in between that can alter
7635 // control flow (eg. calls)
7636 auto InstrRange =
7637 make_range(std::next(I->getIterator()), Use->getIterator());
7638 if (any_of(InstrRange, [](Instruction &I) {
// NOTE(review): the lambda body (elided original line 7639) is not
// visible in this excerpt.
7640 }))
7641 return false;
7642
7643 // Look through GEPs. A load from a GEP derived from NULL is still undefined
7644 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
7645 if (GEP->getPointerOperand() == I) {
7646 // The current base address is null, there are four cases to consider:
7647 // getelementptr (TY, null, 0) -> null
7648 // getelementptr (TY, null, not zero) -> may be modified
7649 // getelementptr inbounds (TY, null, 0) -> null
7650 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
7651 // undefined?
7652 if (!GEP->hasAllZeroIndices() &&
7653 (!GEP->isInBounds() ||
7654 NullPointerIsDefined(GEP->getFunction(),
7655 GEP->getPointerAddressSpace())))
7656 PtrValueMayBeModified = true;
// Recurse on the GEP result: if the (possibly offset) pointer still
// triggers UB at its own use, so does the original value.
7657 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
7658 }
7659
7660 // Look through return.
7661 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Use)) {
7662 bool HasNoUndefAttr =
7663 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
7664 // Return undefined to a noundef return value is undefined.
7665 if (isa<UndefValue>(C) && HasNoUndefAttr)
7666 return true;
7667 // Return null to a nonnull+noundef return value is undefined.
7668 if (C->isNullValue() && HasNoUndefAttr &&
7669 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
7670 return !PtrValueMayBeModified;
7671 }
7672 }
7673
7674 // Load from null is undefined.
7675 if (LoadInst *LI = dyn_cast<LoadInst>(Use))
7676 if (!LI->isVolatile())
7677 return !NullPointerIsDefined(LI->getFunction(),
7678 LI->getPointerAddressSpace());
7679
7680 // Store to null is undefined.
7681 if (StoreInst *SI = dyn_cast<StoreInst>(Use))
7682 if (!SI->isVolatile())
7683 return (!NullPointerIsDefined(SI->getFunction(),
7684 SI->getPointerAddressSpace())) &&
7685 SI->getPointerOperand() == I;
7686
7687 // llvm.assume(false/undef) always triggers immediate UB.
7688 if (auto *Assume = dyn_cast<AssumeInst>(Use)) {
7689 // Ignore assume operand bundles.
7690 if (I == Assume->getArgOperand(0))
7691 return true;
7692 }
7693
7694 if (auto *CB = dyn_cast<CallBase>(Use)) {
// If null is a defined pointer in this function, calls involving it are
// not automatically UB.
7695 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
7696 return false;
7697 // A call to null is undefined.
7698 if (CB->getCalledOperand() == I)
7699 return true;
7700
7701 if (C->isNullValue()) {
7702 for (const llvm::Use &Arg : CB->args())
7703 if (Arg == I) {
7704 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7705 if (CB->isPassingUndefUB(ArgIdx) &&
7706 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
7707 // Passing null to a nonnull+noundef argument is undefined.
7708 return !PtrValueMayBeModified;
7709 }
7710 }
7711 } else if (isa<UndefValue>(C)) {
7712 // Passing undef to a noundef argument is undefined.
7713 for (const llvm::Use &Arg : CB->args())
7714 if (Arg == I) {
7715 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7716 if (CB->isPassingUndefUB(ArgIdx)) {
7717 // Passing undef to a noundef argument is undefined.
7718 return true;
7719 }
7720 }
7721 }
7722 }
7723 }
7724 return false;
7725}
7726
7727/// If BB has an incoming value that will always trigger undefined behavior
7728/// (eg. null pointer dereference), remove the branch leading here.
7730                                              DomTreeUpdater *DTU,
7731                                              AssumptionCache *AC) {
  // NOTE(review): source line 7729 (the function's first signature line) is
  // elided in this listing.
  // Walk every (PHI, incoming-edge) pair in BB. If passing the incoming value
  // into BB is known to trigger immediate UB, that edge can never be taken at
  // runtime, so the branch feeding it is deleted (or, for switch terminators,
  // retargeted to a fresh 'unreachable' block).
7732  for (PHINode &PHI : BB->phis())
7733    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
7734      if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
7735        BasicBlock *Predecessor = PHI.getIncomingBlock(i);
7736        Instruction *T = Predecessor->getTerminator();
7737        IRBuilder<> Builder(T);
7738        if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
7739          BB->removePredecessor(Predecessor);
7740          // Turn unconditional branches into unreachables and remove the dead
7741          // destination from conditional branches.
7742          if (BI->isUnconditional())
7743            Builder.CreateUnreachable();
7744          else {
7745            // Preserve guarding condition in assume, because it might not be
7746            // inferrable from any dominating condition.
7747            Value *Cond = BI->getCondition();
7748            CallInst *Assumption;
            // If BB is the true successor, the condition must have been false
            // on any non-UB execution, so assume its negation; otherwise
            // assume the condition itself.
7749            if (BI->getSuccessor(0) == BB)
7750              Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
7751            else
7752              Assumption = Builder.CreateAssumption(Cond);
7753            if (AC)
7754              AC->registerAssumption(cast<AssumeInst>(Assumption));
            // Replace the conditional branch with an unconditional branch to
            // the surviving (non-UB) successor.
7755            Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
7756                                                       : BI->getSuccessor(0));
7757          }
7758          BI->eraseFromParent();
7759          if (DTU)
7760            DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
7761          return true;
7762        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
7763          // Redirect all branches leading to UB into
7764          // a newly created unreachable block.
7765          BasicBlock *Unreachable = BasicBlock::Create(
7766              Predecessor->getContext(), "unreachable", BB->getParent(), BB);
7767          Builder.SetInsertPoint(Unreachable);
7768          // The new block contains only one instruction: Unreachable
7769          Builder.CreateUnreachable();
          // Retarget every case (and the default, below) that branched to BB.
7770          for (const auto &Case : SI->cases())
7771            if (Case.getCaseSuccessor() == BB) {
7772              BB->removePredecessor(Predecessor);
7773              Case.setSuccessor(Unreachable);
7774            }
7775          if (SI->getDefaultDest() == BB) {
7776            BB->removePredecessor(Predecessor);
7777            SI->setDefaultDest(Unreachable);
7778          }
7779
7780          if (DTU)
7781            DTU->applyUpdates(
7782                { { DominatorTree::Insert, Predecessor, Unreachable },
7783                  { DominatorTree::Delete, Predecessor, BB } });
7784          return true;
7785        }
7786      }
7787
  // No UB-introducing predecessor was found; nothing changed.
7788  return false;
7789}
7790
/// Perform a single round of CFG peephole simplification on BB.
/// Returns true if anything changed.  A follow-up round may be requested
/// via requestResimplify() (consumed by SimplifyCFGOpt::run).
7791bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
7792  bool Changed = false;
7793
7794  assert(BB && BB->getParent() && "Block not embedded in function!");
7795  assert(BB->getTerminator() && "Degenerate basic block encountered!");
7796
7797  // Remove basic blocks that have no predecessors (except the entry block)...
7798  // or that just have themselves as a predecessor. These are unreachable.
7799  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
7800      BB->getSinglePredecessor() == BB) {
7801    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
7802    DeleteDeadBlock(BB, DTU);
7803    return true;
7804  }
7805
7806  // Check to see if we can constant propagate this terminator instruction
7807  // away...
7808  Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
7809                                    /*TLI=*/nullptr, DTU);
7810
7811  // Check for and eliminate duplicate PHI nodes in this block.
7812  Changed |= EliminateDuplicatePHINodes(BB);
7813
7814  // Check for and remove branches that will always cause undefined behavior.
  // NOTE(review): source line 7815 — the guarded call this 'return' belongs
  // to — is elided in this listing.
7816    return requestResimplify();
7817
7818  // Merge basic blocks into their predecessor if there is only one distinct
7819  // pred, and if there is only one distinct successor of the predecessor, and
7820  // if there are no PHI nodes.
7821  if (MergeBlockIntoPredecessor(BB, DTU))
7822    return true;
7823
7824  if (SinkCommon && Options.SinkCommonInsts)
7825    if (sinkCommonCodeFromPredecessors(BB, DTU) ||
7826        mergeCompatibleInvokes(BB, DTU)) {
7827      // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
7828      // so we may now have duplicate PHI's.
7829      // Let's rerun EliminateDuplicatePHINodes() first,
7830      // before foldTwoEntryPHINode() potentially converts them into select's,
7831      // after which we'd need a whole EarlyCSE pass run to clean them up.
7832      return true;
7833    }
7834
7835  IRBuilder<> Builder(BB);
7836
7837  if (Options.SpeculateBlocks &&
7838      !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
7839    // If there is a trivial two-entry PHI node in this basic block, and we can
7840    // eliminate it, do so now.
7841    if (auto *PN = dyn_cast<PHINode>(BB->begin()))
7842      if (PN->getNumIncomingValues() == 2)
7843        if (foldTwoEntryPHINode(PN, TTI, DTU, DL,
7844                                Options.SpeculateUnpredictables))
7845          return true;
7846  }
7847
  // Dispatch terminator-specific simplifications.
  // NOTE(review): source line 7848, which defines 'Terminator' (presumably
  // BB->getTerminator()), is elided in this listing.
7849  Builder.SetInsertPoint(Terminator);
7850  switch (Terminator->getOpcode()) {
7851  case Instruction::Br:
7852    Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
7853    break;
7854  case Instruction::Resume:
7855    Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
7856    break;
7857  case Instruction::CleanupRet:
7858    Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
7859    break;
7860  case Instruction::Switch:
7861    Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
7862    break;
7863  case Instruction::Unreachable:
7864    Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
7865    break;
7866  case Instruction::IndirectBr:
7867    Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
7868    break;
7869  }
7870
7871  return Changed;
7872}
7873
/// Drive simplifyOnce() to a fixed point on BB: keep iterating while any
/// round requests resimplification.  Returns true if BB was changed by any
/// round.
7874bool SimplifyCFGOpt::run(BasicBlock *BB) {
7875  bool Changed = false;
7876
7877  // Repeatedly simplify BB as long as resimplification is requested.
7878  do {
7879    Resimplify = false;
7880
7881    // Perform one round of simplification. Resimplify flag will be set if
7882    // another iteration is requested.
7883    Changed |= simplifyOnce(BB);
7884  } while (Resimplify);
7885
7886  return Changed;
7887}
7888
7891 ArrayRef<WeakVH> LoopHeaders) {
7892 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
7893 Options)
7894 .run(BB);
7895}
#define Fail
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1309
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
hexagon gen pred
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert, const TargetTransformInfo &TTI)
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights, bool IsExpected)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool isLifeTimeMarker(const Instruction *I)
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This pass exposes codegen information to IR-level passes.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1627
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1144
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1489
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:334
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1911
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:174
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:378
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:250
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:658
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:367
const Instruction & front() const
Definition: BasicBlock.h:471
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:481
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:497
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:331
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:467
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:489
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:717
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:386
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:67
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:677
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:485
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:631
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:516
The address of a basic block.
Definition: Constants.h:890
BasicBlock * getBasicBlock() const
Definition: Constants.h:919
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:747
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1292
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1097
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2599
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isNegative() const
Definition: Constants.h:201
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:256
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:185
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:149
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:769
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
iterator begin()
Definition: Function.h:851
size_t size() const
Definition: Function.h:856
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
void applyUpdates(ArrayRef< typename DomTreeT::UpdateType > Updates)
Submit updates to all available trees.
bool hasPostDomTree() const
Returns true if it holds a PostDomTreeT.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:915
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2277
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2059
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1280
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:933
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2555
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1454
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:308
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:217
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1891
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:230
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1766
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1160
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2261
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1137
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1807
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2041
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles=std::nullopt)
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:552
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1492
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1820
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2137
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2027
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1514
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1683
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1131
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1693
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2216
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1536
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1699
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1378
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:104
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:66
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:381
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:277
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1713
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:932
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:463
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:266
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:174
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1069
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:95
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:346
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:384
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:435
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:441
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
Align getAlign() const
Definition: Instructions.h:329
bool isSimple() const
Definition: Instructions.h:366
Value * getValueOperand()
Definition: Instructions.h:374
bool isUnordered() const
Definition: Instructions.h:368
Value * getPointerOperand()
Definition: Instructions.h:377
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:251
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:221
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void set(Value *Val)
Definition: Value.h:882
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
op_range operands()
Definition: User.h:242
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:507
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:854
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use \ID as an operand.
Definition: DebugInfo.cpp:1808
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:238
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:47
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1715
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:130
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
Definition: ValueMapper.h:272
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:58
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2182
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2038
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1768
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2090
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
Definition: BasicBlock.cpp:698
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1118
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2132
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1422
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:3164
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
cl::opt< bool > RequireAndPreserveDomTree
Command-line option ("simplifycfg-require-and-preserve-domtree") controlling whether SimplifyCFG is required to maintain a valid dominator tree.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3345
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3611
@ And
Bitwise or logical AND of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1921
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4113
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1997
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2082
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
unsigned succ_size(const MachineBasicBlock *BB)
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1607
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2050
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1485
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition: ValueMapper.h:281
void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254