LLVM 19.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
35#include "llvm/IR/Attributes.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/IRBuilder.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
52#include "llvm/IR/LLVMContext.h"
53#include "llvm/IR/MDBuilder.h"
55#include "llvm/IR/Metadata.h"
56#include "llvm/IR/Module.h"
57#include "llvm/IR/NoFolder.h"
58#include "llvm/IR/Operator.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/Use.h"
63#include "llvm/IR/User.h"
64#include "llvm/IR/Value.h"
65#include "llvm/IR/ValueHandle.h"
69#include "llvm/Support/Debug.h"
77#include <algorithm>
78#include <cassert>
79#include <climits>
80#include <cstddef>
81#include <cstdint>
82#include <iterator>
83#include <map>
84#include <optional>
85#include <set>
86#include <tuple>
87#include <utility>
88#include <vector>
89
90using namespace llvm;
91using namespace PatternMatch;
92
93#define DEBUG_TYPE "simplifycfg"
94
96 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
97
98 cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
99 "into preserving DomTree,"));
100
101// Chosen as 2 so as to be cheap, but still to have enough power to fold
102// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
103// To catch this, we need to fold a compare and a select, hence '2' being the
104// minimum reasonable default.
106 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
107 cl::desc(
108 "Control the amount of phi node folding to perform (default = 2)"));
109
111 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
112 cl::desc("Control the maximal total instruction cost that we are willing "
113 "to speculatively execute to fold a 2-entry PHI node into a "
114 "select (default = 4)"));
115
116static cl::opt<bool>
117 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
118 cl::desc("Hoist common instructions up to the parent block"));
119
121 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
122 cl::init(20),
123 cl::desc("Allow reordering across at most this many "
124 "instructions when hoisting"));
125
126static cl::opt<bool>
127 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
128 cl::desc("Sink common instructions down to the end block"));
129
131 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
132 cl::desc("Hoist conditional stores if an unconditional store precedes"));
133
135 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
136 cl::desc("Hoist conditional stores even if an unconditional store does not "
137 "precede - hoist multiple conditional stores into a single "
138 "predicated store"));
139
141 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
142 cl::desc("When merging conditional stores, do so even if the resultant "
143 "basic blocks are unlikely to be if-converted as a result"));
144
146 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
147 cl::desc("Allow exactly one expensive instruction to be speculatively "
148 "executed"));
149
151 "max-speculation-depth", cl::Hidden, cl::init(10),
152 cl::desc("Limit maximum recursion depth when calculating costs of "
153 "speculatively executed instructions"));
154
155static cl::opt<int>
156 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
157 cl::init(10),
158 cl::desc("Max size of a block which is still considered "
159 "small enough to thread through"));
160
161// Two is chosen to allow one negation and a logical combine.
163 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
164 cl::init(2),
165 cl::desc("Maximum cost of combining conditions when "
166 "folding branches"));
167
169 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
170 cl::init(2),
171 cl::desc("Multiplier to apply to threshold when determining whether or not "
172 "to fold branch to common destination when vector operations are "
173 "present"));
174
176 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
177 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
178
180 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
181 cl::desc("Limit cases to analyze when converting a switch to select"));
182
183STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
184STATISTIC(NumLinearMaps,
185 "Number of switch instructions turned into linear mapping");
186STATISTIC(NumLookupTables,
187 "Number of switch instructions turned into lookup tables");
189 NumLookupTablesHoles,
190 "Number of switch instructions turned into lookup tables (holes checked)");
191STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
192STATISTIC(NumFoldValueComparisonIntoPredecessors,
193 "Number of value comparisons folded into predecessor basic blocks");
194STATISTIC(NumFoldBranchToCommonDest,
195 "Number of branches folded into predecessor basic block");
197 NumHoistCommonCode,
198 "Number of common instruction 'blocks' hoisted up to the begin block");
199STATISTIC(NumHoistCommonInstrs,
200 "Number of common instructions hoisted up to the begin block");
201STATISTIC(NumSinkCommonCode,
202 "Number of common instruction 'blocks' sunk down to the end block");
203STATISTIC(NumSinkCommonInstrs,
204 "Number of common instructions sunk down to the end block");
205STATISTIC(NumSpeculations, "Number of speculative executed instructions");
206STATISTIC(NumInvokes,
207 "Number of invokes with empty resume blocks simplified into calls");
208STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
209STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
210
211namespace {
212
213// The first field contains the value that the switch produces when a certain
214// case group is selected, and the second field is a vector containing the
215// cases composing the case group.
216using SwitchCaseResultVectorTy =
218
219// The first field contains the phi node that generates a result of the switch
220// and the second field contains the value generated for a certain case in the
221// switch for that PHI.
222using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
223
224/// ValueEqualityComparisonCase - Represents a case of a switch.
225struct ValueEqualityComparisonCase {
227 BasicBlock *Dest;
228
229 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
230 : Value(Value), Dest(Dest) {}
231
232 bool operator<(ValueEqualityComparisonCase RHS) const {
233 // Comparing pointers is ok as we only rely on the order for uniquing.
234 return Value < RHS.Value;
235 }
236
237 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
238};
239
240class SimplifyCFGOpt {
242 DomTreeUpdater *DTU;
243 const DataLayout &DL;
244 ArrayRef<WeakVH> LoopHeaders;
246 bool Resimplify;
247
248 Value *isValueEqualityComparison(Instruction *TI);
249 BasicBlock *GetValueEqualityComparisonCases(
250 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
251 bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
252 BasicBlock *Pred,
253 IRBuilder<> &Builder);
254 bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
255 Instruction *PTI,
256 IRBuilder<> &Builder);
257 bool FoldValueComparisonIntoPredecessors(Instruction *TI,
258 IRBuilder<> &Builder);
259
260 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
261 bool simplifySingleResume(ResumeInst *RI);
262 bool simplifyCommonResume(ResumeInst *RI);
263 bool simplifyCleanupReturn(CleanupReturnInst *RI);
264 bool simplifyUnreachable(UnreachableInst *UI);
265 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
266 bool simplifyIndirectBr(IndirectBrInst *IBI);
267 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
268 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
269 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
270
271 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
272 IRBuilder<> &Builder);
273
274 bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
275 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
276 Instruction *TI, Instruction *I1,
277 SmallVectorImpl<Instruction *> &OtherSuccTIs);
278 bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
279 bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
280 BasicBlock *TrueBB, BasicBlock *FalseBB,
281 uint32_t TrueWeight, uint32_t FalseWeight);
282 bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
283 const DataLayout &DL);
284 bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
285 bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
286 bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
287
288public:
289 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
290 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
291 const SimplifyCFGOptions &Opts)
292 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
293 assert((!DTU || !DTU->hasPostDomTree()) &&
294 "SimplifyCFG is not yet capable of maintaining validity of a "
295 "PostDomTree, so don't ask for it.");
296 }
297
298 bool simplifyOnce(BasicBlock *BB);
299 bool run(BasicBlock *BB);
300
301 // Helper to set Resimplify and return change indication.
302 bool requestResimplify() {
303 Resimplify = true;
304 return true;
305 }
306};
307
308} // end anonymous namespace
309
310/// Return true if all the PHI nodes in the basic block \p BB
311/// receive compatible (identical) incoming values when coming from
312/// all of the predecessor blocks that are specified in \p IncomingBlocks.
313///
314/// Note that if the values aren't exactly identical, but \p EquivalenceSet
315/// is provided, and *both* of the values are present in the set,
316/// then they are considered equal.
318 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
319 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
320 assert(IncomingBlocks.size() == 2 &&
321 "Only for a pair of incoming blocks at the time!");
322
323 // FIXME: it is okay if one of the incoming values is an `undef` value,
324 // iff the other incoming value is guaranteed to be a non-poison value.
325 // FIXME: it is okay if one of the incoming values is a `poison` value.
326 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
327 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
328 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
329 if (IV0 == IV1)
330 return true;
331 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
332 EquivalenceSet->contains(IV1))
333 return true;
334 return false;
335 });
336}
337
338/// Return true if it is safe to merge these two
339/// terminator instructions together.
340static bool
342 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
343 if (SI1 == SI2)
344 return false; // Can't merge with self!
345
346 // It is not safe to merge these two switch instructions if they have a common
347 // successor, and if that successor has a PHI node, and if *that* PHI node has
348 // conflicting incoming values from the two switch blocks.
349 BasicBlock *SI1BB = SI1->getParent();
350 BasicBlock *SI2BB = SI2->getParent();
351
352 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
353 bool Fail = false;
354 for (BasicBlock *Succ : successors(SI2BB)) {
355 if (!SI1Succs.count(Succ))
356 continue;
357 if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
358 continue;
359 Fail = true;
360 if (FailBlocks)
361 FailBlocks->insert(Succ);
362 else
363 break;
364 }
365
366 return !Fail;
367}
368
369/// Update PHI nodes in Succ to indicate that there will now be entries in it
370/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
371/// will be the same as those coming in from ExistPred, an existing predecessor
372/// of Succ.
373static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
374 BasicBlock *ExistPred,
375 MemorySSAUpdater *MSSAU = nullptr) {
376 for (PHINode &PN : Succ->phis())
377 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
378 if (MSSAU)
379 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
380 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
381}
382
383/// Compute an abstract "cost" of speculating the given instruction,
384/// which is assumed to be safe to speculate. TCC_Free means cheap,
385/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
386/// expensive.
388 const TargetTransformInfo &TTI) {
389 assert((!isa<Instruction>(I) ||
390 isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
391 "Instruction is not safe to speculatively execute!");
393}
394
395/// If we have a merge point of an "if condition" as accepted above,
396/// return true if the specified value dominates the block. We
397/// don't handle the true generality of domination here, just a special case
398/// which works well enough for us.
399///
400/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
401/// see if V (which must be an instruction) and its recursive operands
402/// that do not dominate BB have a combined cost lower than Budget and
403/// are non-trapping. If both are true, the instruction is inserted into the
404/// set and true is returned.
405///
406/// The cost for most non-trapping instructions is defined as 1 except for
407/// Select whose cost is 2.
408///
409/// After this function returns, Cost is increased by the cost of
410/// V plus its non-dominating operands. If that cost is greater than
411/// Budget, false is returned and Cost is undefined.
413 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
415 InstructionCost Budget,
417 unsigned Depth = 0) {
418 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
419 // so limit the recursion depth.
420 // TODO: While this recursion limit does prevent pathological behavior, it
421 // would be better to track visited instructions to avoid cycles.
423 return false;
424
425 Instruction *I = dyn_cast<Instruction>(V);
426 if (!I) {
427 // Non-instructions dominate all instructions and can be executed
428 // unconditionally.
429 return true;
430 }
431 BasicBlock *PBB = I->getParent();
432
433 // We don't want to allow weird loops that might have the "if condition" in
434 // the bottom of this block.
435 if (PBB == BB)
436 return false;
437
438 // If this instruction is defined in a block that contains an unconditional
439 // branch to BB, then it must be in the 'conditional' part of the "if
440 // statement". If not, it definitely dominates the region.
441 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
442 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
443 return true;
444
445 // If we have seen this instruction before, don't count it again.
446 if (AggressiveInsts.count(I))
447 return true;
448
449 // Okay, it looks like the instruction IS in the "condition". Check to
450 // see if it's a cheap instruction to unconditionally compute, and if it
451 // only uses stuff defined outside of the condition. If so, hoist it out.
453 return false;
454
456
457 // Allow exactly one instruction to be speculated regardless of its cost
458 // (as long as it is safe to do so).
459 // This is intended to flatten the CFG even if the instruction is a division
460 // or other expensive operation. The speculation of an expensive instruction
461 // is expected to be undone in CodeGenPrepare if the speculation has not
462 // enabled further IR optimizations.
463 if (Cost > Budget &&
464 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
465 !Cost.isValid()))
466 return false;
467
468 // Okay, we can only really hoist these out if their operands do
469 // not take us over the cost threshold.
470 for (Use &Op : I->operands())
471 if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
472 Depth + 1))
473 return false;
474 // Okay, it's safe to do this! Remember this instruction.
475 AggressiveInsts.insert(I);
476 return true;
477}
478
479/// Extract ConstantInt from value, looking through IntToPtr
480/// and PointerNullValue. Return NULL if value is not a constant int.
482 // Normal constant int.
483 ConstantInt *CI = dyn_cast<ConstantInt>(V);
484 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
485 DL.isNonIntegralPointerType(V->getType()))
486 return CI;
487
488 // This is some kind of pointer constant. Turn it into a pointer-sized
489 // ConstantInt if possible.
490 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
491
492 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
493 if (isa<ConstantPointerNull>(V))
494 return ConstantInt::get(PtrTy, 0);
495
496 // IntToPtr const int.
497 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
498 if (CE->getOpcode() == Instruction::IntToPtr)
499 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
500 // The constant is very likely to have the right type already.
501 if (CI->getType() == PtrTy)
502 return CI;
503 else
504 return cast<ConstantInt>(
505 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
506 }
507 return nullptr;
508}
509
510namespace {
511
512/// Given a chain of or (||) or and (&&) comparison of a value against a
513/// constant, this will try to recover the information required for a switch
514/// structure.
515/// It will depth-first traverse the chain of comparison, seeking for patterns
516/// like %a == 12 or %a < 4 and combine them to produce a set of integer
517/// representing the different cases for the switch.
518/// Note that if the chain is composed of '||' it will build the set of elements
519/// that matches the comparisons (i.e. any of this value validate the chain)
520/// while for a chain of '&&' it will build the set elements that make the test
521/// fail.
522struct ConstantComparesGatherer {
523 const DataLayout &DL;
524
525 /// Value found for the switch comparison
526 Value *CompValue = nullptr;
527
528 /// Extra clause to be checked before the switch
529 Value *Extra = nullptr;
530
531 /// Set of integers to match in switch
533
534 /// Number of comparisons matched in the and/or chain
535 unsigned UsedICmps = 0;
536
537 /// Construct and compute the result for the comparison instruction Cond
538 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
539 gather(Cond);
540 }
541
542 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
543 ConstantComparesGatherer &
544 operator=(const ConstantComparesGatherer &) = delete;
545
546private:
547 /// Try to set the current value used for the comparison, it succeeds only if
548 /// it wasn't set before or if the new value is the same as the old one
549 bool setValueOnce(Value *NewVal) {
550 if (CompValue && CompValue != NewVal)
551 return false;
552 CompValue = NewVal;
553 return (CompValue != nullptr);
554 }
555
556 /// Try to match Instruction "I" as a comparison against a constant and
557 /// populates the array Vals with the set of values that match (or do not
558 /// match depending on isEQ).
559 /// Return false on failure. On success, the Value the comparison matched
560 /// against is placed in CompValue.
561 /// If CompValue is already set, the function is expected to fail if a match
562 /// is found but the value compared to is different.
563 bool matchInstruction(Instruction *I, bool isEQ) {
564 // If this is an icmp against a constant, handle this as one of the cases.
565 ICmpInst *ICI;
566 ConstantInt *C;
567 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
568 (C = GetConstantInt(I->getOperand(1), DL)))) {
569 return false;
570 }
571
572 Value *RHSVal;
573 const APInt *RHSC;
574
575 // Pattern match a special case
576 // (x & ~2^z) == y --> x == y || x == y|2^z
577 // This undoes a transformation done by instcombine to fuse 2 compares.
578 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
579 // It's a little bit hard to see why the following transformations are
580 // correct. Here is a CVC3 program to verify them for 64-bit values:
581
582 /*
583 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
584 x : BITVECTOR(64);
585 y : BITVECTOR(64);
586 z : BITVECTOR(64);
587 mask : BITVECTOR(64) = BVSHL(ONE, z);
588 QUERY( (y & ~mask = y) =>
589 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
590 );
591 QUERY( (y | mask = y) =>
592 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
593 );
594 */
595
596 // Please note that each pattern must be a dual implication (<--> or
597 // iff). One directional implication can create spurious matches. If the
598 // implication is only one-way, an unsatisfiable condition on the left
599 // side can imply a satisfiable condition on the right side. Dual
600 // implication ensures that satisfiable conditions are transformed to
601 // other satisfiable conditions and unsatisfiable conditions are
602 // transformed to other unsatisfiable conditions.
603
604 // Here is a concrete example of a unsatisfiable condition on the left
605 // implying a satisfiable condition on the right:
606 //
607 // mask = (1 << z)
608 // (x & ~mask) == y --> (x == y || x == (y | mask))
609 //
610 // Substituting y = 3, z = 0 yields:
611 // (x & -2) == 3 --> (x == 3 || x == 2)
612
613 // Pattern match a special case:
614 /*
615 QUERY( (y & ~mask = y) =>
616 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
617 );
618 */
619 if (match(ICI->getOperand(0),
620 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
621 APInt Mask = ~*RHSC;
622 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
623 // If we already have a value for the switch, it has to match!
624 if (!setValueOnce(RHSVal))
625 return false;
626
627 Vals.push_back(C);
628 Vals.push_back(
629 ConstantInt::get(C->getContext(),
630 C->getValue() | Mask));
631 UsedICmps++;
632 return true;
633 }
634 }
635
636 // Pattern match a special case:
637 /*
638 QUERY( (y | mask = y) =>
639 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
640 );
641 */
642 if (match(ICI->getOperand(0),
643 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
644 APInt Mask = *RHSC;
645 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(RHSVal))
648 return false;
649
650 Vals.push_back(C);
651 Vals.push_back(ConstantInt::get(C->getContext(),
652 C->getValue() & ~Mask));
653 UsedICmps++;
654 return true;
655 }
656 }
657
658 // If we already have a value for the switch, it has to match!
659 if (!setValueOnce(ICI->getOperand(0)))
660 return false;
661
662 UsedICmps++;
663 Vals.push_back(C);
664 return ICI->getOperand(0);
665 }
666
667 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
668 ConstantRange Span =
670
671 // Shift the range if the compare is fed by an add. This is the range
672 // compare idiom as emitted by instcombine.
673 Value *CandidateVal = I->getOperand(0);
674 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
675 Span = Span.subtract(*RHSC);
676 CandidateVal = RHSVal;
677 }
678
679 // If this is an and/!= check, then we are looking to build the set of
680 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
681 // x != 0 && x != 1.
682 if (!isEQ)
683 Span = Span.inverse();
684
685 // If there are a ton of values, we don't want to make a ginormous switch.
686 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
687 return false;
688 }
689
690 // If we already have a value for the switch, it has to match!
691 if (!setValueOnce(CandidateVal))
692 return false;
693
694 // Add all values from the range to the set
695 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
696 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
697
698 UsedICmps++;
699 return true;
700 }
701
702 /// Given a potentially 'or'd or 'and'd together collection of icmp
703 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
704 /// the value being compared, and stick the list constants into the Vals
705 /// vector.
706 /// One "Extra" case is allowed to differ from the other.
707 void gather(Value *V) {
708 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
709
710 // Keep a stack (SmallVector for efficiency) for depth-first traversal
713
714 // Initialize
715 Visited.insert(V);
716 DFT.push_back(V);
717
718 while (!DFT.empty()) {
719 V = DFT.pop_back_val();
720
721 if (Instruction *I = dyn_cast<Instruction>(V)) {
722 // If it is a || (or && depending on isEQ), process the operands.
723 Value *Op0, *Op1;
724 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
725 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
726 if (Visited.insert(Op1).second)
727 DFT.push_back(Op1);
728 if (Visited.insert(Op0).second)
729 DFT.push_back(Op0);
730
731 continue;
732 }
733
734 // Try to match the current instruction
735 if (matchInstruction(I, isEQ))
736 // Match succeed, continue the loop
737 continue;
738 }
739
740 // One element of the sequence of || (or &&) could not be match as a
741 // comparison against the same value as the others.
742 // We allow only one "Extra" case to be checked before the switch
743 if (!Extra) {
744 Extra = V;
745 continue;
746 }
747 // Failed to parse a proper sequence, abort now
748 CompValue = nullptr;
749 break;
750 }
751 }
752};
753
754} // end anonymous namespace
755
757 MemorySSAUpdater *MSSAU = nullptr) {
758 Instruction *Cond = nullptr;
759 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
760 Cond = dyn_cast<Instruction>(SI->getCondition());
761 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
762 if (BI->isConditional())
763 Cond = dyn_cast<Instruction>(BI->getCondition());
764 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
765 Cond = dyn_cast<Instruction>(IBI->getAddress());
766 }
767
768 TI->eraseFromParent();
769 if (Cond)
771}
772
773/// Return true if the specified terminator checks
774/// to see if a value is equal to constant integer value.
775Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
776 Value *CV = nullptr;
777 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
778 // Do not permit merging of large switch instructions into their
779 // predecessors unless there is only one predecessor.
780 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
781 CV = SI->getCondition();
782 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
783 if (BI->isConditional() && BI->getCondition()->hasOneUse())
784 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
785 if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
786 CV = ICI->getOperand(0);
787 }
788
789 // Unwrap any lossless ptrtoint cast.
790 if (CV) {
791 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
792 Value *Ptr = PTII->getPointerOperand();
793 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
794 CV = Ptr;
795 }
796 }
797 return CV;
798}
799
800/// Given a value comparison instruction,
801/// decode all of the 'cases' that it represents and return the 'default' block.
802BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
803 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
804 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
805 Cases.reserve(SI->getNumCases());
806 for (auto Case : SI->cases())
807 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
808 Case.getCaseSuccessor()));
809 return SI->getDefaultDest();
810 }
811
812 BranchInst *BI = cast<BranchInst>(TI);
813 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
814 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
815 Cases.push_back(ValueEqualityComparisonCase(
816 GetConstantInt(ICI->getOperand(1), DL), Succ));
817 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
818}
819
820/// Given a vector of bb/value pairs, remove any entries
821/// in the list that match the specified block.
822static void
824 std::vector<ValueEqualityComparisonCase> &Cases) {
825 llvm::erase(Cases, BB);
826}
827
828/// Return true if there are any keys in C1 that exist in C2 as well.
829static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
830 std::vector<ValueEqualityComparisonCase> &C2) {
831 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
832
833 // Make V1 be smaller than V2.
834 if (V1->size() > V2->size())
835 std::swap(V1, V2);
836
837 if (V1->empty())
838 return false;
839 if (V1->size() == 1) {
840 // Just scan V2.
841 ConstantInt *TheVal = (*V1)[0].Value;
842 for (const ValueEqualityComparisonCase &VECC : *V2)
843 if (TheVal == VECC.Value)
844 return true;
845 }
846
847 // Otherwise, just sort both lists and compare element by element.
848 array_pod_sort(V1->begin(), V1->end());
849 array_pod_sort(V2->begin(), V2->end());
850 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
851 while (i1 != e1 && i2 != e2) {
852 if ((*V1)[i1].Value == (*V2)[i2].Value)
853 return true;
854 if ((*V1)[i1].Value < (*V2)[i2].Value)
855 ++i1;
856 else
857 ++i2;
858 }
859 return false;
860}
861
862// Set branch weights on SwitchInst. This sets the metadata if there is at
863// least one non-zero weight.
865 // Check that there is at least one non-zero weight. Otherwise, pass
866 // nullptr to setMetadata which will erase the existing metadata.
867 MDNode *N = nullptr;
868 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
869 N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
870 SI->setMetadata(LLVMContext::MD_prof, N);
871}
872
873// Similar to the above, but for branch and select instructions that take
874// exactly 2 weights.
875static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
876 uint32_t FalseWeight) {
877 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
878 // Check that there is at least one non-zero weight. Otherwise, pass
879 // nullptr to setMetadata which will erase the existing metadata.
880 MDNode *N = nullptr;
881 if (TrueWeight || FalseWeight)
882 N = MDBuilder(I->getParent()->getContext())
883 .createBranchWeights(TrueWeight, FalseWeight);
884 I->setMetadata(LLVMContext::MD_prof, N);
885}
886
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // FoldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
  EliminateBlockCases(PredDef, PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
  EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!ValuesOverlap(PredCases, ThisCases))
      return false;

    if (isa<BranchInst>(TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      // NOTE(review): a statement appears to be missing from this listing
      // here (presumably erasing TI and DCE'ing its condition) — confirm
      // against the upstream file.

      if (DTU)
        DTU->applyUpdates(
            {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
    // Okay, TI has cases that are statically dead, prune them away.
    // NOTE(review): the declaration of `DeadCases` (presumably a small set of
    // the known-impossible case values) is missing from this listing —
    // confirm against the upstream file.
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      DeadCases.insert(PredCases[i].Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    // Iterate backwards so that removeCase() does not disturb the cases we
    // have not yet visited.
    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(i->getCaseValue())) {
        Successor->removePredecessor(PredDef);
        SI.removeCase(i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    // Delete the edge for any successor that lost all of its cases.
    if (DTU) {
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back({DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
    if (PredCases[i].Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = PredCases[i].Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
    if (ThisCases[i].Value == TIV) {
      TheRealDest = ThisCases[i].Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Succ);
      Succ->removePredecessor(TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  // NOTE(review): statements appear to be missing from this listing here
  // (presumably erasing the old terminator and declaring the `Updates`
  // vector) — confirm against the upstream file.
  if (DTU) {
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1040
1041namespace {
1042
1043/// This class implements a stable ordering of constant
1044/// integers that does not depend on their address. This is important for
1045/// applications that sort ConstantInt's to ensure uniqueness.
1046struct ConstantIntOrdering {
1047 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1048 return LHS->getValue().ult(RHS->getValue());
1049 }
1050};
1051
1052} // end anonymous namespace
1053
                                     ConstantInt *const *P2) {
  // NOTE(review): the first line of this function's signature (a static
  // qsort-style predicate taking `ConstantInt *const *P1`) is missing from
  // this listing — confirm against the upstream file.
  const ConstantInt *LHS = *P1;
  const ConstantInt *RHS = *P2;
  if (LHS == RHS)
    return 0;
  // Returns 1 when LHS is the *smaller* value, i.e. under qsort conventions
  // this sorts in descending unsigned order.
  return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
}
1062
/// Get Weights of a given terminator, the default weight is at the front
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
/// metadata.
/// NOTE(review): the first signature line is missing from this listing
/// (presumably a static function whose first parameter is the terminator
/// `TI`) — confirm against the upstream file.
                             SmallVectorImpl<uint64_t> &Weights) {
  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
  assert(MD);
  // Weight operands start at index 1 (operand 0 presumably holds the !prof
  // kind string — confirm against the branch_weights metadata format).
  for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
    ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
    Weights.push_back(CI->getValue().getZExtValue());
  }

  // If TI is a conditional eq, the default case is the false case,
  // and the corresponding branch-weight data is at index 2. We swap the
  // default weight to be the first entry.
  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    assert(Weights.size() == 2);
    ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      std::swap(Weights.front(), Weights.back());
  }
}
1085
/// Keep halving the weights until all can fit in uint32_t.
/// NOTE(review): the signature line is missing from this listing (presumably
/// a static function taking a mutable range of uint64_t weights) — confirm
/// against the upstream file.
  uint64_t Max = *llvm::max_element(Weights);
  if (Max > UINT_MAX) {
    // Shift every weight right by just enough bits that the largest weight
    // fits in 32 bits; relative proportions are approximately preserved.
    unsigned Offset = 32 - llvm::countl_zero(Max);
    for (uint64_t &I : Weights)
      I >>= Offset;
  }
}
1095
// Clones the non-terminator ("bonus") instructions of BB into PredBlock just
// before its terminator, remaps their operands through VMap, and redirects
// block-closed-SSA PHI uses in PredBlock to the clones.
// NOTE(review): the first signature line of this function is missing from
// this listing — confirm against the upstream file.
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!isa<DbgInfoIntrinsic>(BonusInst) &&
        PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc());
    }

    // NOTE(review): the remap-flags argument line of this call is missing
    // from this listing — confirm against the upstream file.
    RemapInstruction(NewBonusInst, VMap,

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(PredBlock, PTI->getIterator());
    auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
    // NOTE(review): the trailing argument line(s) of this call are missing
    // from this listing — confirm against the upstream file.
    RemapDbgVariableRecordRange(NewBonusInst->getModule(), Range, VMap,

    if (isa<DbgInfoIntrinsic>(BonusInst))
      continue;

    NewBonusInst->takeName(&BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(BonusInst.uses())) {
      auto *UI = cast<Instruction>(U.getUser());
      auto *PN = dyn_cast<PHINode>(UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }
}
1164
// Fold the value-equality comparison terminator TI into its predecessor's
// comparison terminator PTI on the same condition value CV: a single merged
// SwitchInst is built in the predecessor, and branch-weight metadata from
// both terminators is combined and rescaled.
bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  // NOTE(review): the declaration of the DomTree `Updates` vector appears to
  // be missing from this listing here — confirm against the upstream file.

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  // NOTE(review): the declaration of the `NewSuccessors` map appears to be
  // missing from this listing here — confirm against the upstream file.

  // Update the branch weight metadata along the way
  // NOTE(review): the declaration of the `Weights` vector appears to be
  // missing from this listing here — confirm against the upstream file.
  bool PredHasWeights = hasBranchWeightMD(*PTI);
  bool SuccHasWeights = hasBranchWeightMD(*TI);

  if (PredHasWeights) {
    GetBranchWeights(PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(1 + PredCases.size(), 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    GetBranchWeights(TI, SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(1 + BBCases.size(), 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        std::swap(PredCases[i], PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        std::swap(PredCases[i], PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (PTIHandled.count(BBCases[i].Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(WeightsForHandled[BBCases[i].Value]);
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(WeightsForHandled[I]);
      PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
    Updates.reserve(Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    for (auto I : seq(NewSuccessor.second)) {
      (void)I;
      AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
    }
    if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
      Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    CV =
        Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(V.Value, V.Dest);

  if (PredHasWeights || SuccHasWeights) {
    // Halve the weights if any of them cannot fit in an uint32_t
    FitWeights(Weights);

    SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());

    setBranchWeights(NewSI, MDWeights);
  }

  // NOTE(review): a statement (presumably erasing the old terminator PTI)
  // appears to be missing from this listing here — confirm against the
  // upstream file.

  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
        BranchInst::Create(InfLoopBlock, InfLoopBlock);
        if (DTU)
          Updates.push_back(
              {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(i, InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back({DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1381
/// The specified terminator is a value equality comparison instruction
/// (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
/// on the same value. If so, and if safe to do so, fold them together.
bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
                                                         IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  Value *CV = isValueEqualityComparison(TI); // CondVal
  assert(CV && "Not a comparison?");

  bool Changed = false;

  // NOTE(review): the worklist declaration (`Preds`, presumably seeded with
  // BB's predecessors) is missing from this listing — confirm against the
  // upstream file.
  while (!Preds.empty()) {
    BasicBlock *Pred = Preds.pop_back_val();
    Instruction *PTI = Pred->getTerminator();

    // Don't try to fold into itself.
    if (Pred == BB)
      continue;

    // See if the predecessor is a comparison with the same value.
    Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
    if (PCV != CV)
      continue;

    // NOTE(review): the `FailBlocks` declaration is missing from this
    // listing — confirm against the upstream file.
    if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
      // Split the offending predecessors so the merge becomes safe; bail out
      // entirely if splitting fails.
      for (auto *Succ : FailBlocks) {
        if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
          return false;
      }
    }

    PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
    Changed = true;
  }
  return Changed;
}
1421
// If we would need to insert a select that uses the value of this invoke
// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
// need to do this), we can't hoist the invoke, as there is nowhere to put the
// select in this case.
// NOTE(review): the first signature line is missing from this listing
// (presumably a static predicate taking BB1 and BB2 ahead of I1, I2) —
// confirm against the upstream file.
                                Instruction *I1, Instruction *I2) {
  for (BasicBlock *Succ : successors(BB1)) {
    for (const PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
      // Hoisting is blocked only when the PHI disagrees between the two
      // incoming blocks and one side is the invoke's own result.
      if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
        return false;
      }
    }
  }
  return true;
}
1439
// Get interesting characteristics of instructions that
// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
// instructions can be reordered across.
// NOTE(review): the SkipFlags enum definition and this function's signature
// line are missing from this listing — confirm against the upstream file.

  unsigned Flags = 0;
  if (I->mayReadFromMemory())
    Flags |= SkipReadMem;
  // We can't arbitrarily move around allocas, e.g. moving allocas (especially
  // inalloca) across stacksave/stackrestore boundaries.
  if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
    Flags |= SkipSideEffect;
  // NOTE(review): the condition guarding this flag is missing from this
  // listing — confirm against the upstream file.
    Flags |= SkipImplicitControlFlow;
  return Flags;
}
1461
// Returns true if it is safe to reorder an instruction across preceding
// instructions in a basic block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
  // Don't reorder a store over a load.
  if ((Flags & SkipReadMem) && I->mayWriteToMemory())
    return false;

  // If we have seen an instruction with side effects, it's unsafe to reorder an
  // instruction which reads memory or itself has side effects.
  if ((Flags & SkipSideEffect) &&
      (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
    return false;

  // Reordering across an instruction which does not necessarily transfer
  // control to the next instruction is speculation.
  // NOTE(review): the condition for this early exit is missing from this
  // listing — confirm against the upstream file.
    return false;

  // Hoisting of llvm.deoptimize is only legal together with the next return
  // instruction, which this pass is not always able to do.
  if (auto *CB = dyn_cast<CallBase>(I))
    if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
      return false;

  // It's also unsafe/illegal to hoist an instruction above its instruction
  // operands
  BasicBlock *BB = I->getParent();
  for (Value *Op : I->operands()) {
    if (auto *J = dyn_cast<Instruction>(Op))
      if (J->getParent() == BB)
        return false;
  }

  return true;
}
1497
1498static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1499
/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
/// instructions \p I1 and \p I2 can and should be hoisted.
/// NOTE(review): the signature line is missing from this listing (presumably
/// a static predicate over I1, I2 and the TargetTransformInfo) — confirm
/// against the upstream file.
                                          const TargetTransformInfo &TTI) {
  // If we're going to hoist a call, make sure that the two instructions
  // we're commoning/hoisting are both marked with musttail, or neither of
  // them is marked as such. Otherwise, we might end up in a situation where
  // we hoist from a block where the terminator is a `ret` to a block where
  // the terminator is a `br`, and `musttail` calls expect to be followed by
  // a return.
  auto *C1 = dyn_cast<CallInst>(I1);
  auto *C2 = dyn_cast<CallInst>(I2);
  if (C1 && C2)
    if (C1->isMustTailCall() != C2->isMustTailCall())
      return false;

  // NOTE(review): the condition for this early exit is missing from this
  // listing — confirm against the upstream file.
    return false;

  // If any of the two call sites has nomerge or convergent attribute, stop
  // hoisting.
  if (const auto *CB1 = dyn_cast<CallBase>(I1))
    if (CB1->cannotMerge() || CB1->isConvergent())
      return false;
  if (const auto *CB2 = dyn_cast<CallBase>(I2))
    if (CB2->cannotMerge() || CB2->isConvergent())
      return false;

  return true;
}
1530
/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
/// hoistCommonCodeFromSuccessors. e.g. The input:
/// I1 DVRs: { x, z },
/// OtherInsts: { I2 DVRs: { x, y, z } }
/// would result in hoisting only DbgVariableRecord x.
/// NOTE(review): the first signature line of this function is missing from
/// this listing — confirm against the upstream file.
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators.
  // NOTE(review): the declaration of `Itrs` is missing from this listing —
  // confirm against the upstream file.
  Itrs.reserve(OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current are identical.
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
                  // NOTE(review): the inner lambda's parameter list is
                  // missing from this listing — confirm against the
                  // upstream file.
                  return Itrs[0].first->isIdenticalToWhenDefined(*I);
                  });
  };

  // Collect the iterators.
  Itrs.push_back(
      {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  for (Instruction *Other : OtherInsts) {
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exausted. If
  // the lock-step DbgRecord are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Itrs, atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord.
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
      }
    }
  }
}
1587
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If EqTermsOnly is
/// given, only perform hoisting in case both blocks only contain a terminator.
/// In that case, only the original BI will be replaced and selects for PHIs are
/// added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
                                                   bool EqTermsOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has it's address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by it's address.
  for (auto *Succ : successors(BB))
    if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
      return false;

  auto *TI = BB->getTerminator();

  // The second of pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : successors(BB)) {
    BasicBlock::iterator SuccItr = Succ->begin();
    if (isa<PHINode>(*SuccItr))
      return false;
    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
  }

  // Check if only hoisting terminators is allowed. This does not add new
  // instructions to the hoist location.
  if (EqTermsOnly) {
    // Skip any debug intrinsics, as they are free to hoist.
    for (auto &SuccIter : make_first_range(SuccIterPairs)) {
      auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
      if (!INonDbg->isTerminator())
        return false;
    }
    // Now we know that we only need to hoist debug intrinsics and the
    // terminator. Let the loop below handle those 2 cases.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(SuccIterPairs,
             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    // Skip debug info if it is not identical.
    bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
      Instruction *I2 = &*Iter;
      return I1->isIdenticalToWhenDefined(I2);
    });
    if (!AllDbgInstsAreIdentical) {
      while (isa<DbgInfoIntrinsic>(I1))
        I1 = &*++BB1ItrPair.first;
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter;
        while (isa<DbgInfoIntrinsic>(I2))
          I2 = &*++SuccIter;
      }
    }

    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      if (AllInstsAreIdentical && (!I1->isIdenticalToWhenDefined(I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    // NOTE(review): the declaration of `OtherInsts` appears to be missing
    // from this listing here — confirm against the upstream file.
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(&*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
            // NOTE(review): the second conjunct of this return is missing
            // from this listing — confirm against the upstream file.
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      if (isa<DbgInfoIntrinsic>(I1)) {
        // The debug location is an integral part of a debug info intrinsic
        // and can't be separated from it or replaced. Instead of attempting
        // to merge locations, simply hoist both copies of the intrinsic.
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
        // and leave any that were not hoisted behind (by calling moveBefore
        // rather than moveBeforePreserving).
        I1->moveBefore(TI);
        for (auto &SuccIter : OtherSuccIterRange) {
          auto *I2 = &*SuccIter++;
          assert(isa<DbgInfoIntrinsic>(I2));
          I2->moveBefore(TI);
        }
      } else {
        // For a normal instruction, we just move one to right before the
        // branch, then replace all uses of the other with the first. Finally,
        // we remove the now redundant second instruction.
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
        // and leave any that were not hoisted behind (by calling moveBefore
        // rather than moveBeforePreserving).
        I1->moveBefore(TI);
        for (auto &SuccIter : OtherSuccIterRange) {
          Instruction *I2 = &*SuccIter++;
          assert(I2 != I1);
          if (!I2->use_empty())
            I2->replaceAllUsesWith(I1);
          I1->andIRFlags(I2);
          combineMetadataForCSE(I1, I2, true);
          // I1 and I2 are being combined into a single instruction. Its debug
          // location is the merged locations of the original instructions.
          I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
          I2->eraseFromParent();
        }
      }
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
1779
/// Hoist the lockstep-identical terminators of TI's successor blocks up into
/// TI's own block: clone I1 (the terminator of the first successor) in front
/// of TI, RAUW all the per-successor terminators with the clone, and — when
/// TI is a two-way conditional branch — materialize select instructions for
/// any successor PHIs whose incoming values disagree between the two arms.
///
/// \param TI           terminator of the common predecessor (branch or switch).
/// \param I1           terminator of TI's first successor.
/// \param OtherSuccTIs terminators of the remaining successors, identical to
///                     I1 in lockstep (exactly one element when TI is a
///                     conditional branch).
/// \returns true if the CFG was changed.
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs) {

  // Non-null only when TI is a conditional branch; several checks below are
  // branch-specific (invoke hoisting, select insertion).
  auto *BI = dyn_cast<BranchInst>(TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(I1))
    return false;

  // Bail out if any successor PHI would need a select but one of the
  // incoming values triggers the undefined-value heuristic (or TI is not a
  // conditional branch, in which case we cannot build a select at all).
  for (BasicBlock *Succ : successors(BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow then converting it to a select.
        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
          // NOTE(review): a continuation line of this condition appears to
          // have been dropped by extraction here — verify against upstream.
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(TIParent, TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    // Redirect every use of the per-successor terminators to the hoisted
    // clone, and carry over I1's name.
    I1->replaceAllUsesWith(NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(NT);
    NT->takeName(I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  // NOTE(review): the declaration of `Locs` appears to have been dropped by
  // extraction just above — verify against upstream.
  Locs.push_back(I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DILocation::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    // Cache one select per distinct (BB1V, BB2V) pair so equal disagreements
    // across multiple PHIs share a single select.
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
          IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
          if (isa<FPMathOperator>(PN))
            Builder.setFastMathFlags(PN.getFastMathFlags());

          SI = cast<SelectInst>(Builder.CreateSelect(
              BI->getCondition(), BB1V, BB2V,
              BB1V->getName() + "." + BB2V->getName(), BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, SI);
      }
    }
  }

  // NOTE(review): the declaration of `Updates` appears to have been dropped
  // by extraction here — verify against upstream.

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB1)) {
    AddPredecessorToBlock(Succ, TIParent, BB1);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU)
    for (BasicBlock *Succ : successors(TI))
      Updates.push_back({DominatorTree::Delete, TIParent, Succ});

  // NOTE(review): the statement that erases the old terminator TI appears to
  // have been dropped by extraction here — verify against upstream.
  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
1908
1909// Check lifetime markers.
1910static bool isLifeTimeMarker(const Instruction *I) {
1911 if (auto II = dyn_cast<IntrinsicInst>(I)) {
1912 switch (II->getIntrinsicID()) {
1913 default:
1914 break;
1915 case Intrinsic::lifetime_start:
1916 case Intrinsic::lifetime_end:
1917 return true;
1918 }
1919 }
1920 return false;
1921}
1922
// Returns whether it is "cheap" to replace operand OpIdx of instruction I
// with a (PHI-provided) variable: conservatively false for every intrinsic
// call, true otherwise.
// NOTE(review): the first line of this function's signature appears to have
// been dropped by extraction; only the trailing parameter is visible below.
// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
// into variables.
                                                 int OpIdx) {
  // OpIdx is currently unused; any non-intrinsic instruction qualifies.
  return !isa<IntrinsicInst>(I);
}
1929
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
// instruction instead. For every value that would be required to be provided by
// PHI node (because an operand varies in each input block), add to PHIOperands.
// NOTE(review): the first line of this function's signature appears to have
// been dropped by extraction; only the trailing parameter is visible below.
    DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // exactly zero or one use, and we check later that use is by a single, common
  // PHI instruction in the successor.
  bool HasUse = !Insts.front()->user_empty();
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    // Each instruction must have zero or one use.
    if (HasUse && !I->hasOneUse())
      return false;
    if (!HasUse && !I->user_empty())
      return false;
  }

  // Second pass: all candidates must be the same operation as the first one,
  // with compatible metadata.
  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I0))
      return false;

    // swifterror pointers can only be used by a load or store; sinking a load
    // or store would require introducing a select for the pointer operand,
    // which isn't allowed for swifterror pointers.
    if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
      return false;
    if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // All instructions in Insts are known to be the same opcode. If they have a
  // use, check that the only user is a PHI or in the same block as the
  // instruction, because if a user is in the same block as an instruction we're
  // contemplating sinking, it must already be determined to be sinkable.
  if (HasUse) {
    auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
    auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
    if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
          auto *U = cast<Instruction>(*I->user_begin());
          return (PNUse &&
                  PNUse->getParent() == Succ &&
                  PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
                 U->getParent() == I->getParent();
        }))
      return false;
  }

  // Because SROA can't handle speculating stores of selects, try not to sink
  // loads, stores or lifetime markers of allocas when we'd have to create a
  // PHI for the address operand. Also, because it is likely that loads or
  // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
  // them.
  // This can cause code churn which can have unintended consequences down
  // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
  // FIXME: This is a workaround for a deficiency in SROA - see
  // https://llvm.org/bugs/show_bug.cgi?id=30188
  if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
        return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
      }))
    return false;
  if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
        return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
      }))
    return false;
  if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
        return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
      }))
    return false;

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  // Finally, operand-by-operand: any operand position that differs across
  // the candidates is recorded in PHIOperands (the caller would need a PHI).
  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(OI);
    if (Op->getType()->isTokenTy())
      // Don't touch any operand of token type.
      return false;

    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(OI) == I0->getOperand(OI);
    };
    if (!all_of(Insts, SameAsI0)) {
      if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
        // NOTE(review): a continuation line of this condition appears to have
        // been dropped by extraction here — verify against upstream.
        // We can't create a PHI from this GEP.
        return false;
      for (auto *I : Insts)
        PHIOperands[I].push_back(I->getOperand(OI));
    }
  }
  return true;
}
2074
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
// NOTE(review): the line carrying this function's signature appears to have
// been dropped by extraction just above — verify against upstream.
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  // NOTE(review): the declaration of `Insts` appears to have been dropped by
  // extraction here — verify against upstream.
  for (auto *BB : Blocks) {
    // Walk backwards from the terminator, skipping debug intrinsics, to find
    // the last "real" instruction of the block.
    Instruction *I = BB->getTerminator();
    do {
      I = I->getPrevNode();
    } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
    if (!isa<DbgInfoIntrinsic>(I))
      Insts.push_back(I);
  }

  // The only checking we need to do now is that all users of all instructions
  // are the same PHI node. canSinkInstructions should have checked this but
  // it is slightly over-aggressive - it gets confused by commutative
  // instructions so double-check it here.
  Instruction *I0 = Insts.front();
  if (!I0->user_empty()) {
    auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
    if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
          auto *U = cast<Instruction>(*I->user_begin());
          return U == PNUse;
        }))
      return false;
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);

  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  if (!I0->user_empty()) {
    // canSinkLastInstruction checked that all instructions were used by
    // one and only one PHI node. Find that now, RAUW it to our common
    // instruction and nuke it.
    auto *PN = cast<PHINode>(*I0->user_begin());
    PN->replaceAllUsesWith(I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(I0);
    I->eraseFromParent();
  }

  return true;
}
2180
2181namespace {
2182
  // LockstepReverseIterator - Iterates through instructions
  // in a set of blocks in reverse order from the first non-terminator.
  // For example (assume all blocks have size n):
  // LockstepReverseIterator I([B1, B2, B3]);
  // *I-- = [B1[n], B2[n], B3[n]];
  // *I-- = [B1[n-1], B2[n-1], B3[n-1]];
  // *I-- = [B1[n-2], B2[n-2], B3[n-2]];
  // ...
  //
  // Debug intrinsics are skipped in every block; once any block runs out of
  // instructions the iterator becomes invalid (Fail is set) and stays so.
  class LockstepReverseIterator {
    // NOTE(review): the declarations of the block list and the current
    // per-block instruction vector (`Blocks`/`Insts`, referenced below)
    // appear to have been dropped by extraction here — verify upstream.
    bool Fail;

  public:
    LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
      reset();
    }

    // Re-position every lane at the last non-debug, non-terminator
    // instruction of its block; fails if any block has none.
    void reset() {
      Fail = false;
      Insts.clear();
      for (auto *BB : Blocks) {
        Instruction *Inst = BB->getTerminator();
        for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
          Inst = Inst->getPrevNode();
        if (!Inst) {
          // Block wasn't big enough.
          Fail = true;
          return;
        }
        Insts.push_back(Inst);
      }
    }

    bool isValid() const {
      return !Fail;
    }

    // Step every lane one non-debug instruction backwards; fails when any
    // lane hits the beginning of its block.
    void operator--() {
      if (Fail)
        return;
      for (auto *&Inst : Insts) {
        for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
          Inst = Inst->getPrevNode();
        // Already at beginning of block.
        if (!Inst) {
          Fail = true;
          return;
        }
      }
    }

    // Step every lane one non-debug instruction forwards; fails when any
    // lane falls off the end of its block.
    void operator++() {
      if (Fail)
        return;
      for (auto *&Inst : Insts) {
        for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
          Inst = Inst->getNextNode();
        // Already at end of block.
        if (!Inst) {
          Fail = true;
          return;
        }
      }
    }

    // NOTE(review): the signature line of the dereference operator (returning
    // the per-block instruction list) appears to have been dropped by
    // extraction here — verify against upstream.
      return Insts;
    }
  };
2253
2254} // end anonymous namespace
2255
/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
/// Returns true if the CFG was changed.
// NOTE(review): the first line of this function's signature appears to have
// been dropped by extraction; only the trailing parameter is visible below.
                                           DomTreeUpdater *DTU) {
  // We support two situations:
  //   (1) all incoming arcs are unconditional
  //   (2) there are non-unconditional incoming arcs
  //
  // (2) is very common in switch defaults and
  // else-if patterns;
  //
  // if (a) f(1);
  // else if (b) f(2);
  //
  // produces:
  //
  //       [if]
  //      /    \
  //    [f(1)] [if]
  //      |     | \
  //      |     |  |
  //      |  [f(2)]|
  //       \    | /
  //        [ end ]
  //
  // [end] has two unconditional predecessor arcs and one conditional. The
  // conditional refers to the implicit empty 'else' arc. This conditional
  // arc can also be caused by an empty default block in a switch.
  //
  // In this case, we attempt to sink code from all *unconditional* arcs.
  // If we can sink instructions from these arcs (determined during the scan
  // phase below) we insert a common successor for all unconditional arcs and
  // connect that to [end], to enable sinking:
  //
  //       [if]
  //      /    \
  //    [x(1)] [if]
  //      |     | \
  //      |     |  \
  //      |  [x(2)] |
  //       \   /    |
  //   [sink.split] |
  //         \     /
  //         [ end ]
  //
  SmallVector<BasicBlock*,4> UnconditionalPreds;
  bool HaveNonUnconditionalPredecessors = false;
  for (auto *PredBB : predecessors(BB)) {
    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
    if (PredBr && PredBr->isUnconditional())
      UnconditionalPreds.push_back(PredBB);
    else
      HaveNonUnconditionalPredecessors = true;
  }
  // Sinking requires at least two source blocks to common.
  if (UnconditionalPreds.size() < 2)
    return false;

  // We take a two-step approach to tail sinking. First we scan from the end of
  // each block upwards in lockstep. If the n'th instruction from the end of each
  // block can be sunk, those instructions are added to ValuesToSink and we
  // carry on. If we can sink an instruction but need to PHI-merge some operands
  // (because they're not identical in each instruction) we add these to
  // PHIOperands.
  int ScanIdx = 0;
  SmallPtrSet<Value*,4> InstructionsToSink;
  // NOTE(review): the declaration of `PHIOperands` appears to have been
  // dropped by extraction here — verify against upstream.
  LockstepReverseIterator LRI(UnconditionalPreds);
  while (LRI.isValid() &&
         canSinkInstructions(*LRI, PHIOperands)) {
    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
                      << "\n");
    InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
    ++ScanIdx;
    --LRI;
  }

  // If no instructions can be sunk, early-return.
  if (ScanIdx == 0)
    return false;

  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);

  if (!followedByDeoptOrUnreachable) {
    // Okay, we *could* sink last ScanIdx instructions. But how many can we
    // actually sink before encountering instruction that is unprofitable to
    // sink?
    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
      unsigned NumPHIdValues = 0;
      for (auto *I : *LRI)
        for (auto *V : PHIOperands[I]) {
          if (!InstructionsToSink.contains(V))
            ++NumPHIdValues;
          // FIXME: this check is overly optimistic. We may end up not sinking
          // said instruction, due to the very same profitability check.
          // See @creating_too_many_phis in sink-common-code.ll.
        }
      LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
      // Round the PHI count up (ceiling division by the predecessor count).
      unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
      if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
        NumPHIInsts++;

      return NumPHIInsts <= 1;
    };

    // We've determined that we are going to sink last ScanIdx instructions,
    // and recorded them in InstructionsToSink. Now, some instructions may be
    // unprofitable to sink. But that determination depends on the instructions
    // that we are going to sink.

    // First, forward scan: find the first instruction unprofitable to sink,
    // recording all the ones that are profitable to sink.
    // FIXME: would it be better, after we detect that not all are profitable.
    // to either record the profitable ones, or erase the unprofitable ones?
    // Maybe we need to choose (at runtime) the one that will touch least
    // instrs?
    LRI.reset();
    int Idx = 0;
    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
    while (Idx < ScanIdx) {
      if (!ProfitableToSinkInstruction(LRI)) {
        // Too many PHIs would be created.
        LLVM_DEBUG(
            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
        break;
      }
      InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
      --LRI;
      ++Idx;
    }

    // If no instructions can be sunk, early-return.
    if (Idx == 0)
      return false;

    // Did we determine that (only) some instructions are unprofitable to sink?
    if (Idx < ScanIdx) {
      // Okay, some instructions are unprofitable.
      ScanIdx = Idx;
      InstructionsToSink = InstructionsProfitableToSink;

      // But, that may make other instructions unprofitable, too.
      // So, do a backward scan, do any earlier instructions become
      // unprofitable?
      assert(
          !ProfitableToSinkInstruction(LRI) &&
          "We already know that the last instruction is unprofitable to sink");
      ++LRI;
      --Idx;
      while (Idx >= 0) {
        // If we detect that an instruction becomes unprofitable to sink,
        // all earlier instructions won't be sunk either,
        // so preemptively keep InstructionsProfitableToSink in sync.
        // FIXME: is this the most performant approach?
        for (auto *I : *LRI)
          InstructionsProfitableToSink.erase(I);
        if (!ProfitableToSinkInstruction(LRI)) {
          // Everything starting with this instruction won't be sunk.
          ScanIdx = Idx;
          InstructionsToSink = InstructionsProfitableToSink;
        }
        ++LRI;
        --Idx;
      }
    }

    // If no instructions can be sunk, early-return.
    if (ScanIdx == 0)
      return false;
  }

  bool Changed = false;

  if (HaveNonUnconditionalPredecessors) {
    if (!followedByDeoptOrUnreachable) {
      // It is always legal to sink common instructions from unconditional
      // predecessors. However, if not all predecessors are unconditional,
      // this transformation might be pessimizing. So as a rule of thumb,
      // don't do it unless we'd sink at least one non-speculatable instruction.
      // See https://bugs.llvm.org/show_bug.cgi?id=30244
      LRI.reset();
      int Idx = 0;
      bool Profitable = false;
      while (Idx < ScanIdx) {
        if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
          Profitable = true;
          break;
        }
        --LRI;
        ++Idx;
      }
      if (!Profitable)
        return false;
    }

    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
    // We have a conditional edge and we're going to sink some instructions.
    // Insert a new block postdominating all blocks we're going to sink from.
    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
      // Edges couldn't be split.
      return false;
    Changed = true;
  }

  // Now that we've analyzed all potential sinking candidates, perform the
  // actual sink. We iteratively sink the last non-terminator of the source
  // blocks into their common successor unless doing so would require too
  // many PHI instructions to be generated (currently only one PHI is allowed
  // per sunk instruction).
  //
  // We can use InstructionsToSink to discount values needing PHI-merging that will
  // actually be sunk in a later iteration. This allows us to be more
  // aggressive in what we sink. This does allow a false positive where we
  // sink presuming a later value will also be sunk, but stop half way through
  // and never actually sink it which means we produce more PHIs than intended.
  // This is unlikely in practice though.
  int SinkIdx = 0;
  for (; SinkIdx != ScanIdx; ++SinkIdx) {
    LLVM_DEBUG(dbgs() << "SINK: Sink: "
                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                      << "\n");

    // Because we've sunk every instruction in turn, the current instruction to
    // sink is always at index 0.
    LRI.reset();

    if (!sinkLastInstruction(UnconditionalPreds)) {
      LLVM_DEBUG(
          dbgs()
          << "SINK: stopping here, failed to actually sink instruction!\n");
      break;
    }

    NumSinkCommonInstrs++;
    Changed = true;
  }
  if (SinkIdx != 0)
    ++NumSinkCommonCode;
  return Changed;
}
2495
2496namespace {
2497
// Groups `invoke` instructions into sets whose members are mutually
// compatible for merging (see shouldBelongToSameSet for the criteria).
struct CompatibleSets {
  using SetTy = SmallVector<InvokeInst *, 2>;

  // NOTE(review): the declaration of the set storage member (`Sets`, iterated
  // and appended to by getCompatibleSet) appears to have been dropped by
  // extraction here — verify against upstream.

  // Decide whether two candidate invokes may be merged into one.
  static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);

  // Return the existing set II is compatible with, or a fresh empty set.
  SetTy &getCompatibleSet(InvokeInst *II);

  // Place II into its compatible set.
  void insert(InvokeInst *II);
};
2509
2510CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2511 // Perform a linear scan over all the existing sets, see if the new `invoke`
2512 // is compatible with any particular set. Since we know that all the `invokes`
2513 // within a set are compatible, only check the first `invoke` in each set.
2514 // WARNING: at worst, this has quadratic complexity.
2515 for (CompatibleSets::SetTy &Set : Sets) {
2516 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2517 return Set;
2518 }
2519
2520 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2521 return Sets.emplace_back();
2522}
2523
2524void CompatibleSets::insert(InvokeInst *II) {
2525 getCompatibleSet(II).emplace_back(II);
2526}
2527
// Decide whether the two candidate `invoke`s can legally and sensibly be
// merged into a single `invoke`: mergeable attributes, matching direct/indirect
// callee shape, matching normal destinations (with compatible PHI inputs),
// identical operation and operand bundles, and PHI-able argument differences.
bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");

  // Can we theoretically merge these `invoke`s?
  auto IsIllegalToMerge = [](InvokeInst *II) {
    return II->cannotMerge() || II->isInlineAsm();
  };
  if (any_of(Invokes, IsIllegalToMerge))
    return false;

  // Either both `invoke`s must be direct,
  // or both `invoke`s must be indirect.
  auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
  bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
  bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
  if (HaveIndirectCalls) {
    if (!AllCallsAreIndirect)
      return false;
  } else {
    // All callees must be identical.
    Value *Callee = nullptr;
    for (InvokeInst *II : Invokes) {
      Value *CurrCallee = II->getCalledOperand();
      assert(CurrCallee && "There is always a called operand.");
      if (!Callee)
        Callee = CurrCallee;
      else if (Callee != CurrCallee)
        return false;
    }
  }

  // Either both `invoke`s must not have a normal destination,
  // or both `invoke`s must have a normal destination,
  auto HasNormalDest = [](InvokeInst *II) {
    return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
  };
  if (any_of(Invokes, HasNormalDest)) {
    // Do not merge `invoke` that does not have a normal destination with one
    // that does have a normal destination, even though doing so would be legal.
    if (!all_of(Invokes, HasNormalDest))
      return false;

    // All normal destinations must be identical.
    BasicBlock *NormalBB = nullptr;
    for (InvokeInst *II : Invokes) {
      BasicBlock *CurrNormalBB = II->getNormalDest();
      assert(CurrNormalBB && "There is always a 'continue to' basic block.");
      if (!NormalBB)
        NormalBB = CurrNormalBB;
      else if (NormalBB != CurrNormalBB)
        return false;
    }

    // In the normal destination, the incoming values for these two `invoke`s
    // must be compatible.
    SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
    // NOTE(review): the line beginning the compatibility-check call (whose
    // arguments follow) appears to have been dropped by extraction here —
    // verify against upstream.
            NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
            &EquivalenceSet))
      return false;
  }

#ifndef NDEBUG
  // All unwind destinations must be identical.
  // We know that because we have started from said unwind destination.
  BasicBlock *UnwindBB = nullptr;
  for (InvokeInst *II : Invokes) {
    BasicBlock *CurrUnwindBB = II->getUnwindDest();
    assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
    if (!UnwindBB)
      UnwindBB = CurrUnwindBB;
    else
      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
  }
#endif

  // In the unwind destination, the incoming values for these two `invoke`s
  // must be compatible.
  // NOTE(review): the line beginning the compatibility-check call (whose
  // arguments follow) appears to have been dropped by extraction here —
  // verify against upstream.
          Invokes.front()->getUnwindDest(),
          {Invokes[0]->getParent(), Invokes[1]->getParent()}))
    return false;

  // Ignoring arguments, these `invoke`s must be identical,
  // including operand bundles.
  const InvokeInst *II0 = Invokes.front();
  for (auto *II : Invokes.drop_front())
    if (!II->isSameOperationAs(II0))
      return false;

  // Can we theoretically form the data operands for the merged `invoke`?
  auto IsIllegalToMergeArguments = [](auto Ops) {
    Use &U0 = std::get<0>(Ops);
    Use &U1 = std::get<1>(Ops);
    if (U0 == U1)
      return false;
    return U0->getType()->isTokenTy() ||
           !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
                                          U0.getOperandNo());
  };
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
  if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
             IsIllegalToMergeArguments))
    return false;

  return true;
}
2635
2636} // namespace
2637
2638// Merge all invokes in the provided set, all of which are compatible
2639// as per the `CompatibleSets::shouldBelongToSameSet()`.
// NOTE(review): the opening line of the signature — presumably
// `static void MergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,` —
// was lost in this extraction; confirm against the original source.
2641 DomTreeUpdater *DTU) {
2642 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2643
// DomTree updates are collected locally and applied in one batch at the end.
// NOTE(review): the declaration of `Updates` (line 2644, presumably a
// SmallVector of DominatorTree update records) was lost in this extraction.
2645 if (DTU)
2646 Updates.reserve(2 + 3 * Invokes.size());
2647
// If the (shared) normal destination starts with `unreachable`, the merged
// invoke will instead get its own fresh unreachable-terminated block below.
2648 bool HasNormalDest =
2649 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2650
2651 // Clone one of the invokes into a new basic block.
2652 // Since they are all compatible, it doesn't matter which invoke is cloned.
2653 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2654 InvokeInst *II0 = Invokes.front();
2655 BasicBlock *II0BB = II0->getParent();
2656 BasicBlock *InsertBeforeBlock =
2657 II0->getParent()->getIterator()->getNextNode();
2658 Function *Func = II0BB->getParent();
2659 LLVMContext &Ctx = II0->getContext();
2660
2661 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2662 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2663
2664 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2665 // NOTE: all invokes have the same attributes, so no handling needed.
2666 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2667
2668 if (!HasNormalDest) {
2669 // This set does not have a normal destination,
2670 // so just form a new block with unreachable terminator.
2671 BasicBlock *MergedNormalDest = BasicBlock::Create(
2672 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2673 new UnreachableInst(Ctx, MergedNormalDest);
2674 MergedInvoke->setNormalDest(MergedNormalDest);
2675 }
2676
2677 // The unwind destination, however, remains identical for all invokes here.
2678
2679 return MergedInvoke;
2680 }();
2681
2682 if (DTU) {
2683 // Predecessor blocks that contained these invokes will now branch to
2684 // the new block that contains the merged invoke, ...
2685 for (InvokeInst *II : Invokes)
2686 Updates.push_back(
2687 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2688
2689 // ... which has the new `unreachable` block as normal destination,
2690 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2691 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2692 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2693 SuccBBOfMergedInvoke});
2694
2695 // Since predecessor blocks now unconditionally branch to a new block,
2696 // they no longer branch to their original successors.
2697 for (InvokeInst *II : Invokes)
2698 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2699 Updates.push_back(
2700 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2701 }
2702
2703 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2704
2705 // Form the merged operands for the merged invoke.
2706 for (Use &U : MergedInvoke->operands()) {
2707 // Only PHI together the indirect callees and data operands.
2708 if (MergedInvoke->isCallee(&U)) {
2709 if (!IsIndirectCall)
2710 continue;
2711 } else if (!MergedInvoke->isDataOperand(&U))
2712 continue;
2713
2714 // Don't create trivial PHI's with all-identical incoming values.
2715 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2716 return II->getOperand(U.getOperandNo()) != U.get();
2717 });
2718 if (!NeedPHI)
2719 continue;
2720
2721 // Form a PHI out of all the data ops under this index.
// NOTE(review): line 2722 — presumably `PHINode *PN = PHINode::Create(` —
// was lost in this extraction; the arguments below belong to that call.
2723 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2724 for (InvokeInst *II : Invokes)
2725 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2726
2727 U.set(PN);
2728 }
2729
2730 // We've ensured that each PHI node has compatible (identical) incoming values
2731 // when coming from each of the `invoke`s in the current merge set,
2732 // so update the PHI nodes accordingly.
2733 for (BasicBlock *Succ : successors(MergedInvoke))
2734 AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2735 /*ExistPred=*/Invokes.front()->getParent());
2736
2737 // And finally, replace the original `invoke`s with an unconditional branch
2738 // to the block with the merged `invoke`. Also, give that merged `invoke`
2739 // the merged debugloc of all the original `invoke`s.
2740 DILocation *MergedDebugLoc = nullptr;
2741 for (InvokeInst *II : Invokes) {
2742 // Compute the debug location common to all the original `invoke`s.
2743 if (!MergedDebugLoc)
2744 MergedDebugLoc = II->getDebugLoc();
2745 else
2746 MergedDebugLoc =
2747 DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2748
2749 // And replace the old `invoke` with an unconditional branch
2750 // to the block with the merged `invoke`.
2751 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2752 OrigSuccBB->removePredecessor(II->getParent());
2753 BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2754 II->replaceAllUsesWith(MergedInvoke);
2755 II->eraseFromParent();
2756 ++NumInvokesMerged;
2757 }
2758 MergedInvoke->setDebugLoc(MergedDebugLoc);
2759 ++NumInvokeSetsFormed;
2760
2761 if (DTU)
2762 DTU->applyUpdates(Updates);
2763}
2764
2765/// If this block is a `landingpad` exception handling block, categorize all
2766/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2767/// being "mergeable" together, and then merge invokes in each set together.
2768///
2769/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2770/// [...] [...]
2771/// | |
2772/// [invoke0] [invoke1]
2773/// / \ / \
2774/// [cont0] [landingpad] [cont1]
2775/// to:
2776/// [...] [...]
2777/// \ /
2778/// [invoke]
2779/// / \
2780/// [cont] [landingpad]
2781///
2782/// But of course we can only do that if the invokes share the `landingpad`,
2783/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2784/// and the invoked functions are "compatible".
// NOTE(review): lines 2785-2786 — presumably the signature
// `static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {`
// and an early bail-out guard (likely on the pass option enabling this
// transform) — were lost in this extraction; the `return false` below
// belongs to that elided guard.
2787 return false;
2788
2789 bool Changed = false;
2790
2791 // FIXME: generalize to all exception handling blocks?
2792 if (!BB->isLandingPad())
2793 return Changed;
2794
2795 CompatibleSets Grouper;
2796
2797 // Record all the predecessors of this `landingpad`. As per verifier,
2798 // the only allowed predecessor is the unwind edge of an `invoke`.
2799 // We want to group "compatible" `invokes` into the same set to be merged.
2800 for (BasicBlock *PredBB : predecessors(BB))
2801 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2802
2803 // And now, merge `invoke`s that were grouped together.
2804 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2805 if (Invokes.size() < 2)
2806 continue;
2807 Changed = true;
2808 MergeCompatibleInvokesImpl(Invokes, DTU);
2809 }
2810
2811 return Changed;
2812}
2813
2814namespace {
2815/// Track ephemeral values, which should be ignored for cost-modelling
2816/// purposes. Requires walking instructions in reverse order.
2817class EphemeralValueTracker {
// NOTE(review): the member declaration (presumably a SmallPtrSet of visited
// ephemeral instructions named `EphValues`, used below) was lost in this
// extraction.
2819
// An instruction is ephemeral if it is an assume, or if it has no side
// effects, is not a terminator, and every user is an instruction already
// recorded as ephemeral (hence the required reverse walk).
2820 bool isEphemeral(const Instruction *I) {
2821 if (isa<AssumeInst>(I))
2822 return true;
2823 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2824 all_of(I->users(), [&](const User *U) {
2825 return EphValues.count(cast<Instruction>(U));
2826 });
2827 }
2828
2829public:
// Record \p I if it is ephemeral. Returns true iff it was recorded.
2830 bool track(const Instruction *I) {
2831 if (isEphemeral(I)) {
2832 EphValues.insert(I);
2833 return true;
2834 }
2835 return false;
2836 }
2837
// Returns true if \p I was previously recorded as ephemeral by track().
2838 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2839};
2840} // namespace
2841
2842/// Determine if we can hoist or sink a sole store instruction out of a
2843/// conditional block.
2844///
2845/// We are looking for code like the following:
2846/// BrBB:
2847/// store i32 %add, i32* %arrayidx2
2848/// ... // No other stores or function calls (we could be calling a memory
2849/// ... // function).
2850/// %cmp = icmp ult %x, %y
2851/// br i1 %cmp, label %EndBB, label %ThenBB
2852/// ThenBB:
2853/// store i32 %add5, i32* %arrayidx2
2854/// br label EndBB
2855/// EndBB:
2856/// ...
2857/// We are going to transform this into:
2858/// BrBB:
2859/// store i32 %add, i32* %arrayidx2
2860/// ... //
2861/// %cmp = icmp ult %x, %y
2862/// %add.add5 = select i1 %cmp, i32 %add, %add5
2863/// store i32 %add.add5, i32* %arrayidx2
2864/// ...
2865///
2866/// \return The pointer to the value of the previous store if the store can be
2867/// hoisted into the predecessor block. nullptr otherwise.
// NOTE(review): line 2868 — presumably the signature opening
// `static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,` —
// was lost in this extraction.
2869 BasicBlock *StoreBB, BasicBlock *EndBB) {
2870 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
2871 if (!StoreToHoist)
2872 return nullptr;
2873
2874 // Volatile or atomic.
2875 if (!StoreToHoist->isSimple())
2876 return nullptr;
2877
2878 Value *StorePtr = StoreToHoist->getPointerOperand();
2879 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
2880
2881 // Look for a store to the same pointer in BrBB.
2882 unsigned MaxNumInstToLookAt = 9;
2883 // Skip pseudo probe intrinsic calls which are not really killing any memory
2884 // accesses.
2885 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
2886 if (!MaxNumInstToLookAt)
2887 break;
2888 --MaxNumInstToLookAt;
2889
2890 // Could be calling an instruction that affects memory like free().
2891 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
2892 return nullptr;
2893
2894 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
2895 // Found the previous store to same location and type. Make sure it is
2896 // simple, to avoid introducing a spurious non-atomic write after an
2897 // atomic write.
2898 if (SI->getPointerOperand() == StorePtr &&
2899 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
2900 SI->getAlign() >= StoreToHoist->getAlign())
2901 // Found the previous store, return its value operand.
2902 return SI->getValueOperand();
2903 return nullptr; // Unknown store.
2904 }
2905
2906 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
2907 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
2908 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
2909 // Local objects (created by an `alloca` instruction) are always
2910 // writable, so once we are past a read from a location it is valid to
2911 // also write to that same location.
2912 // If the address of the local object never escapes the function, that
2913 // means it's never concurrently read or written, hence moving the store
2914 // from under the condition will not introduce a data race.
2915 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
2916 if (AI && !PointerMayBeCaptured(AI, false, true))
2917 // Found a previous load, return it.
2918 return LI;
2919 }
2920 // The load didn't work out, but we may still find a store.
2921 }
2922 }
2923
2924 return nullptr;
2925}
2926
2927/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
2928/// converted to selects.
// NOTE(review): line 2929 — presumably the signature opening
// `static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,`
// — was lost in this extraction, as were line 2932 (presumably the in/out
// `InstructionCost &Cost` parameter) and lines 2934/2936-2937 (presumably the
// selection of the TTI cost kind based on `hasMinSize()`).
2930 BasicBlock *EndBB,
2931 unsigned &SpeculatedInstructions,
2933 const TargetTransformInfo &TTI) {
2935 BB->getParent()->hasMinSize()
2938
2939 bool HaveRewritablePHIs = false;
2940 for (PHINode &PN : EndBB->phis()) {
2941 Value *OrigV = PN.getIncomingValueForBlock(BB);
2942 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2943
2944 // FIXME: Try to remove some of the duplication with
2945 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
2946 if (ThenV == OrigV)
2947 continue;
2948
// Account for the select that will replace this PHI.
// NOTE(review): line 2950, the continuation of this call (presumably the
// predicate and cost-kind arguments), was lost in this extraction.
2949 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
2951
2952 // Don't convert to selects if we could remove undefined behavior instead.
// NOTE(review): line 2954, the second half of this disjunction (presumably
// the same check applied to ThenV), was lost in this extraction.
2953 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2955 return false;
2956
2957 HaveRewritablePHIs = true;
2958 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2959 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2960 if (!OrigCE && !ThenCE)
2961 continue; // Known cheap (FIXME: Maybe not true for aggregates).
2962
2963 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
2964 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
// NOTE(review): line 2966, the initializer of MaxCost, was lost in this
// extraction.
2965 InstructionCost MaxCost =
2967 if (OrigCost + ThenCost > MaxCost)
2968 return false;
2969
2970 // Account for the cost of an unfolded ConstantExpr which could end up
2971 // getting expanded into Instructions.
2972 // FIXME: This doesn't account for how many operations are combined in the
2973 // constant expression.
2974 ++SpeculatedInstructions;
2975 if (SpeculatedInstructions > 1)
2976 return false;
2977 }
2978
2979 return HaveRewritablePHIs;
2980}
2981
2982/// Speculate a conditional basic block flattening the CFG.
2983///
2984/// Note that this is a very risky transform currently. Speculating
2985/// instructions like this is most often not desirable. Instead, there is an MI
2986/// pass which can do it with full awareness of the resource constraints.
2987/// However, some cases are "obvious" and we should do directly. An example of
2988/// this is speculating a single, reasonably cheap instruction.
2989///
2990/// There is only one distinct advantage to flattening the CFG at the IR level:
2991/// it makes very common but simplistic optimizations such as are common in
2992/// instcombine and the DAG combiner more powerful by removing CFG edges and
2993/// modeling their effects with easier to reason about SSA value graphs.
2994///
2995///
2996/// An illustration of this transform is turning this IR:
2997/// \code
2998/// BB:
2999/// %cmp = icmp ult %x, %y
3000/// br i1 %cmp, label %EndBB, label %ThenBB
3001/// ThenBB:
3002/// %sub = sub %x, %y
3003/// br label BB2
3004/// EndBB:
3005/// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
3006/// ...
3007/// \endcode
3008///
3009/// Into this IR:
3010/// \code
3011/// BB:
3012/// %cmp = icmp ult %x, %y
3013/// %sub = sub %x, %y
3014/// %cond = select i1 %cmp, 0, %sub
3015/// ...
3016/// \endcode
3017///
3018/// \returns true if the conditional block is removed.
3019bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
3020 BasicBlock *ThenBB) {
3021 if (!Options.SpeculateBlocks)
3022 return false;
3023
3024 // Be conservative for now. FP select instruction can often be expensive.
3025 Value *BrCond = BI->getCondition();
3026 if (isa<FCmpInst>(BrCond))
3027 return false;
3028
3029 BasicBlock *BB = BI->getParent();
3030 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
// NOTE(review): line 3032, the initializer of Budget (presumably derived
// from the PHI-folding threshold), was lost in this extraction.
3031 InstructionCost Budget =
3033
3034 // If ThenBB is actually on the false edge of the conditional branch, remember
3035 // to swap the select operands later.
3036 bool Invert = false;
3037 if (ThenBB != BI->getSuccessor(0)) {
3038 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3039 Invert = true;
3040 }
3041 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3042
3043 // If the branch is non-unpredictable, and is predicted to *not* branch to
3044 // the `then` block, then avoid speculating it.
3045 if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
3046 uint64_t TWeight, FWeight;
3047 if (extractBranchWeights(*BI, TWeight, FWeight) &&
3048 (TWeight + FWeight) != 0) {
3049 uint64_t EndWeight = Invert ? TWeight : FWeight;
3050 BranchProbability BIEndProb =
3051 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
// NOTE(review): line 3052, the definition of the `Likely` probability
// threshold used below, was lost in this extraction.
3053 if (BIEndProb >= Likely)
3054 return false;
3055 }
3056 }
3057
3058 // Keep a count of how many times instructions are used within ThenBB when
3059 // they are candidates for sinking into ThenBB. Specifically:
3060 // - They are defined in BB, and
3061 // - They have no side effects, and
3062 // - All of their uses are in ThenBB.
3063 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3064
3065 SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
3066
3067 unsigned SpeculatedInstructions = 0;
3068 Value *SpeculatedStoreValue = nullptr;
3069 StoreInst *SpeculatedStore = nullptr;
3070 EphemeralValueTracker EphTracker;
3071 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3072 // Skip debug info.
3073 if (isa<DbgInfoIntrinsic>(I)) {
3074 SpeculatedDbgIntrinsics.push_back(&I);
3075 continue;
3076 }
3077
3078 // Skip pseudo probes. The consequence is we lose track of the branch
3079 // probability for ThenBB, which is fine since the optimization here takes
3080 // place regardless of the branch probability.
3081 if (isa<PseudoProbeInst>(I)) {
3082 // The probe should be deleted so that it will not be over-counted when
3083 // the samples collected on the non-conditional path are counted towards
3084 // the conditional path. We leave it for the counts inference algorithm to
3085 // figure out a proper count for an unknown probe.
3086 SpeculatedDbgIntrinsics.push_back(&I);
3087 continue;
3088 }
3089
3090 // Ignore ephemeral values, they will be dropped by the transform.
3091 if (EphTracker.track(&I))
3092 continue;
3093
3094 // Only speculatively execute a single instruction (not counting the
3095 // terminator) for now.
3096 ++SpeculatedInstructions;
3097 if (SpeculatedInstructions > 1)
3098 return false;
3099
3100 // Don't hoist the instruction if it's unsafe or expensive.
// NOTE(review): line 3101, the first half of this condition (presumably an
// `isSafeToSpeculativelyExecute` check), was lost in this extraction.
3102 !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
3103 &I, BB, ThenBB, EndBB))))
3104 return false;
// NOTE(review): lines 3106-3107, the second half of this condition
// (presumably a speculation-cost-vs-budget check), were lost in this
// extraction.
3105 if (!SpeculatedStoreValue &&
3108 return false;
3109
3110 // Store the store speculation candidate.
3111 if (SpeculatedStoreValue)
3112 SpeculatedStore = cast<StoreInst>(&I);
3113
3114 // Do not hoist the instruction if any of its operands are defined but not
3115 // used in BB. The transformation will prevent the operand from
3116 // being sunk into the use block.
3117 for (Use &Op : I.operands()) {
3118 Instruction *OpI = dyn_cast<Instruction>(Op);
3119 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3120 continue; // Not a candidate for sinking.
3121
3122 ++SinkCandidateUseCounts[OpI];
3123 }
3124 }
3125
3126 // Consider any sink candidates which are only used in ThenBB as costs for
3127 // speculation. Note, while we iterate over a DenseMap here, we are summing
3128 // and so iteration order isn't significant.
3129 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3130 if (Inst->hasNUses(Count)) {
3131 ++SpeculatedInstructions;
3132 if (SpeculatedInstructions > 1)
3133 return false;
3134 }
3135
3136 // Check that we can insert the selects and that it's not too expensive to do
3137 // so.
3138 bool Convert = SpeculatedStore != nullptr;
// NOTE(review): line 3139, presumably `InstructionCost Cost = 0;`, was lost
// in this extraction.
3140 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3141 SpeculatedInstructions,
3142 Cost, TTI);
3143 if (!Convert || Cost > Budget)
3144 return false;
3145
3146 // If we get here, we can hoist the instruction and if-convert.
3147 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3148
3149 // Insert a select of the value of the speculated store.
3150 if (SpeculatedStoreValue) {
3151 IRBuilder<NoFolder> Builder(BI);
3152 Value *OrigV = SpeculatedStore->getValueOperand();
3153 Value *TrueV = SpeculatedStore->getValueOperand();
3154 Value *FalseV = SpeculatedStoreValue;
3155 if (Invert)
3156 std::swap(TrueV, FalseV);
3157 Value *S = Builder.CreateSelect(
3158 BrCond, TrueV, FalseV, "spec.store.select", BI);
3159 SpeculatedStore->setOperand(0, S);
3160 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3161 SpeculatedStore->getDebugLoc());
3162 // The value stored is still conditional, but the store itself is now
3163 // unconditionally executed, so we must be sure that any linked dbg.assign
3164 // intrinsics are tracking the new stored value (the result of the
3165 // select). If we don't, and the store were to be removed by another pass
3166 // (e.g. DSE), then we'd eventually end up emitting a location describing
3167 // the conditional value, unconditionally.
3168 //
3169 // === Before this transformation ===
3170 // pred:
3171 // store %one, %x.dest, !DIAssignID !1
3172 // dbg.assign %one, "x", ..., !1, ...
3173 // br %cond if.then
3174 //
3175 // if.then:
3176 // store %two, %x.dest, !DIAssignID !2
3177 // dbg.assign %two, "x", ..., !2, ...
3178 //
3179 // === After this transformation ===
3180 // pred:
3181 // store %one, %x.dest, !DIAssignID !1
3182 // dbg.assign %one, "x", ..., !1
3183 // ...
3184 // %merge = select %cond, %two, %one
3185 // store %merge, %x.dest, !DIAssignID !2
3186 // dbg.assign %merge, "x", ..., !2
3187 auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3188 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3189 DbgAssign->replaceVariableLocationOp(OrigV, S);
3190 };
3191 for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
3192 for_each(at::getDVRAssignmentMarkers(SpeculatedStore), replaceVariable);
3193 }
3194
3195 // Metadata can be dependent on the condition we are hoisting above.
3196 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3197 // to avoid making it appear as if the condition is a constant, which would
3198 // be misleading while debugging.
3199 // Similarly strip attributes that may be dependent on condition we are
3200 // hoisting above.
3201 for (auto &I : make_early_inc_range(*ThenBB)) {
3202 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3203 // Don't update the DILocation of dbg.assign intrinsics.
3204 if (!isa<DbgAssignIntrinsic>(&I))
3205 I.setDebugLoc(DebugLoc());
3206 }
3207 I.dropUBImplyingAttrsAndMetadata();
3208
3209 // Drop ephemeral values.
3210 if (EphTracker.contains(&I)) {
3211 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3212 I.eraseFromParent();
3213 }
3214 }
3215
3216 // Hoist the instructions.
3217 // In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached
3218 // to these instructions, in the same way that dbg.value intrinsics are
3219 // dropped at the end of this block.
3220 for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
3221 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3222 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3223 // equivalent).
3224 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3225 !DVR || !DVR->isDbgAssign())
3226 It.dropOneDbgRecord(&DR);
3227 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3228 std::prev(ThenBB->end()));
3229
3230 // Insert selects and rewrite the PHI operands.
3231 IRBuilder<NoFolder> Builder(BI);
3232 for (PHINode &PN : EndBB->phis()) {
3233 unsigned OrigI = PN.getBasicBlockIndex(BB);
3234 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3235 Value *OrigV = PN.getIncomingValue(OrigI);
3236 Value *ThenV = PN.getIncomingValue(ThenI);
3237
3238 // Skip PHIs which are trivial.
3239 if (OrigV == ThenV)
3240 continue;
3241
3242 // Create a select whose true value is the speculatively executed value and
3243 // false value is the pre-existing value. Swap them if the branch
3244 // destinations were inverted.
3245 Value *TrueV = ThenV, *FalseV = OrigV;
3246 if (Invert)
3247 std::swap(TrueV, FalseV);
3248 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3249 PN.setIncomingValue(OrigI, V);
3250 PN.setIncomingValue(ThenI, V);
3251 }
3252
3253 // Remove speculated dbg intrinsics.
3254 // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
3255 // dbg value for the different flows and inserting it after the select.
3256 for (Instruction *I : SpeculatedDbgIntrinsics) {
3257 // We still want to know that an assignment took place so don't remove
3258 // dbg.assign intrinsics.
3259 if (!isa<DbgAssignIntrinsic>(I))
3260 I->eraseFromParent();
3261 }
3262
3263 ++NumSpeculations;
3264 return true;
3265}
3266
3267/// Return true if we can thread a branch across this block.
// NOTE(review): line 3268 — presumably the signature
// `static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {` — was
// lost in this extraction.
3269 int Size = 0;
3270 EphemeralValueTracker EphTracker;
3271
3272 // Walk the loop in reverse so that we can identify ephemeral values properly
3273 // (values only feeding assumes).
3274 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3275 // Can't fold blocks that contain noduplicate or convergent calls.
3276 if (CallInst *CI = dyn_cast<CallInst>(&I))
3277 if (CI->cannotDuplicate() || CI->isConvergent())
3278 return false;
3279
3280 // Ignore ephemeral values which are deleted during codegen.
3281 // We will delete Phis while threading, so Phis should not be accounted in
3282 // block's size.
3283 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3284 if (Size++ > MaxSmallBlockSize)
3285 return false; // Don't clone large BB's.
3286 }
3287
3288 // We can only support instructions that do not define values that are
3289 // live outside of the current basic block.
3290 for (User *U : I.users()) {
3291 Instruction *UI = cast<Instruction>(U);
3292 if (UI->getParent() != BB || isa<PHINode>(UI))
3293 return false;
3294 }
3295
3296 // Looks ok, continue checking.
3297 }
3298
3299 return true;
3300}
3301
// NOTE(review): line 3302 — presumably the signature opening
// `static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,` —
// was lost in this extraction. This helper returns the constant boolean
// value that \p V is known to have on the CFG edge From->To, if any.
3303 BasicBlock *To) {
3304 // Don't look past the block defining the value, we might get the value from
3305 // a previous loop iteration.
3306 auto *I = dyn_cast<Instruction>(V);
3307 if (I && I->getParent() == To)
3308 return nullptr;
3309
3310 // We know the value if the From block branches on it.
3311 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3312 if (BI && BI->isConditional() && BI->getCondition() == V &&
3313 BI->getSuccessor(0) != BI->getSuccessor(1))
// NOTE(review): line 3315 — presumably the false arm
// `: ConstantInt::getFalse(BI->getContext());` — was lost in this
// extraction.
3314 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3316
3317 return nullptr;
3318}
3319
3320/// If we have a conditional branch on something for which we know the constant
3321/// value in predecessors (e.g. a phi node in the current block), thread edges
3322/// from the predecessor to their ultimate destination.
// NOTE(review): line 3324 — presumably the continuation
// `FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,`
// — was lost in this extraction, as was line 3327 (presumably the
// declaration of the `KnownValues` map from constants to predecessor sets
// used throughout this function).
3323static std::optional<bool>
3325 const DataLayout &DL,
3326 AssumptionCache *AC) {
3328 BasicBlock *BB = BI->getParent();
3329 Value *Cond = BI->getCondition();
3330 PHINode *PN = dyn_cast<PHINode>(Cond);
3331 if (PN && PN->getParent() == BB) {
3332 // Degenerate case of a single entry PHI.
// NOTE(review): line 3334, the statement folding the single-entry PHI
// before reporting a change, was lost in this extraction.
3333 if (PN->getNumIncomingValues() == 1) {
3335 return true;
3336 }
3337
3338 for (Use &U : PN->incoming_values())
3339 if (auto *CB = dyn_cast<ConstantInt>(U))
3340 KnownValues[CB].insert(PN->getIncomingBlock(U));
3341 } else {
3342 for (BasicBlock *Pred : predecessors(BB)) {
3343 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3344 KnownValues[CB].insert(Pred);
3345 }
3346 }
3347
3348 if (KnownValues.empty())
3349 return false;
3350
3351 // Now we know that this block has multiple preds and two succs.
3352 // Check that the block is small enough and values defined in the block are
3353 // not used outside of it.
// NOTE(review): line 3354, the guard condition (presumably
// `if (!BlockIsSimpleEnoughToThreadThrough(BB))`), was lost in this
// extraction; the `return false` below belongs to it.
3355 return false;
3356
3357 for (const auto &Pair : KnownValues) {
3358 // Okay, we now know that all edges from PredBB should be revectored to
3359 // branch to RealDest.
3360 ConstantInt *CB = Pair.first;
3361 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3362 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3363
3364 if (RealDest == BB)
3365 continue; // Skip self loops.
3366
3367 // Skip if the predecessor's terminator is an indirect branch.
3368 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3369 return isa<IndirectBrInst>(PredBB->getTerminator());
3370 }))
3371 continue;
3372
3373 LLVM_DEBUG({
3374 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3375 << " has value " << *Pair.first << " in predecessors:\n";
3376 for (const BasicBlock *PredBB : Pair.second)
3377 dbgs() << " " << PredBB->getName() << "\n";
3378 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3379 });
3380
3381 // Split the predecessors we are threading into a new edge block. We'll
3382 // clone the instructions into this block, and then redirect it to RealDest.
3383 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3384
3385 // TODO: These just exist to reduce test diff, we can drop them if we like.
3386 EdgeBB->setName(RealDest->getName() + ".critedge");
3387 EdgeBB->moveBefore(RealDest);
3388
3389 // Update PHI nodes.
3390 AddPredecessorToBlock(RealDest, EdgeBB, BB);
3391
3392 // BB may have instructions that are being threaded over. Clone these
3393 // instructions into EdgeBB. We know that there will be no uses of the
3394 // cloned instructions outside of EdgeBB.
3395 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3396 DenseMap<Value *, Value *> TranslateMap; // Track translated values.
3397 TranslateMap[Cond] = CB;
3398
3399 // RemoveDIs: track instructions that we optimise away while folding, so
3400 // that we can copy DbgVariableRecords from them later.
3401 BasicBlock::iterator SrcDbgCursor = BB->begin();
3402 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3403 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3404 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3405 continue;
3406 }
3407 // Clone the instruction.
3408 Instruction *N = BBI->clone();
3409 // Insert the new instruction into its new home.
3410 N->insertInto(EdgeBB, InsertPt);
3411
3412 if (BBI->hasName())
3413 N->setName(BBI->getName() + ".c");
3414
3415 // Update operands due to translation.
3416 for (Use &Op : N->operands()) {
3417 DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
3418 if (PI != TranslateMap.end())
3419 Op = PI->second;
3420 }
3421
3422 // Check for trivial simplification.
3423 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3424 if (!BBI->use_empty())
3425 TranslateMap[&*BBI] = V;
3426 if (!N->mayHaveSideEffects()) {
3427 N->eraseFromParent(); // Instruction folded away, don't need actual
3428 // inst
3429 N = nullptr;
3430 }
3431 } else {
3432 if (!BBI->use_empty())
3433 TranslateMap[&*BBI] = N;
3434 }
3435 if (N) {
3436 // Copy all debug-info attached to instructions from the last we
3437 // successfully clone, up to this instruction (they might have been
3438 // folded away).
3439 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3440 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3441 SrcDbgCursor = std::next(BBI);
3442 // Clone debug-info on this instruction too.
3443 N->cloneDebugInfoFrom(&*BBI);
3444
3445 // Register the new instruction with the assumption cache if necessary.
3446 if (auto *Assume = dyn_cast<AssumeInst>(N))
3447 if (AC)
3448 AC->registerAssumption(Assume);
3449 }
3450 }
3451
3452 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3453 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3454 InsertPt->cloneDebugInfoFrom(BI);
3455
3456 BB->removePredecessor(EdgeBB);
3457 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3458 EdgeBI->setSuccessor(0, RealDest);
3459 EdgeBI->setDebugLoc(BI->getDebugLoc());
3460
3461 if (DTU) {
// NOTE(review): line 3462, the local declaration of `Updates` (presumably a
// SmallVector of DominatorTree update records), was lost in this extraction.
3463 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3464 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3465 DTU->applyUpdates(Updates);
3466 }
3467
3468 // For simplicity, we created a separate basic block for the edge. Merge
3469 // it back into the predecessor if possible. This not only avoids
3470 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3471 // bypass the check for trivial cycles above.
3472 MergeBlockIntoPredecessor(EdgeBB, DTU);
3473
3474 // Signal repeat, simplifying any other constants.
3475 return std::nullopt;
3476 }
3477
3478 return false;
3479}
3480
// Driver wrapper: repeatedly runs the Impl until it reaches a fixed point.
// NOTE(review): line 3481 — presumably the signature opening
// `static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,` —
// was lost in this extraction.
3482 DomTreeUpdater *DTU,
3483 const DataLayout &DL,
3484 AssumptionCache *AC) {
3485 std::optional<bool> Result;
3486 bool EverChanged = false;
3487 do {
3488 // Note that None means "we changed things, but recurse further."
3489 Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3490 EverChanged |= Result == std::nullopt || *Result;
3491 } while (Result == std::nullopt);
3492 return EverChanged;
3493}
3494
3495 /// Given a BB that starts with the specified two-entry PHI node,
3496 /// see if we can eliminate it.
/// Attempts to flatten a simple if/then(/else) diamond ending at BB by
/// turning its two-entry PHI nodes into select instructions and hoisting the
/// speculated instructions into the dominating block. Returns true if any
/// change was made (including merely simplifying away some PHIs).
// NOTE(review): several hyperlinked lines were dropped during extraction
// (visible as gaps in the embedded numbering, e.g. the signature line 3497,
// the SmallVector/copy_if setup at 3517-3518, the cost/budget lines
// 3570/3572-3573, and the Updates declaration at 3676) — confirm against
// upstream SimplifyCFG.cpp before relying on exact statements here.
3498                              DomTreeUpdater *DTU, const DataLayout &DL) {
3499   // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
3500   // statement", which has a very simple dominance structure.  Basically, we
3501   // are trying to find the condition that is being branched on, which
3502   // subsequently causes this merge to happen.  We really want control
3503   // dependence information for this check, but simplifycfg can't keep it up
3504   // to date, and this catches most of the cases we care about anyway.
3505   BasicBlock *BB = PN->getParent();
3506
3507   BasicBlock *IfTrue, *IfFalse;
3508   BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3509   if (!DomBI)
3510     return false;
3511   Value *IfCond = DomBI->getCondition();
3512   // Don't bother if the branch will be constant folded trivially.
3513   if (isa<ConstantInt>(IfCond))
3514     return false;
3515
3516   BasicBlock *DomBlock = DomBI->getParent();
  // Collect the side blocks of the diamond/triangle that would have to be
  // speculated (those ending in an unconditional branch to BB).
3519       PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3520         return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3521       });
3522   assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3523          "Will have either one or two blocks to speculate.");
3524
3525   // If the branch is non-unpredictable, see if we either predictably jump to
3526   // the merge bb (if we have only a single 'then' block), or if we predictably
3527   // jump to one specific 'then' block (if we have two of them).
3528   // It isn't beneficial to speculatively execute the code
3529   // from the block that we know is predictably not entered.
3530   if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
3531     uint64_t TWeight, FWeight;
3532     if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3533         (TWeight + FWeight) != 0) {
3534       BranchProbability BITrueProb =
3535           BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3537       BranchProbability BIFalseProb = BITrueProb.getCompl();
3538       if (IfBlocks.size() == 1) {
        // Triangle: bail out if the direct edge to BB is the likely one.
3539         BranchProbability BIBBProb =
3540             DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3541         if (BIBBProb >= Likely)
3542           return false;
3543       } else {
        // Diamond: bail out if either side is strongly predicted.
3544         if (BITrueProb >= Likely || BIFalseProb >= Likely)
3545           return false;
3546       }
3547     }
3548   }
3549
3550   // Don't try to fold an unreachable block. For example, the phi node itself
3551   // can't be the candidate if-condition for a select that we want to form.
3552   if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3553     if (IfCondPhiInst->getParent() == BB)
3554       return false;
3555
3556   // Okay, we found that we can merge this two-entry phi node into a select.
3557   // Doing so would require us to fold *all* two entry phi nodes in this block.
3558   // At some point this becomes non-profitable (particularly if the target
3559   // doesn't support cmov's).  Only do this transformation if there are two or
3560   // fewer PHI nodes in this block.
3561   unsigned NumPhis = 0;
3562   for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3563     if (NumPhis > 2)
3564       return false;
3565
3566   // Loop over the PHI's seeing if we can promote them all to select
3567   // instructions.  While we are at it, keep track of the instructions
3568   // that need to be moved to the dominating block.
3569   SmallPtrSet<Instruction *, 4> AggressiveInsts;
3571   InstructionCost Budget =
3574   bool Changed = false;
3575   for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3576     PHINode *PN = cast<PHINode>(II++);
    // Trivially-simplifiable PHIs are folded away up front.
3577     if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3578       PN->replaceAllUsesWith(V);
3579       PN->eraseFromParent();
3580       Changed = true;
3581       continue;
3582     }
3583
    // Both incoming values must be computable before the branch (within
    // budget), otherwise speculation is not possible/profitable.
3584     if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
3585                              Cost, Budget, TTI) ||
3586         !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
3587                              Cost, Budget, TTI))
3588       return Changed;
3589   }
3590
3591   // If we folded the first phi, PN dangles at this point. Refresh it. If
3592   // we ran out of PHIs then we simplified them all.
3593   PN = dyn_cast<PHINode>(BB->begin());
3594   if (!PN)
3595     return true;
3596
3597   // Return true if at least one of these is a 'not', and another is either
3598   // a 'not' too, or a constant.
3599   auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3600     if (!match(V0, m_Not(m_Value())))
3601       std::swap(V0, V1);
3602     auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3603     return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3604   };
3605
3606   // Don't fold i1 branches on PHIs which contain binary operators or
3607   // (possibly inverted) select form of or/ands,  unless one of
3608   // the incoming values is a 'not' and another one is freely invertible.
3609   // These can often be turned into switches and other things.
3610   auto IsBinOpOrAnd = [](Value *V) {
3611     return match(
3612         V, m_CombineOr(
3613                m_BinOp(),
3616   };
3617   if (PN->getType()->isIntegerTy(1) &&
3618       (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3619        IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3620       !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3621                                  PN->getIncomingValue(1)))
3622     return Changed;
3623
3624   // If all PHI nodes are promotable, check to make sure that all instructions
3625   // in the predecessor blocks can be promoted as well. If not, we won't be able
3626   // to get rid of the control flow, so it's not worth promoting to select
3627   // instructions.
3628   for (BasicBlock *IfBlock : IfBlocks)
3629     for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3630       if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3631         // This is not an aggressive instruction that we can promote.
3632         // Because of this, we won't be able to get rid of the control flow, so
3633         // the xform is not worth it.
3634         return Changed;
3635       }
3636
3637   // If either of the blocks has its address taken, we can't do this fold.
3638   if (any_of(IfBlocks,
3639              [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3640     return Changed;
3641
3642   LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond
3643                     << "  T: " << IfTrue->getName()
3644                     << "  F: " << IfFalse->getName() << "\n");
3645
3646   // If we can still promote the PHI nodes after this gauntlet of tests,
3647   // do all of the PHI's now.
3648
3649   // Move all 'aggressive' instructions, which are defined in the
3650   // conditional parts of the if's up to the dominating block.
3651   for (BasicBlock *IfBlock : IfBlocks)
3652     hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3653
3654   IRBuilder<NoFolder> Builder(DomBI);
3655   // Propagate fast-math-flags from phi nodes to replacement selects.
3656   IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
3657   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3658     if (isa<FPMathOperator>(PN))
3659       Builder.setFastMathFlags(PN->getFastMathFlags());
3660
3661     // Change the PHI node into a select instruction.
3662     Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3663     Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3664
3665     Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
3666     PN->replaceAllUsesWith(Sel);
3667     Sel->takeName(PN);
3668     PN->eraseFromParent();
3669   }
3670
3671   // At this point, all IfBlocks are empty, so our if statement
3672   // has been flattened.  Change DomBlock to jump directly to our new block to
3673   // avoid other simplifycfg's kicking in on the diamond.
3674   Builder.CreateBr(BB);
3675
3677   if (DTU) {
3678     Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3679     for (auto *Successor : successors(DomBlock))
3680       Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3681   }
3682
  // Erase the conditional branch first, then apply the queued DT updates so
  // the updater sees a CFG that matches the edge list above.
3683   DomBI->eraseFromParent();
3684   if (DTU)
3685     DTU->applyUpdates(Updates);
3686
3687   return true;
3688 }
3689
// Create an and/or of LHS and RHS, preferring a plain binary operator when it
// is poison-safe and otherwise falling back to the select-based logical form
// (which does not propagate poison from RHS when LHS decides the result).
// NOTE(review): the signature line naming this helper (original line 3690)
// was dropped during extraction.
3691                               Instruction::BinaryOps Opc, Value *LHS,
3692                               Value *RHS, const Twine &Name = "") {
3693   // Try to relax logical op to binary op.
3694   if (impliesPoison(RHS, LHS))
3695     return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3696   if (Opc == Instruction::And)
3697     return Builder.CreateLogicalAnd(LHS, RHS, Name);
3698   if (Opc == Instruction::Or)
3699     return Builder.CreateLogicalOr(LHS, RHS, Name);
  // Only And/Or are valid here; anything else is a caller bug.
3700   llvm_unreachable("Invalid logical opcode");
3701 }
3702
3703 /// Return true if either PBI or BI has branch weight available, and store
3704 /// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3705 /// not have branch weight, use 1:1 as its weight.
// NOTE(review): the signature line (original line 3706) was dropped during
// extraction; per the parameter list below it takes PBI/BI plus the four
// out-parameters.
3707                                    uint64_t &PredTrueWeight,
3708                                    uint64_t &PredFalseWeight,
3709                                    uint64_t &SuccTrueWeight,
3710                                    uint64_t &SuccFalseWeight) {
3711   bool PredHasWeights =
3712       extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3713   bool SuccHasWeights =
3714       extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3715   if (PredHasWeights || SuccHasWeights) {
    // Fill in a neutral 1:1 weight for whichever branch lacked metadata so
    // callers can combine the four values uniformly.
3716     if (!PredHasWeights)
3717       PredTrueWeight = PredFalseWeight = 1;
3718     if (!SuccHasWeights)
3719       SuccTrueWeight = SuccFalseWeight = 1;
3720     return true;
3721   } else {
3722     return false;
3723   }
3724 }
3725
3726 /// Determine if the two branches share a common destination and deduce a glue
3727 /// that joins the branches' conditions to arrive at the common destination if
3728 /// that would be profitable.
/// Returns {CommonSucc, Opc, InvertPredCond}: the shared successor, the
/// binary opcode (And/Or) that combines the two conditions, and whether the
/// predecessor's condition must be inverted first. Returns std::nullopt when
/// there is no shared destination or the predecessor branch is too
/// predictable for speculation to pay off.
// NOTE(review): the line naming this function (original line 3730), the
// assert text line (3734), and the line computing `Likely` (3746) were
// dropped during extraction.
3729 static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3731                                           const TargetTransformInfo *TTI) {
3732   assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3733          "Both blocks must end with a conditional branches.");
3735          "PredBB must be a predecessor of BB.");
3736
3737   // We have the potential to fold the conditions together, but if the
3738   // predecessor branch is predictable, we may not want to merge them.
3739   uint64_t PTWeight, PFWeight;
3740   BranchProbability PBITrueProb, Likely;
3741   if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3742       extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3743       (PTWeight + PFWeight) != 0) {
3744     PBITrueProb =
3745         BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3747   }
3748
  // Four cases, one per pairing of successors; the chosen opcode is the one
  // that makes the combined condition reach the shared destination.
3749   if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3750     // Speculate the 2nd condition unless the 1st is probably true.
3751     if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3752       return {{BI->getSuccessor(0), Instruction::Or, false}};
3753   } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3754     // Speculate the 2nd condition unless the 1st is probably false.
3755     if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3756       return {{BI->getSuccessor(1), Instruction::And, false}};
3757   } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3758     // Speculate the 2nd condition unless the 1st is probably true.
3759     if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3760       return {{BI->getSuccessor(1), Instruction::And, true}};
3761   } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3762     // Speculate the 2nd condition unless the 1st is probably false.
3763     if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3764       return {{BI->getSuccessor(0), Instruction::Or, true}};
3765   }
3766   return std::nullopt;
3767 }
3768
// Fold BI's block into its predecessor PBI's block: combine the two branch
// conditions with the opcode chosen by
// shouldFoldCondBranchesToCommonDestination, clone BB's "bonus" instructions
// into the predecessor, and rewire PBI to jump straight to the unique
// successor. Always returns true (the fold is committed by this point).
// NOTE(review): several hyperlinked lines were dropped during extraction
// (the signature line 3769, the Opc declaration 3778, the call at 3781, the
// builder metadata setup at 3789, the bonus-instruction cloning at 3858, and
// the DbgVariableRecord range/flags at 3865/3867) — confirm against upstream.
3770                                              DomTreeUpdater *DTU,
3771                                              MemorySSAUpdater *MSSAU,
3772                                              const TargetTransformInfo *TTI) {
3773   BasicBlock *BB = BI->getParent();
3774   BasicBlock *PredBlock = PBI->getParent();
3775
3776   // Determine if the two branches share a common destination.
3777   BasicBlock *CommonSucc;
3779   bool InvertPredCond;
3780   std::tie(CommonSucc, Opc, InvertPredCond) =
3782
3783   LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3784
3785   IRBuilder<> Builder(PBI);
3786   // The builder is used to create instructions to eliminate the branch in BB.
3787   // If BB's terminator has !annotation metadata, add it to the new
3788   // instructions.
3790                          {LLVMContext::MD_annotation});
3791
3792   // If we need to invert the condition in the pred block to match, do so now.
3793   if (InvertPredCond) {
3794     InvertBranch(PBI, Builder);
3795   }
3796
  // After any inversion, PBI's non-BB successor pairs with BI's successor on
  // the same side; the other BI successor is the one the fold keeps.
3797   BasicBlock *UniqueSucc =
3798       PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
3799
3800   // Before cloning instructions, notify the successor basic block that it
3801   // is about to have a new predecessor. This will update PHI nodes,
3802   // which will allow us to update live-out uses of bonus instructions.
3803   AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
3804
3805   // Try to update branch weights.
3806   uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3807   if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3808                              SuccTrueWeight, SuccFalseWeight)) {
3809     SmallVector<uint64_t, 8> NewWeights;
3810
3811     if (PBI->getSuccessor(0) == BB) {
3812       // PBI: br i1 %x, BB, FalseDest
3813       // BI:  br i1 %y, UniqueSucc, FalseDest
3814       // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3815       NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
3816       // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3817       //               TrueWeight for PBI * FalseWeight for BI.
3818       // We assume that total weights of a BranchInst can fit into 32 bits.
3819       // Therefore, we will not have overflow using 64-bit arithmetic.
3820       NewWeights.push_back(PredFalseWeight *
3821                                (SuccFalseWeight + SuccTrueWeight) +
3822                            PredTrueWeight * SuccFalseWeight);
3823     } else {
3824       // PBI: br i1 %x, TrueDest, BB
3825       // BI:  br i1 %y, TrueDest, UniqueSucc
3826       // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
3827       //              FalseWeight for PBI * TrueWeight for BI.
3828       NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
3829                            PredFalseWeight * SuccTrueWeight);
3830       // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
3831       NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
3832     }
3833
3834     // Halve the weights if any of them cannot fit in an uint32_t
3835     FitWeights(NewWeights);
3836
3837     SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
3838     setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
3839
3840     // TODO: If BB is reachable from all paths through PredBlock, then we
3841     // could replace PBI's branch probabilities with BI's.
3842   } else
    // No usable weights on either branch: drop the now-stale profile data.
3843     PBI->setMetadata(LLVMContext::MD_prof, nullptr);
3844
3845   // Now, update the CFG.
3846   PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
3847
3848   if (DTU)
3849     DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
3850                        {DominatorTree::Delete, PredBlock, BB}});
3851
3852   // If BI was a loop latch, it may have had associated loop metadata.
3853   // We need to copy it to the new latch, that is, PBI.
3854   if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
3855     PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
3856
3857   ValueToValueMapTy VMap; // maps original values to cloned values
3859
3860   Module *M = BB->getModule();
3861
3862   if (PredBlock->IsNewDbgInfoFormat) {
3863     PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
3864     for (DbgVariableRecord &DVR :
      // Remap cloned debug records so they refer to the cloned values.
3866       RemapDbgVariableRecord(M, &DVR, VMap,
3868     }
3869   }
3870
3871   // Now that the Cond was cloned into the predecessor basic block,
3872   // or/and the two conditions together.
3873   Value *BICond = VMap[BI->getCondition()];
3874   PBI->setCondition(
3875       createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
3876
3877   ++NumFoldBranchToCommonDest;
3878   return true;
3879 }
3880
3881/// Return if an instruction's type or any of its operands' types are a vector
3882/// type.
3883static bool isVectorOp(Instruction &I) {
3884 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
3885 return U->getType()->isVectorTy();
3886 });
3887}
3888
3889 /// If this basic block is simple enough, and if a predecessor branches to us
3890 /// and one of our successors, fold the block into the predecessor and use
3891 /// logical operations to pick the right destination.
/// Returns true if a fold was performed. Note this folds into at most one
/// predecessor per invocation (the loop at the bottom returns on its first
/// iteration); SimplifyCFG's outer iteration handles the remaining
/// predecessors on later passes.
// NOTE(review): several hyperlinked lines were dropped during extraction
// (the signature line 3892, the shortcut checks at 3902-3904, the Preds
// vector declaration at 3919, the Opc declaration at 3931, the base Cost at
// 3942, the threshold compare at 3947, the safe-to-speculate call at 3977,
// and the TCC_Free constant at 3984) — confirm against upstream.
3893                                   MemorySSAUpdater *MSSAU,
3894                                   const TargetTransformInfo *TTI,
3895                                   unsigned BonusInstThreshold) {
3896   // If this block ends with an unconditional branch,
3897   // let SpeculativelyExecuteBB() deal with it.
3898   if (!BI->isConditional())
3899     return false;
3900
3901   BasicBlock *BB = BI->getParent();
3905
3906   Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
3907
  // The condition must be a single-use cmp/binop/select defined in this
  // block, so it can be cloned into the predecessor cheaply.
3908   if (!Cond ||
3909       (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
3910        !isa<SelectInst>(Cond)) ||
3911       Cond->getParent() != BB || !Cond->hasOneUse())
3912     return false;
3913
3914   // Finally, don't infinitely unroll conditional loops.
3915   if (is_contained(successors(BB), BB))
3916     return false;
3917
3918   // With which predecessors will we want to deal with?
3920   for (BasicBlock *PredBlock : predecessors(BB)) {
3921     BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
3922
3923     // Check that we have two conditional branches.  If there is a PHI node in
3924     // the common successor, verify that the same value flows in from both
3925     // blocks.
3926     if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
3927       continue;
3928
3929     // Determine if the two branches share a common destination.
3930     BasicBlock *CommonSucc;
3932     bool InvertPredCond;
3933     if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
3934       std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
3935     else
3936       continue;
3937
3938     // Check the cost of inserting the necessary logic before performing the
3939     // transformation.
3940     if (TTI) {
3941       Type *Ty = BI->getCondition()->getType();
      // Inverting a shared or non-cmp predecessor condition costs an xor.
3943       if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
3944                              !isa<CmpInst>(PBI->getCondition())))
3945         Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
3946
3948         continue;
3949     }
3950
3951     // Ok, we do want to deal with this predecessor. Record it.
3952     Preds.emplace_back(PredBlock);
3953   }
3954
3955   // If there aren't any predecessors into which we can fold,
3956   // don't bother checking the cost.
3957   if (Preds.empty())
3958     return false;
3959
3960   // Only allow this transformation if computing the condition doesn't involve
3961   // too many instructions and these involved instructions can be executed
3962   // unconditionally. We denote all involved instructions except the condition
3963   // as "bonus instructions", and only allow this transformation when the
3964   // number of the bonus instructions we'll need to create when cloning into
3965   // each predecessor does not exceed a certain threshold.
3966   unsigned NumBonusInsts = 0;
3967   bool SawVectorOp = false;
3968   const unsigned PredCount = Preds.size();
3969   for (Instruction &I : *BB) {
3970     // Don't check the branch condition comparison itself.
3971     if (&I == Cond)
3972       continue;
3973     // Ignore dbg intrinsics, and the terminator.
3974     if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
3975       continue;
3976     // I must be safe to execute unconditionally.
3978       return false;
3979     SawVectorOp |= isVectorOp(I);
3980
3981     // Account for the cost of duplicating this instruction into each
3982     // predecessor. Ignore free instructions.
3983     if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
3985       NumBonusInsts += PredCount;
3986
3987       // Early exits once we reach the limit.
3988       if (NumBonusInsts >
3989           BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
3990         return false;
3991     }
3992
    // Uses of I must be block-closed-SSA-friendly: either a PHI taking the
    // value along the BB edge, or a later instruction within BB itself.
3993     auto IsBCSSAUse = [BB, &I](Use &U) {
3994       auto *UI = cast<Instruction>(U.getUser());
3995       if (auto *PN = dyn_cast<PHINode>(UI))
3996         return PN->getIncomingBlock(U) == BB;
3997       return UI->getParent() == BB && I.comesBefore(UI);
3998     };
3999
4000     // Does this instruction require rewriting of uses?
4001     if (!all_of(I.uses(), IsBCSSAUse))
4002       return false;
4003   }
4004   if (NumBonusInsts >
4005       BonusInstThreshold *
4006           (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4007     return false;
4008
4009   // Ok, we have the budget. Perform the transformation.
4010   for (BasicBlock *PredBlock : Preds) {
4011     auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
    // Deliberately folds only the first candidate predecessor; see the
    // function comment above.
4012     return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4013   }
4014   return false;
4015 }
4016
4017 // If there is only one store in BB1 and BB2, return it, otherwise return
4018 // nullptr.
// Either block pointer may be null (fallthrough side of a triangle); a null
// block is simply skipped.
// NOTE(review): the signature line (original line 4019) was dropped during
// extraction.
4020   StoreInst *S = nullptr;
4021   for (auto *BB : {BB1, BB2}) {
4022     if (!BB)
4023       continue;
4024     for (auto &I : *BB)
4025       if (auto *SI = dyn_cast<StoreInst>(&I)) {
4026         if (S)
4027           // Multiple stores seen.
4028           return nullptr;
4029         else
4030           S = SI;
4031       }
4032   }
4033   return S;
4034 }
4035
4037 Value *AlternativeV = nullptr) {
4038 // PHI is going to be a PHI node that allows the value V that is defined in
4039 // BB to be referenced in BB's only successor.
4040 //
4041 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4042 // doesn't matter to us what the other operand is (it'll never get used). We
4043 // could just create a new PHI with an undef incoming value, but that could
4044 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4045 // other PHI. So here we directly look for some PHI in BB's successor with V
4046 // as an incoming operand. If we find one, we use it, else we create a new
4047 // one.
4048 //
4049 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4050 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4051 // where OtherBB is the single other predecessor of BB's only successor.
4052 PHINode *PHI = nullptr;
4053 BasicBlock *Succ = BB->getSingleSuccessor();
4054
4055 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4056 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4057 PHI = cast<PHINode>(I);
4058 if (!AlternativeV)
4059 break;
4060
4061 assert(Succ->hasNPredecessors(2));
4062 auto PredI = pred_begin(Succ);
4063 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4064 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4065 break;
4066 PHI = nullptr;
4067 }
4068 if (PHI)
4069 return PHI;
4070
4071 // If V is not an instruction defined in BB, just return it.
4072 if (!AlternativeV &&
4073 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4074 return V;
4075
4076 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4077 PHI->insertBefore(Succ->begin());
4078 PHI->addIncoming(V, BB);
4079 for (BasicBlock *PredBB : predecessors(Succ))
4080 if (PredBB != BB)
4081 PHI->addIncoming(
4082 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4083 return PHI;
4084}
4085
4087 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4088 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4089 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4090 // For every pointer, there must be exactly two stores, one coming from
4091 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4092 // store (to any address) in PTB,PFB or QTB,QFB.
4093 // FIXME: We could relax this restriction with a bit more work and performance
4094 // testing.
4095 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4096 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4097 if (!PStore || !QStore)
4098 return false;
4099
4100 // Now check the stores are compatible.
4101 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4102 PStore->getValueOperand()->getType() !=
4103 QStore->getValueOperand()->getType())
4104 return false;
4105
4106 // Check that sinking the store won't cause program behavior changes. Sinking
4107 // the store out of the Q blocks won't change any behavior as we're sinking
4108 // from a block to its unconditional successor. But we're moving a store from
4109 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4110 // So we need to check that there are no aliasing loads or stores in
4111 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4112 // operations between PStore and the end of its parent block.
4113 //
4114 // The ideal way to do this is to query AliasAnalysis, but we don't
4115 // preserve AA currently so that is dangerous. Be super safe and just
4116 // check there are no other memory operations at all.
4117 for (auto &I : *QFB->getSinglePredecessor())
4118 if (I.mayReadOrWriteMemory())
4119 return false;
4120 for (auto &I : *QFB)
4121 if (&I != QStore && I.mayReadOrWriteMemory())
4122 return false;
4123 if (QTB)
4124 for (auto &I : *QTB)
4125 if (&I != QStore && I.mayReadOrWriteMemory())
4126 return false;
4127 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4128 I != E; ++I)
4129 if (&*I != PStore && I->mayReadOrWriteMemory())
4130 return false;
4131
4132 // If we're not in aggressive mode, we only optimize if we have some
4133 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4134 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4135 if (!BB)
4136 return true;
4137 // Heuristic: if the block can be if-converted/phi-folded and the
4138 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4139 // thread this store.
4141 InstructionCost Budget =
4143 for (auto &I : BB->instructionsWithoutDebug(false)) {
4144 // Consider terminator instruction to be free.
4145 if (I.isTerminator())
4146 continue;
4147 // If this is one the stores that we want to speculate out of this BB,
4148 // then don't count it's cost, consider it to be free.
4149 if (auto *S = dyn_cast<StoreInst>(&I))
4150 if (llvm::find(FreeStores, S))
4151 continue;
4152 // Else, we have a white-list of instructions that we are ak speculating.
4153 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4154 return false; // Not in white-list - not worthwhile folding.
4155 // And finally, if this is a non-free instruction that we are okay
4156 // speculating, ensure that we consider the speculation budget.
4157 Cost +=
4159 if (Cost > Budget)
4160 return false; // Eagerly refuse to fold as soon as we're out of budget.
4161 }
4162 assert(Cost <= Budget &&
4163 "When we run out of budget we will eagerly return from within the "
4164 "per-instruction loop.");
4165 return true;
4166 };
4167
4168 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4170 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4171 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4172 return false;
4173
4174 // If PostBB has more than two predecessors, we need to split it so we can
4175 // sink the store.
4176 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4177 // We know that QFB's only successor is PostBB. And QFB has a single
4178 // predecessor. If QTB exists, then its only successor is also PostBB.
4179 // If QTB does not exist, then QFB's only predecessor has a conditional
4180 // branch to QFB and PostBB.
4181 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4182 BasicBlock *NewBB =
4183 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4184 if (!NewBB)
4185 return false;
4186 PostBB = NewBB;
4187 }
4188
4189 // OK, we're going to sink the stores to PostBB. The store has to be
4190 // conditional though, so first create the predicate.
4191 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
4192 ->getCondition();
4193 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
4194 ->getCondition();
4195
4197 PStore->getParent());
4199 QStore->getParent(), PPHI);
4200
4201 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4202 IRBuilder<> QB(PostBB, PostBBFirst);
4203 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4204
4205 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
4206 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
4207
4208 if (InvertPCond)
4209 PPred = QB.CreateNot(PPred);
4210 if (InvertQCond)
4211 QPred = QB.CreateNot(QPred);
4212 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4213
4214 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4215 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4216 /*Unreachable=*/false,
4217 /*BranchWeights=*/nullptr, DTU);
4218
4219 QB.SetInsertPoint(T);
4220 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4221 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4222 // Choose the minimum alignment. If we could prove both stores execute, we
4223 // could use biggest one. In this case, though, we only know that one of the
4224 // stores executes. And we don't know it's safe to take the alignment from a
4225 // store that doesn't execute.
4226 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4227
4228 QStore->eraseFromParent();
4229 PStore->eraseFromParent();
4230
4231 return true;
4232}
4233
4235 DomTreeUpdater *DTU, const DataLayout &DL,
4236 const TargetTransformInfo &TTI) {
4237 // The intention here is to find diamonds or triangles (see below) where each
4238 // conditional block contains a store to the same address. Both of these
4239 // stores are conditional, so they can't be unconditionally sunk. But it may
4240 // be profitable to speculatively sink the stores into one merged store at the
4241 // end, and predicate the merged store on the union of the two conditions of
4242 // PBI and QBI.
4243 //
4244 // This can reduce the number of stores executed if both of the conditions are
4245 // true, and can allow the blocks to become small enough to be if-converted.
4246 // This optimization will also chain, so that ladders of test-and-set
4247 // sequences can be if-converted away.
4248 //
4249 // We only deal with simple diamonds or triangles:
4250 //
4251 // PBI or PBI or a combination of the two
4252 // / \ | \
4253 // PTB PFB | PFB
4254 // \ / | /
4255 // QBI QBI
4256 // / \ | \
4257 // QTB QFB | QFB
4258 // \ / | /
4259 // PostBB PostBB
4260 //
4261 // We model triangles as a type of diamond with a nullptr "true" block.
4262 // Triangles are canonicalized so that the fallthrough edge is represented by
4263 // a true condition, as in the diagram above.
4264 BasicBlock *PTB = PBI->getSuccessor(0);
4265 BasicBlock *PFB = PBI->getSuccessor(1);
4266 BasicBlock *QTB = QBI->getSuccessor(0);
4267 BasicBlock *QFB = QBI->getSuccessor(1);
4268 BasicBlock *PostBB = QFB->getSingleSuccessor();
4269
4270 // Make sure we have a good guess for PostBB. If QTB's only successor is
4271 // QFB, then QFB is a better PostBB.
4272 if (QTB->getSingleSuccessor() == QFB)
4273 PostBB = QFB;
4274
4275 // If we couldn't find a good PostBB, stop.
4276 if (!PostBB)
4277 return false;
4278
4279 bool InvertPCond = false, InvertQCond = false;
4280 // Canonicalize fallthroughs to the true branches.
4281 if (PFB == QBI->getParent()) {
4282 std::swap(PFB, PTB);
4283 InvertPCond = true;
4284 }
4285 if (QFB == PostBB) {
4286 std::swap(QFB, QTB);
4287 InvertQCond = true;
4288 }
4289
4290 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4291 // and QFB may not. Model fallthroughs as a nullptr block.
4292 if (PTB == QBI->getParent())
4293 PTB = nullptr;
4294 if (QTB == PostBB)
4295 QTB = nullptr;
4296
4297 // Legality bailouts. We must have at least the non-fallthrough blocks and
4298 // the post-dominating block, and the non-fallthroughs must only have one
4299 // predecessor.
4300 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4301 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4302 };
4303 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4304 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4305 return false;
4306 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4307 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4308 return false;
4309 if (!QBI->getParent()->hasNUses(2))
4310 return false;
4311
4312 // OK, this is a sequence of two diamonds or triangles.
4313 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4314 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4315 for (auto *BB : {PTB, PFB}) {
4316 if (!BB)
4317 continue;
4318 for (auto &I : *BB)
4319 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4320 PStoreAddresses.insert(SI->getPointerOperand());
4321 }
4322 for (auto *BB : {QTB, QFB}) {
4323 if (!BB)
4324 continue;
4325 for (auto &I : *BB)
4326 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4327 QStoreAddresses.insert(SI->getPointerOperand());
4328 }
4329
4330 set_intersect(PStoreAddresses, QStoreAddresses);
4331 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4332 // clear what it contains.
4333 auto &CommonAddresses = PStoreAddresses;
4334
4335 bool Changed = false;
4336 for (auto *Address : CommonAddresses)
4337 Changed |=
4338 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4339 InvertPCond, InvertQCond, DTU, DL, TTI);
4340 return Changed;
4341}
4342
/// If the previous block ended with a widenable branch, determine if reusing
/// the target block is profitable and legal. This will have the effect of
/// "widening" PBI, but doesn't require us to reason about hosting safety.
                                          DomTreeUpdater *DTU) {
  // TODO: This can be generalized in two important ways:
  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
  //    values from the PBI edge.
  // 2) We can sink side effecting instructions into BI's fallthrough
  //    successor provided they doesn't contribute to computation of
  //    BI's condition.
  // The widenable branch PBI must jump to BI's block on its true edge;
  // IfFalseBB is the "deopt"-style target we may redirect BI into.
  BasicBlock *IfTrueBB = PBI->getSuccessor(0);
  BasicBlock *IfFalseBB = PBI->getSuccessor(1);
  if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
    return false;
  if (!IfFalseBB->phis().empty())
    return false; // TODO
  // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
  // may undo the transform done here.
  // TODO: There might be a more fine-grained solution to this.
  if (!llvm::succ_empty(IfFalseBB))
    return false;
  // Use lambda to lazily compute expensive condition after cheap ones.
  auto NoSideEffects = [](BasicBlock &BB) {
    return llvm::none_of(BB, [](const Instruction &I) {
      return I.mayWriteToMemory() || I.mayHaveSideEffects();
    });
  };
  // Two symmetric cases follow: if either successor of BI terminates in a
  // deoptimize call (so redirecting it is profitable) and BI's block has no
  // side effects, retarget that edge at IfFalseBB, effectively widening PBI.
  if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
      BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(1);
    // Keep PHI nodes in the abandoned successor consistent before retargeting.
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(1, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
      BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(0);
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(0, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  return false;
}
4398
/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
                                           DomTreeUpdater *DTU,
                                           const DataLayout &DL,
                                           const TargetTransformInfo &TTI) {
  assert(PBI->isConditional() && BI->isConditional());
  BasicBlock *BB = BI->getParent();

  // If this block ends with a branch instruction, and if there is a
  // predecessor that ends on a branch of the same condition, make
  // this conditional branch redundant.
  if (PBI->getCondition() == BI->getCondition() &&
      PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
    // Okay, the outcome of this conditional branch is statically
    // knowable. If this block had a single pred, handle specially, otherwise
    // FoldCondBranchOnValueKnownInPredecessor() will handle it.
    if (BB->getSinglePredecessor()) {
      // Turn this into a branch on constant.
      bool CondIsTrue = PBI->getSuccessor(0) == BB;
      BI->setCondition(
          ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
      return true; // Nuke the branch on constant.
    }
  }

  // If the previous block ended with a widenable branch, determine if reusing
  // the target block is profitable and legal. This will have the effect of
  // "widening" PBI, but doesn't require us to reason about hosting safety.
  if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
    return true;

  // If both branches are conditional and both contain stores to the same
  // address, remove the stores from the conditionals and create a conditional
  // merged store at the end.
  if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
    return true;

  // If this is a conditional branch in an empty block, and if any
  // predecessors are a conditional branch to one of our destinations,
  // fold the conditions into logical ops and one cond br.

  // Ignore dbg intrinsics.
  if (&*BB->instructionsWithoutDebug(false).begin() != BI)
    return false;

  // Determine which successor of each branch is the destination the two
  // branches share. PBIOp/BIOp record the successor index (0 = true edge,
  // 1 = false edge) of that common destination in PBI and BI respectively.
  int PBIOp, BIOp;
  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
    PBIOp = 0;
    BIOp = 0;
  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
    PBIOp = 0;
    BIOp = 1;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
    PBIOp = 1;
    BIOp = 0;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
    PBIOp = 1;
    BIOp = 1;
  } else {
    // No common destination; nothing to fold.
    return false;
  }

  // Check to make sure that the other destination of this branch
  // isn't BB itself. If so, this is an infinite loop that will
  // keep getting unwound.
  if (PBI->getSuccessor(PBIOp) == BB)
    return false;

  // If predecessor's branch probability to BB is too low don't merge branches.
  SmallVector<uint32_t, 2> PredWeights;
  if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
      extractBranchWeights(*PBI, PredWeights) &&
      (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {

        PredWeights[PBIOp],
        static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);

    // Bail out when the edge into BB is already likely enough that folding
    // would not pay off.
    if (CommonDestProb >= Likely)
      return false;
  }

  // Do not perform this transformation if it would require
  // insertion of a large number of select instructions. For targets
  // without predication/cmovs, this is a big pessimization.

  BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
  BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
  unsigned NumPhis = 0;
  // Each PHI in CommonDest may require one select below; cap the count.
  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
       ++II, ++NumPhis) {
    if (NumPhis > 2) // Disable this xform.
      return false;
  }

  // Finally, if everything is ok, fold the branches to logical ops.
  BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);

  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
                    << "AND: " << *BI->getParent());


  // If OtherDest *is* BB, then BB is a basic block with a single conditional
  // branch in it, where one edge (OtherDest) goes back to itself but the other
  // exits. We don't *know* that the program avoids the infinite loop
  // (even though that seems likely). If we do this xform naively, we'll end up
  // recursively unpeeling the loop. Since we know that (after the xform is
  // done) that the block *is* infinite if reached, we just make it an obviously
  // infinite loop with no cond branch.
  if (OtherDest == BB) {
    // Insert it at the end of the function, because it's either code,
    // or it won't matter if it's hot. :)
    BasicBlock *InfLoopBlock =
        BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
    BranchInst::Create(InfLoopBlock, InfLoopBlock);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
    OtherDest = InfLoopBlock;
  }

  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // BI may have other predecessors. Because of this, we leave
  // it alone, but modify PBI.

  // Make sure we get to CommonDest on True&True directions.
  Value *PBICond = PBI->getCondition();
  IRBuilder<NoFolder> Builder(PBI);
  if (PBIOp)
    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");

  Value *BICond = BI->getCondition();
  if (BIOp)
    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");

  // Merge the conditions.
  Value *Cond =
      createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");

  // Modify PBI to branch on the new condition to the new dests.
  PBI->setCondition(Cond);
  PBI->setSuccessor(0, CommonDest);
  PBI->setSuccessor(1, OtherDest);

  if (DTU) {
    Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
    Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});

    DTU->applyUpdates(Updates);
  }

  // Update branch weight for PBI.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
  bool HasWeights =
      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight);
  if (HasWeights) {
    // "Common" is the weight toward the shared destination, "Other" toward
    // the remaining one, for predecessor (Pred*) and successor (Succ*).
    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
    // The weight to CommonDest should be PredCommon * SuccTotal +
    //                                    PredOther * SuccCommon.
    // The weight to OtherDest should be PredOther * SuccOther.
    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
                                  PredOther * SuccCommon,
                              PredOther * SuccOther};
    // Halve the weights if any of them cannot fit in an uint32_t
    FitWeights(NewWeights);

    setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
  }

  // OtherDest may have phi nodes. If so, add an entry from PBI's
  // block that are identical to the entries for BI's block.
  AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);

  // We know that the CommonDest already had an edge from PBI to
  // it. If it has PHIs though, the PHIs may have different
  // entries for BB and PBI's BB. If so, insert a select to make
  // them agree.
  for (PHINode &PN : CommonDest->phis()) {
    Value *BIV = PN.getIncomingValueForBlock(BB);
    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
    Value *PBIV = PN.getIncomingValue(PBBIdx);
    if (BIV != PBIV) {
      // Insert a select in PBI to pick the right value.
      SelectInst *NV = cast<SelectInst>(
          Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
      PN.setIncomingValue(PBBIdx, NV);
      // Although the select has the same condition as PBI, the original branch
      // weights for PBI do not apply to the new select because the select's
      // 'logical' edges are incoming edges of the phi that is eliminated, not
      // the outgoing edges of PBI.
      if (HasWeights) {
        uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
        uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
        uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
        uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
        // The weight to PredCommonDest should be PredCommon * SuccTotal.
        // The weight to PredOtherDest should be PredOther * SuccCommon.
        uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
                                  PredOther * SuccCommon};

        FitWeights(NewWeights);

        setBranchWeights(NV, NewWeights[0], NewWeights[1]);
      }
    }
  }

  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // This basic block is probably dead. We know it has at least
  // one fewer predecessor.
  return true;
}
4623
// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
// true or to FalseBB if Cond is false.
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
                                                Value *Cond, BasicBlock *TrueBB,
                                                BasicBlock *FalseBB,
                                                uint32_t TrueWeight,
                                                uint32_t FalseWeight) {
  auto *BB = OldTerm->getParent();
  // Remove any superfluous successor edges from the CFG.
  // First, figure out which successors to preserve.
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
  // successor.
  BasicBlock *KeepEdge1 = TrueBB;
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;

  SmallSetVector<BasicBlock *, 2> RemovedSuccessors;

  // Then remove the rest.
  for (BasicBlock *Succ : successors(OldTerm)) {
    // Make sure only to keep exactly one copy of each edge.
    // KeepEdge1/KeepEdge2 are nulled out once seen, so duplicate edges to
    // TrueBB/FalseBB beyond the first are pruned like any other edge.
    if (Succ == KeepEdge1)
      KeepEdge1 = nullptr;
    else if (Succ == KeepEdge2)
      KeepEdge2 = nullptr;
    else {
      Succ->removePredecessor(BB,
                              /*KeepOneInputPHIs=*/true);

      if (Succ != TrueBB && Succ != FalseBB)
        RemovedSuccessors.insert(Succ);
    }
  }

  IRBuilder<> Builder(OldTerm);
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());

  // Insert an appropriate new terminator.
  // At this point a still-non-null KeepEdge means that block was NOT an
  // existing successor of OldTerm.
  if (!KeepEdge1 && !KeepEdge2) {
    if (TrueBB == FalseBB) {
      // We were only looking for one successor, and it was present.
      // Create an unconditional branch to it.
      Builder.CreateBr(TrueBB);
    } else {
      // We found both of the successors we were looking for.
      // Create a conditional branch sharing the condition of the select.
      BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
      if (TrueWeight != FalseWeight)
        setBranchWeights(NewBI, TrueWeight, FalseWeight);
    }
  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
    // Neither of the selected blocks were successors, so this
    // terminator must be unreachable.
    new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
  } else {
    // One of the selected values was a successor, but the other wasn't.
    // Insert an unconditional branch to the one that was found;
    // the edge to the one that wasn't must be unreachable.
    if (!KeepEdge1) {
      // Only TrueBB was found.
      Builder.CreateBr(TrueBB);
    } else {
      // Only FalseBB was found.
      Builder.CreateBr(FalseBB);
    }
  }


  if (DTU) {
    Updates.reserve(RemovedSuccessors.size());
    for (auto *RemovedSuccessor : RemovedSuccessors)
      Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
4705
// Replaces
//   (switch (select cond, X, Y)) on constant X, Y
// with a branch - conditional if X and Y lead to distinct BBs,
// unconditional otherwise.
bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
                                            SelectInst *Select) {
  // Check for constant integer values in the select.
  ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
  ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
  if (!TrueVal || !FalseVal)
    return false;

  // Find the relevant condition and destinations.
  // findCaseValue returns the default case when the value matches no case,
  // so TrueBB/FalseBB are always valid successors of the switch.
  Value *Condition = Select->getCondition();
  BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
  BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();

  // Get weight for TrueBB and FalseBB.
  uint32_t TrueWeight = 0, FalseWeight = 0;
  bool HasWeights = hasBranchWeightMD(*SI);
  if (HasWeights) {
    GetBranchWeights(SI, Weights);
    // Only trust the metadata if it has one weight per successor
    // (default + each case).
    if (Weights.size() == 1 + SI->getNumCases()) {
      TrueWeight =
          (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
      FalseWeight =
          (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
    }
  }

  // Perform the actual simplification.
  return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
                                    FalseWeight);
}
4741
4742// Replaces
4743// (indirectbr (select cond, blockaddress(@fn, BlockA),
4744// blockaddress(@fn, BlockB)))
4745// with
4746// (br cond, BlockA, BlockB).
4747bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4748 SelectInst *SI) {
4749 // Check that both operands of the select are block addresses.
4750 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4751 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4752 if (!TBA || !FBA)
4753 return false;
4754
4755 // Extract the actual blocks.
4756 BasicBlock *TrueBB = TBA->getBasicBlock();
4757 BasicBlock *FalseBB = FBA->getBasicBlock();
4758
4759 // Perform the actual simplification.
4760 return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4761 0);
4762}
4763
/// This is called when we find an icmp instruction
/// (a seteq/setne with a constant) as the only instruction in a
/// block that ends with an uncond branch. We are looking for a very specific
/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
/// this case, we merge the first two "or's of icmp" into a switch, but then the
/// default value goes to an uncond block with a seteq in it, we get something
/// like:
///
///   switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
/// DEFAULT:
///   %tmp = icmp eq i8 %A, 92
///   br label %end
/// end:
///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
    ICmpInst *ICI, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp has multiple uses, it is too
  // complex.
  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
    return false;

  Value *V = ICI->getOperand(0);
  ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch. In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
    return false;

  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
  if (SI->getCondition() != V)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value of
  // V in this block. Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value");
    ICI->setOperand(0, VVal);

    if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest. If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it.
  // (V is known here: the comparison against a value covered by an explicit
  // case can never be an equality on the default path.)
  if (SI->findCaseValue(Cst) != SI->case_default()) {
    Value *V;
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
    else

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the icmp has to be in the 'end' block, by the only PHI node in
  // the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
  PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
      isa<PHINode>(++BasicBlock::iterator(PHIUse)))
    return false;

  // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
  // true in the PHI.
  Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
  Constant *NewCst = ConstantInt::getFalse(BB->getContext());

  if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
    std::swap(DefaultCst, NewCst);

  // Replace ICI (which is used by the PHI for the default value) with true or
  // false depending on if it is EQ or NE.
  ICI->replaceAllUsesWith(DefaultCst);
  ICI->eraseFromParent();


  // Okay, the switch goes to this block on a default value. Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
  {
    // Split the default edge's weight roughly in half between the remaining
    // default edge and the newly added case.
    auto W0 = SIW.getSuccessorWeight(0);
    if (W0) {
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(0, *NewW);
    }
    SIW.addCase(Cst, NewBB, NewW);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(SuccBlock);
  PHIUse->addIncoming(NewCst, NewBB);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}
4887
/// The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // An 'or' chain means equality sends control to the true edge; an 'and'
  // chain of setne's means equality sends control to the false edge.
  bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
  Values.erase(std::unique(Values.begin(), Values.end()), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  // TODO: Preserve branch weight metadata, similarly to how
  // FoldValueComparisonIntoPredecessors preserves it.

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  if (!TrueWhenEqual)
    std::swap(DefaultBB, EdgeBB);

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH.  BB is:\n"
                    << *BB);


  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    if (TrueWhenEqual)
      Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
    else
      Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    AddPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Create the new switch instruction now.
  SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());

  // Add all of the 'cases' to the switch instruction.
  for (unsigned i = 0, e = Values.size(); i != e; ++i)
    New->addCase(Values[i], EdgeBB);

  // We added edges from PI to the EdgeBB. As such, if there were any
  // PHI nodes in EdgeBB, they need entries to be added corresponding to
  // the number of edges added.
  // (One incoming entry already exists for the original BB edge, hence
  // Values.size() - 1 additional copies.)
  for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
    PHINode *PN = cast<PHINode>(BBI);
    Value *InVal = PN->getIncomingValueForBlock(BB);
    for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
      PN->addIncoming(InVal, BB);
  }

  // Erase the old branch instruction.
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5019
5020bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5021 if (isa<PHINode>(RI->getValue()))
5022 return simplifyCommonResume(RI);
5023 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
5024 RI->getValue() == RI->getParent()->getFirstNonPHI())
5025 // The resume must unwind the exception that caused control to branch here.
5026 return simplifySingleResume(RI);
5027
5028 return false;
5029}
5030
// Check if cleanup block is empty
// Returns true iff every instruction in the range R is an intrinsic that is
// irrelevant to cleanup semantics (debug info or lifetime_end); any other
// instruction makes the cleanup non-empty.
  for (Instruction &I : R) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II)
      return false;

    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      // Benign: these don't perform any actual cleanup work.
      break;
    default:
      return false;
    }
  }
  return true;
}
5051
// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  // "Trivial" means: the block's only successor is BB, it feeds its own
  // landing pad value into the phi, and it contains nothing of consequence
  // between the landing pad and its terminator.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    for (BasicBlock *Pred :
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  // Non-empty TrivialUnwindBlocks was established above, so this is
  // always true here; kept as-is for clarity of intent.
  return !TrivialUnwindBlocks.empty();
}
5121
// Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to here");

  // Check that there are no other instructions except for debug intrinsics.
      make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  // removeUnwindEdge converts each predecessor's invoke into a plain call.
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}
5144
5146 // If this is a trivial cleanup pad that executes no instructions, it can be
5147 // eliminated. If the cleanup pad continues to the caller, any predecessor
5148 // that is an EH pad will be updated to continue to the caller and any
5149 // predecessor that terminates with an invoke instruction will have its invoke
5150 // instruction converted to a call instruction. If the cleanup pad being
5151 // simplified does not continue to the caller, each predecessor will be
5152 // updated to continue to the unwind destination of the cleanup pad being
5153 // simplified.
5154 BasicBlock *BB = RI->getParent();
5155 CleanupPadInst *CPInst = RI->getCleanupPad();
5156 if (CPInst->getParent() != BB)
5157 // This isn't an empty cleanup.
5158 return false;
5159
5160 // We cannot kill the pad if it has multiple uses. This typically arises
5161 // from unreachable basic blocks.
5162 if (!CPInst->hasOneUse())
5163 return false;
5164
5165 // Check that there are no other instructions except for benign intrinsics.
5167 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5168 return false;
5169
5170 // If the cleanup return we are simplifying unwinds to the caller, this will
5171 // set UnwindDest to nullptr.
5172 BasicBlock *UnwindDest = RI->getUnwindDest();
5173 Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
5174
5175 // We're about to remove BB from the control flow. Before we do, sink any
5176 // PHINodes into the unwind destination. Doing this before changing the
5177 // control flow avoids some potentially slow checks, since we can currently
5178 // be certain that UnwindDest and BB have no common predecessors (since they
5179 // are both EH pads).
5180 if (UnwindDest) {
5181 // First, go through the PHI nodes in UnwindDest and update any nodes that
5182 // reference the block we are removing
5183 for (PHINode &DestPN : UnwindDest->phis()) {
5184 int Idx = DestPN.getBasicBlockIndex(BB);
5185 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5186 assert(Idx != -1);
5187 // This PHI node has an incoming value that corresponds to a control
5188 // path through the cleanup pad we are removing. If the incoming
5189 // value is in the cleanup pad, it must be a PHINode (because we
5190 // verified above that the block is otherwise empty). Otherwise, the
5191 // value is either a constant or a value that dominates the cleanup
5192 // pad being removed.
5193 //
5194 // Because BB and UnwindDest are both EH pads, all of their
5195 // predecessors must unwind to these blocks, and since no instruction
5196 // can have multiple unwind destinations, there will be no overlap in
5197 // incoming blocks between SrcPN and DestPN.
5198 Value *SrcVal = DestPN.getIncomingValue(Idx);
5199 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5200
5201 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5202 for (auto *Pred : predecessors(BB)) {
5203 Value *Incoming =
5204 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5205 DestPN.addIncoming(Incoming, Pred);
5206 }
5207 }
5208
5209 // Sink any remaining PHI nodes directly into UnwindDest.
5210 Instruction *InsertPt = DestEHPad;
5211 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5212 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5213 // If the PHI node has no uses or all of its uses are in this basic
5214 // block (meaning they are debug or lifetime intrinsics), just leave
5215 // it. It will be erased when we erase BB below.
5216 continue;
5217
5218 // Otherwise, sink this PHI node into UnwindDest.
5219 // Any predecessors to UnwindDest which are not already represented
5220 // must be back edges which inherit the value from the path through
5221 // BB. In this case, the PHI value must reference itself.
5222 for (auto *pred : predecessors(UnwindDest))
5223 if (pred != BB)
5224 PN.addIncoming(&PN, pred);
5225 PN.moveBefore(InsertPt);
5226 // Also, add a dummy incoming value for the original BB itself,
5227 // so that the PHI is well-formed until we drop said predecessor.
5228 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5229 }
5230 }
5231
5232 std::vector<DominatorTree::UpdateType> Updates;
5233
5234 // We use make_early_inc_range here because we will remove all predecessors.
5236 if (UnwindDest == nullptr) {
5237 if (DTU) {
5238 DTU->applyUpdates(Updates);
5239 Updates.clear();
5240 }
5241 removeUnwindEdge(PredBB, DTU);
5242 ++NumInvokes;
5243 } else {
5244 BB->removePredecessor(PredBB);
5245 Instruction *TI = PredBB->getTerminator();
5246 TI->replaceUsesOfWith(BB, UnwindDest);
5247 if (DTU) {
5248 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5249 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5250 }
5251 }
5252 }
5253
5254 if (DTU)
5255 DTU->applyUpdates(Updates);
5256
5257 DeleteDeadBlock(BB, DTU);
5258
5259 return true;
5260}
5261
5262// Try to merge two cleanuppads together.
5264 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5265 // with.
5266 BasicBlock *UnwindDest = RI->getUnwindDest();
5267 if (!UnwindDest)
5268 return false;
5269
5270 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5271 // be safe to merge without code duplication.
5272 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5273 return false;
5274
5275 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5276 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5277 if (!SuccessorCleanupPad)
5278 return false;
5279
5280 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5281 // Replace any uses of the successor cleanupad with the predecessor pad
5282 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5283 // funclet bundle operands.
5284 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5285 // Remove the old cleanuppad.
5286 SuccessorCleanupPad->eraseFromParent();
5287 // Now, we simply replace the cleanupret with a branch to the unwind
5288 // destination.
5289 BranchInst::Create(UnwindDest, RI->getParent());
5290 RI->eraseFromParent();
5291
5292 return true;
5293}
5294
5295bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5296 // It is possible to transiantly have an undef cleanuppad operand because we
5297 // have deleted some, but not all, dead blocks.
5298 // Eventually, this block will be deleted.
5299 if (isa<UndefValue>(RI->getOperand(0)))
5300 return false;
5301
5302 if (mergeCleanupPad(RI))
5303 return true;
5304
5305 if (removeEmptyCleanup(RI, DTU))
5306 return true;
5307
5308 return false;
5309}
5310
5311// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
5312bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5313 BasicBlock *BB = UI->getParent();
5314
5315 bool Changed = false;
5316
5317 // Ensure that any debug-info records that used to occur after the Unreachable
5318 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5319 // the block.
5321
5322 // Debug-info records on the unreachable inst itself should be deleted, as
5323 // below we delete everything past the final executable instruction.
5324 UI->dropDbgRecords();
5325
5326 // If there are any instructions immediately before the unreachable that can
5327 // be removed, do so.
5328 while (UI->getIterator() != BB->begin()) {
5330 --BBI;
5331
5333 break; // Can not drop any more instructions. We're done here.
5334 // Otherwise, this instruction can be freely erased,
5335 // even if it is not side-effect free.
5336
5337 // Note that deleting EH's here is in fact okay, although it involves a bit
5338 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5339 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5340 // and we can therefore guarantee this block will be erased.
5341
5342 // If we're deleting this, we're deleting any subsequent debug info, so
5343 // delete DbgRecords.
5344 BBI->dropDbgRecords();
5345
5346 // Delete this instruction (any uses are guaranteed to be dead)
5347 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5348 BBI->eraseFromParent();
5349 Changed = true;
5350 }
5351
5352 // If the unreachable instruction is the first in the block, take a gander
5353 // at all of the predecessors of this instruction, and simplify them.
5354 if (&BB->front() != UI)
5355 return Changed;
5356
5357 std::vector<DominatorTree::UpdateType> Updates;
5358
5360 for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
5361 auto *Predecessor = Preds[i];
5362 Instruction *TI = Predecessor->getTerminator();
5363 IRBuilder<> Builder(TI);
5364 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5365 // We could either have a proper unconditional branch,
5366 // or a degenerate conditional branch with matching destinations.
5367 if (all_of(BI->successors(),
5368 [BB](auto *Successor) { return Successor == BB; })) {
5369 new UnreachableInst(TI->getContext(), TI->getIterator());
5370 TI->eraseFromParent();
5371 Changed = true;
5372 } else {
5373 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5374 Value* Cond = BI->getCondition();
5375 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5376 "The destinations are guaranteed to be different here.");
5377 CallInst *Assumption;
5378 if (BI->getSuccessor(0) == BB) {
5379 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5380 Builder.CreateBr(BI->getSuccessor(1));
5381 } else {
5382 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5383 Assumption = Builder.CreateAssumption(Cond);
5384 Builder.CreateBr(BI->getSuccessor(0));
5385 }
5386 if (Options.AC)
5387 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5388
5390 Changed = true;
5391 }
5392 if (DTU)
5393 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5394 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
5396 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5397 if (i->getCaseSuccessor() != BB) {
5398 ++i;
5399 continue;
5400 }
5401 BB->removePredecessor(SU->getParent());
5402 i = SU.removeCase(i);
5403 e = SU->case_end();
5404 Changed = true;
5405 }
5406 // Note that the default destination can't be removed!
5407 if (DTU && SI->getDefaultDest() != BB)
5408 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5409 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5410 if (II->getUnwindDest() == BB) {
5411 if (DTU) {
5412 DTU->applyUpdates(Updates);
5413 Updates.clear();
5414 }
5415 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5416 if (!CI->doesNotThrow())
5417 CI->setDoesNotThrow();
5418 Changed = true;
5419 }
5420 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5421 if (CSI->getUnwindDest() == BB) {
5422 if (DTU) {
5423 DTU->applyUpdates(Updates);
5424 Updates.clear();
5425 }
5426 removeUnwindEdge(TI->getParent(), DTU);
5427 Changed = true;
5428 continue;
5429 }
5430
5431 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5432 E = CSI->handler_end();
5433 I != E; ++I) {
5434 if (*I == BB) {
5435 CSI->removeHandler(I);
5436 --I;
5437 --E;
5438 Changed = true;
5439 }
5440 }
5441 if (DTU)
5442 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5443 if (CSI->getNumHandlers() == 0) {
5444 if (CSI->hasUnwindDest()) {
5445 // Redirect all predecessors of the block containing CatchSwitchInst
5446 // to instead branch to the CatchSwitchInst's unwind destination.
5447 if (DTU) {
5448 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5449 Updates.push_back({DominatorTree::Insert,
5450 PredecessorOfPredecessor,
5451 CSI->getUnwindDest()});
5452 Updates.push_back({DominatorTree::Delete,
5453 PredecessorOfPredecessor, Predecessor});
5454 }
5455 }
5456 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5457 } else {
5458 // Rewrite all preds to unwind to caller (or from invoke to call).
5459 if (DTU) {
5460 DTU->applyUpdates(Updates);
5461 Updates.clear();
5462 }
5463 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5464 for (BasicBlock *EHPred : EHPreds)
5465 removeUnwindEdge(EHPred, DTU);
5466 }
5467 // The catchswitch is no longer reachable.
5468 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5469 CSI->eraseFromParent();
5470 Changed = true;
5471 }
5472 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
5473 (void)CRI;
5474 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5475 "Expected to always have an unwind to BB.");
5476 if (DTU)
5477 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5478 new UnreachableInst(TI->getContext(), TI->getIterator());
5479 TI->eraseFromParent();
5480 Changed = true;
5481 }
5482 }
5483
5484 if (DTU)
5485 DTU->applyUpdates(Updates);
5486
5487 // If this block is now dead, remove it.
5488 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5489 DeleteDeadBlock(BB, DTU);
5490 return true;
5491 }
5492
5493 return Changed;
5494}
5495
5497 assert(Cases.size() >= 1);
5498
5500 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
5501 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5502 return false;
5503 }
5504 return true;
5505}
5506
5508 DomTreeUpdater *DTU) {
5509 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5510 auto *BB = Switch->getParent();
5511 auto *OrigDefaultBlock = Switch->getDefaultDest();
5512 OrigDefaultBlock->removePredecessor(BB);
5513 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5514 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5515 OrigDefaultBlock);
5516 new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5517 Switch->setDefaultDest(&*NewDefaultBlock);
5518 if (DTU) {
5520 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
5521 if (!is_contained(successors(BB), OrigDefaultBlock))
5522 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5523 DTU->applyUpdates(Updates);
5524 }
5525}
5526
5527/// Turn a switch into an integer range comparison and branch.
5528/// Switches with more than 2 destinations are ignored.
5529/// Switches with 1 destination are also ignored.
5530bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
5531 IRBuilder<> &Builder) {
5532 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5533
5534 bool HasDefault =
5535 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5536
5537 auto *BB = SI->getParent();
5538
5539 // Partition the cases into two sets with different destinations.
5540 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5541 BasicBlock *DestB = nullptr;
5544
5545 for (auto Case : SI->cases()) {
5546 BasicBlock *Dest = Case.getCaseSuccessor();
5547 if (!DestA)
5548 DestA = Dest;
5549 if (Dest == DestA) {
5550 CasesA.push_back(Case.getCaseValue());
5551 continue;
5552 }
5553 if (!DestB)
5554 DestB = Dest;
5555 if (Dest == DestB) {
5556 CasesB.push_back(Case.getCaseValue());
5557 continue;
5558 }
5559 return false; // More than two destinations.
5560 }
5561 if (!DestB)
5562 return false; // All destinations are the same and the default is unreachable
5563
5564 assert(DestA && DestB &&
5565 "Single-destination switch should have been folded.");
5566 assert(DestA != DestB);
5567 assert(DestB != SI->getDefaultDest());
5568 assert(!CasesB.empty() && "There must be non-default cases.");
5569 assert(!CasesA.empty() || HasDefault);
5570
5571 // Figure out if one of the sets of cases form a contiguous range.
5572 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5573 BasicBlock *ContiguousDest = nullptr;
5574 BasicBlock *OtherDest = nullptr;
5575 if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
5576 ContiguousCases = &CasesA;
5577 ContiguousDest = DestA;
5578 OtherDest = DestB;
5579 } else if (CasesAreContiguous(CasesB)) {
5580 ContiguousCases = &CasesB;
5581 ContiguousDest = DestB;
5582 OtherDest = DestA;
5583 } else
5584 return false;
5585
5586 // Start building the compare and branch.
5587
5588 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
5589 Constant *NumCases =
5590 ConstantInt::get(Offset->getType(), ContiguousCases->size());
5591
5592 Value *Sub = SI->getCondition();
5593 if (!Offset->isNullValue())
5594 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5595
5596 Value *Cmp;
5597 // If NumCases overflowed, then all possible values jump to the successor.
5598 if (NumCases->isNullValue() && !ContiguousCases->empty())
5599 Cmp = ConstantInt::getTrue(SI->getContext());
5600 else
5601 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5602 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
5603
5604 // Update weight for the newly-created conditional branch.
5605 if (hasBranchWeightMD(*SI)) {
5607 GetBranchWeights(SI, Weights);
5608 if (Weights.size() == 1 + SI->getNumCases()) {
5609 uint64_t TrueWeight = 0;
5610 uint64_t FalseWeight = 0;
5611 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5612 if (SI->getSuccessor(I) == ContiguousDest)
5613 TrueWeight += Weights[I];
5614 else
5615 FalseWeight += Weights[I];
5616 }
5617 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5618 TrueWeight /= 2;
5619 FalseWeight /= 2;
5620 }
5621 setBranchWeights(NewBI, TrueWeight, FalseWeight);
5622 }
5623 }
5624
5625 // Prune obsolete incoming values off the successors' PHI nodes.
5626 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
5627 unsigned PreviousEdges = ContiguousCases->size();
5628 if (ContiguousDest == SI->getDefaultDest())
5629 ++PreviousEdges;
5630 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5631 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5632 }
5633 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5634 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5635 if (OtherDest == SI->getDefaultDest())
5636 ++PreviousEdges;
5637 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5638 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5639 }
5640
5641 // Clean up the default block - it may have phis or other instructions before
5642 // the unreachable terminator.
5643 if (!HasDefault)
5645
5646 auto *UnreachableDefault = SI->getDefaultDest();
5647
5648 // Drop the switch.
5649 SI->eraseFromParent();
5650
5651 if (!HasDefault && DTU)
5652 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5653
5654 return true;
5655}
5656
5657/// Compute masked bits for the condition of a switch
5658/// and use it to remove dead cases.
5660 AssumptionCache *AC,
5661 const DataLayout &DL) {
5662 Value *Cond = SI->getCondition();
5663 KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
5664
5665 // We can also eliminate cases by determining that their values are outside of
5666 // the limited range of the condition based on how many significant (non-sign)
5667 // bits are in the condition value.
5668 unsigned MaxSignificantBitsInCond =
5669 ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);
5670
5671 // Gather dead cases.
5673 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5674 SmallVector<BasicBlock *, 8> UniqueSuccessors;
5675 for (const auto &Case : SI->cases()) {
5676 auto *Successor = Case.getCaseSuccessor();
5677 if (DTU) {
5678 if (!NumPerSuccessorCases.count(Successor))
5679 UniqueSuccessors.push_back(Successor);
5680 ++NumPerSuccessorCases[Successor];
5681 }
5682 const APInt &CaseVal = Case.getCaseValue()->getValue();
5683 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
5684 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5685 DeadCases.push_back(Case.getCaseValue());
5686 if (DTU)
5687 --NumPerSuccessorCases[Successor];
5688 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5689 << " is dead.\n");
5690 }
5691 }
5692
5693 // If we can prove that the cases must cover all possible values, the
5694 // default destination becomes dead and we can remove it. If we know some
5695 // of the bits in the value, we can use that to more precisely compute the
5696 // number of possible unique case values.
5697 bool HasDefault =
5698 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5699 const unsigned NumUnknownBits =
5700 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5701 assert(NumUnknownBits <= Known.getBitWidth());
5702 if (HasDefault && DeadCases.empty() &&
5703 NumUnknownBits < 64 /* avoid overflow */ &&
5704 SI->getNumCases() == (1ULL << NumUnknownBits)) {
5706 return true;
5707 }
5708
5709 if (DeadCases.empty())
5710 return false;
5711
5713 for (ConstantInt *DeadCase : DeadCases) {
5714 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
5715 assert(CaseI != SI->case_default() &&
5716 "Case was not found. Probably mistake in DeadCases forming.");
5717 // Prune unused values from PHI nodes.
5718 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
5719 SIW.removeCase(CaseI);
5720 }
5721
5722 if (DTU) {
5723 std::vector<DominatorTree::UpdateType> Updates;
5724 for (auto *Successor : UniqueSuccessors)
5725 if (NumPerSuccessorCases[Successor] == 0)
5726 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
5727 DTU->applyUpdates(Updates);
5728 }
5729
5730 return true;
5731}
5732
5733/// If BB would be eligible for simplification by
5734/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
5735/// by an unconditional branch), look at the phi node for BB in the successor
5736/// block and see if the incoming value is equal to CaseValue. If so, return
5737/// the phi node, and set PhiIndex to BB's index in the phi node.
5739 BasicBlock *BB, int *PhiIndex) {
5740 if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
5741 return nullptr; // BB must be empty to be a candidate for simplification.
5742 if (!BB->getSinglePredecessor())
5743 return nullptr; // BB must be dominated by the switch.
5744
5745 BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
5746 if (!Branch || !Branch->isUnconditional())
5747 return nullptr; // Terminator must be unconditional branch.
5748
5749 BasicBlock *Succ = Branch->getSuccessor(0);
5750
5751 for (PHINode &PHI : Succ->phis()) {
5752 int Idx = PHI.getBasicBlockIndex(BB);
5753 assert(Idx >= 0 && "PHI has no entry for predecessor?");
5754
5755 Value *InValue = PHI.getIncomingValue(Idx);
5756 if (InValue != CaseValue)
5757 continue;
5758
5759 *PhiIndex = Idx;
5760 return &PHI;
5761 }
5762
5763 return nullptr;
5764}
5765
5766/// Try to forward the condition of a switch instruction to a phi node
5767/// dominated by the switch, if that would mean that some of the destination
5768/// blocks of the switch can be folded away. Return true if a change is made.
5770 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
5771
5772 ForwardingNodesMap ForwardingNodes;
5773 BasicBlock *SwitchBlock = SI->getParent();
5774 bool Changed = false;
5775 for (const auto &Case : SI->cases()) {
5776 ConstantInt *CaseValue = Case.getCaseValue();
5777 BasicBlock *CaseDest = Case.getCaseSuccessor();
5778
5779 // Replace phi operands in successor blocks that are using the constant case
5780 // value rather than the switch condition variable:
5781 // switchbb:
5782 // switch i32 %x, label %default [
5783 // i32 17, label %succ
5784 // ...
5785 // succ:
5786 // %r = phi i32 ... [ 17, %switchbb ] ...
5787 // -->
5788 // %r = phi i32 ... [ %x, %switchbb ] ...
5789
5790 for (PHINode &Phi : CaseDest->phis()) {
5791 // This only works if there is exactly 1 incoming edge from the switch to
5792 // a phi. If there is >1, that means multiple cases of the switch map to 1
5793 // value in the phi, and that phi value is not the switch condition. Thus,
5794 // this transform would not make sense (the phi would be invalid because
5795 // a phi can't have different incoming values from the same block).
5796 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
5797 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
5798 count(Phi.blocks(), SwitchBlock) == 1) {
5799 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
5800 Changed = true;
5801 }
5802 }
5803
5804 // Collect phi nodes that are indirectly using this switch's case constants.
5805 int PhiIdx;
5806 if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
5807 ForwardingNodes[Phi].push_back(PhiIdx);
5808 }
5809
5810 for (auto &ForwardingNode : ForwardingNodes) {
5811 PHINode *Phi = ForwardingNode.first;
5812 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
5813 if (Indexes.size() < 2)
5814 continue;
5815
5816 for (int Index : Indexes)
5817 Phi->setIncomingValue(Index, SI->getCondition());
5818 Changed = true;
5819 }
5820
5821 return Changed;
5822}
5823
5824/// Return true if the backend will be able to handle
5825/// initializing an array of constants like C.
5827 if (C->isThreadDependent())
5828 return false;
5829 if (C->isDLLImportDependent())
5830 return false;
5831
5832 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
5833 !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
5834 !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
5835 return false;
5836
5837 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
5838 // Pointer casts and in-bounds GEPs will not prohibit the backend from
5839 // materializing the array of constants.
5840 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
5841 if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI))
5842 return false;
5843 }
5844
5846 return false;
5847
5848 return true;
5849}
5850
5851/// If V is a Constant, return it. Otherwise, try to look up
5852/// its constant value in ConstantPool, returning 0 if it's not there.
5853static Constant *
5856 if (Constant *C = dyn_cast<Constant>(V))
5857 return C;
5858 return ConstantPool.lookup(V);
5859}
5860
5861/// Try to fold instruction I into a constant. This works for
5862/// simple instructions such as binary operations where both operands are
5863/// constant or can be replaced by constants from the ConstantPool. Returns the
5864/// resulting constant on success, 0 otherwise.
5865static Constant *
5868 if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
5869 Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
5870 if (!A)
5871 return nullptr;
5872 if (A->isAllOnesValue())
5873 return LookupConstant(Select->getTrueValue(), ConstantPool);
5874 if (A->isNullValue())
5875 return LookupConstant(Select->getFalseValue(), ConstantPool);
5876 return nullptr;
5877 }
5878
5880 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
5881 if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
5882 COps.push_back(A);
5883 else
5884 return nullptr;
5885 }
5886
5887 return ConstantFoldInstOperands(I, COps, DL);
5888}
5889
5890/// Try to determine the resulting constant values in phi nodes
5891/// at the common destination basic block, *CommonDest, for one of the case
5892/// destionations CaseDest corresponding to value CaseVal (0 for the default
5893/// case), of a switch instruction SI.
5894static bool
5896 BasicBlock **CommonDest,
5897 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
5898 const DataLayout &DL, const TargetTransformInfo &TTI) {
5899 // The block from which we enter the common destination.
5900 BasicBlock *Pred = SI->getParent();
5901
5902 // If CaseDest is empty except for some side-effect free instructions through
5903 // which we can constant-propagate the CaseVal, continue to its successor.
5905 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
5906 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
5907 if (I.isTerminator()) {
5908 // If the terminator is a simple branch, continue to the next block.
5909 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
5910 return false;
5911 Pred = CaseDest;
5912 CaseDest = I.getSuccessor(0);
5913 } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
5914 // Instruction is side-effect free and constant.
5915
5916 // If the instruction has uses outside this block or a phi node slot for
5917 // the block, it is not safe to bypass the instruction since it would then
5918 // no longer dominate all its uses.
5919 for (auto &Use : I.uses()) {
5920 User *User = Use.getUser();
5921 if (Instruction *I = dyn_cast<Instruction>(User))
5922 if (I->getParent() == CaseDest)
5923 continue;
5924 if (PHINode *Phi = dyn_cast<PHINode>(User))
5925 if (Phi->getIncomingBlock(Use) == CaseDest)
5926 continue;
5927 return false;
5928 }
5929
5930 ConstantPool.insert(std::make_pair(&I, C));
5931 } else {
5932 break;
5933 }
5934 }
5935
5936 // If we did not have a CommonDest before, use the current one.
5937 if (!*CommonDest)
5938 *CommonDest = CaseDest;
5939 // If the destination isn't the common one, abort.
5940 if (CaseDest != *CommonDest)
5941 return false;
5942
5943 // Get the values for this case from phi nodes in the destination block.
5944 for (PHINode &PHI : (*CommonDest)->phis()) {
5945 int Idx = PHI.getBasicBlockIndex(Pred);
5946 if (Idx == -1)
5947 continue;
5948
5949 Constant *ConstVal =
5950 LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
5951 if (!ConstVal)
5952 return false;
5953
5954 // Be conservative about which kinds of constants we support.
5955 if (!ValidLookupTableConstant(ConstVal, TTI))
5956 return false;
5957
5958 Res.push_back(std::make_pair(&PHI, ConstVal));
5959 }
5960
5961 return Res.size() > 0;
5962}
5963
5964// Helper function used to add CaseVal to the list of cases that generate
5965// Result. Returns the updated number of cases that generate this result.
5966static size_t mapCaseToResult(ConstantInt *CaseVal,
5967 SwitchCaseResultVectorTy &UniqueResults,
5968 Constant *Result) {
5969 for (auto &I : UniqueResults) {
5970 if (I.first == Result) {
5971 I.second.push_back(CaseVal);
5972 return I.second.size();
5973 }
5974 }
5975 UniqueResults.push_back(
5976 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
5977 return 1;
5978}
5979
5980// Helper function that initializes a map containing
5981// results for the PHI node of the common destination block for a switch
5982// instruction. Returns false if multiple PHI nodes have been found or if
5983// there is not a common destination block for the switch.
5985 BasicBlock *&CommonDest,
5986 SwitchCaseResultVectorTy &UniqueResults,
5987 Constant *&DefaultResult,
5988 const DataLayout &DL,
5989 const TargetTransformInfo &TTI,
5990 uintptr_t MaxUniqueResults) {
5991 for (const auto &I : SI->cases()) {
5992 ConstantInt *CaseVal = I.getCaseValue();
5993
5994 // Resulting value at phi nodes for this case value.
5995 SwitchCaseResultsTy Results;
5996 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
5997 DL, TTI))
5998 return false;
5999
6000 // Only one value per case is permitted.
6001 if (Results.size() > 1)
6002 return false;
6003
6004 // Add the case->result mapping to UniqueResults.
6005 const size_t NumCasesForResult =
6006 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6007
6008 // Early out if there are too many cases for this result.
6009 if (NumCasesForResult > MaxSwitchCasesPerResult)
6010 return false;
6011
6012 // Early out if there are too many unique results.
6013 if (UniqueResults.size() > MaxUniqueResults)
6014 return false;
6015
6016 // Check the PHI consistency.
6017 if (!PHI)
6018 PHI = Results[0].first;
6019 else if (PHI != Results[0].first)
6020 return false;
6021 }
6022 // Find the default result value.
6024 BasicBlock *DefaultDest = SI->getDefaultDest();
6025 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6026 DL, TTI);
6027 // If the default value is not found abort unless the default destination
6028 // is unreachable.
6029 DefaultResult =
6030 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6031 if ((!DefaultResult &&
6032 !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
6033 return false;
6034
6035 return true;
6036}
6037
6038// Helper function that checks if it is possible to transform a switch with only
6039// two cases (or two cases + default) that produces a result into a select.
6040// TODO: Handle switches with more than 2 cases that map to the same result.
6041static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6042 Constant *DefaultResult, Value *Condition,
6043 IRBuilder<> &Builder) {
6044 // If we are selecting between only two cases transform into a simple
6045 // select or a two-way select if default is possible.
6046 // Example:
6047 // switch (a) { %0 = icmp eq i32 %a, 10
6048 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6049 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6050 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6051 // }
6052 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6053 ResultVector[1].second.size() == 1) {
6054 ConstantInt *FirstCase = ResultVector[0].second[0];
6055 ConstantInt *SecondCase = ResultVector[1].second[0];
6056 Value *SelectValue = ResultVector[1].first;
6057 if (DefaultResult) {
6058 Value *ValueCompare =
6059 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6060 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6061 DefaultResult, "switch.select");
6062 }
6063 Value *ValueCompare =
6064 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6065 return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6066 SelectValue, "switch.select");
6067 }
6068
6069 // Handle the degenerate case where two cases have the same result value.
6070 if (ResultVector.size() == 1 && DefaultResult) {
6071 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6072 unsigned CaseCount = CaseValues.size();
6073 // n bits group cases map to the same result:
6074 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6075 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6076 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6077 if (isPowerOf2_32(CaseCount)) {
6078 ConstantInt *MinCaseVal = CaseValues[0];
6079 // Find mininal value.
6080 for (auto *Case : CaseValues)
6081 if (Case->getValue().slt(MinCaseVal->getValue()))
6082 MinCaseVal = Case;
6083
6084 // Mark the bits case number touched.
6085 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6086 for (auto *Case : CaseValues)
6087 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6088
6089 // Check if cases with the same result can cover all number
6090 // in touched bits.
6091 if (BitMask.popcount() == Log2_32(CaseCount)) {
6092 if (!MinCaseVal->isNullValue())
6093 Condition = Builder.CreateSub(Condition, MinCaseVal);
6094 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6095 Value *Cmp = Builder.CreateICmpEQ(
6096 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6097 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6098 }
6099 }
6100
6101 // Handle the degenerate case where two cases have the same value.
6102 if (CaseValues.size() == 2) {
6103 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6104 "switch.selectcmp.case1");
6105 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6106 "switch.selectcmp.case2");
6107 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6108 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6109 }
6110 }
6111
6112 return nullptr;
6113}
6114
// Helper function to cleanup a switch instruction that has been converted into
// a select, fixing up PHI nodes and basic blocks.
// NOTE(review): the first line of this signature (presumably
// "static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,")
// appears to have been dropped by the extraction of this file.
                                        Value *SelectValue,
                                        IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU) {
  std::vector<DominatorTree::UpdateType> Updates;

  BasicBlock *SelectBB = SI->getParent();
  BasicBlock *DestBB = PHI->getParent();

  // SelectBB now jumps straight to the PHI's block; record the new edge
  // unless it already exists (the switch may already target DestBB).
  if (DTU && !is_contained(predecessors(DestBB), SelectBB))
    Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
  Builder.CreateBr(DestBB);

  // Rewire the PHI: drop every incoming entry for SelectBB (there may be
  // one per switch edge) and add a single entry carrying the select's value.
  PHI->removeIncomingValueIf(
      [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
  PHI->addIncoming(SelectValue, SelectBB);

  // Remove the switch, detaching every successor other than DestBB.
  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(i);

    if (Succ == DestBB)
      continue;
    Succ->removePredecessor(SelectBB);
    // A block can be a successor multiple times; only record one DomTree
    // deletion per unique block.
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
  }
  SI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);
}
6150
6151/// If a switch is only used to initialize one or more phi nodes in a common
6152/// successor block with only two different constant values, try to replace the
6153/// switch with a select. Returns true if the fold was made.
6154static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6155 DomTreeUpdater *DTU, const DataLayout &DL,
6156 const TargetTransformInfo &TTI) {
6157 Value *const Cond = SI->getCondition();
6158 PHINode *PHI = nullptr;
6159 BasicBlock *CommonDest = nullptr;
6160 Constant *DefaultResult;
6161 SwitchCaseResultVectorTy UniqueResults;
6162 // Collect all the cases that will deliver the same value from the switch.
6163 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6164 DL, TTI, /*MaxUniqueResults*/ 2))
6165 return false;
6166
6167 assert(PHI != nullptr && "PHI for value select not found");
6168 Builder.SetInsertPoint(SI);
6169 Value *SelectValue =
6170 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6171 if (!SelectValue)
6172 return false;
6173
6174 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6175 return true;
6176}
6177
namespace {

/// This class represents a lookup table that can be used to replace a switch.
class SwitchLookupTable {
public:
  /// Create a lookup table to use as a switch replacement with the contents
  /// of Values, using DefaultValue to fill any holes in the table.
  /// FuncName is only used to name the backing global when an ArrayKind
  /// table is emitted.
  SwitchLookupTable(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve the value at
  /// the position given by Index in the lookup table.
  Value *BuildLookup(Value *Index, IRBuilder<> &Builder);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

private:
  // Depending on the contents of the table, it can be represented in
  // different ways.
  enum {
    // For tables where each element contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For tables where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    ArrayKind
  } Kind;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True when Index * Multiplier + Offset may signed-wrap within the table's
  // index range; when false, nsw is attached to the emitted mul/add.
  bool LinearMapValWrapped = false;

  // For ArrayKind, this is the array.
  GlobalVariable *Array = nullptr;
};

} // end anonymous namespace
6239
6240SwitchLookupTable::SwitchLookupTable(
6241 Module &M, uint64_t TableSize, ConstantInt *Offset,
6242 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6243 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6244 assert(Values.size() && "Can't build lookup table without values!");
6245 assert(TableSize >= Values.size() && "Can't fit values in table!");
6246
6247 // If all values in the table are equal, this is that value.
6248 SingleValue = Values.begin()->second;
6249
6250 Type *ValueType = Values.begin()->second->getType();
6251
6252 // Build up the table contents.
6253 SmallVector<Constant *, 64> TableContents(TableSize);
6254 for (size_t I = 0, E = Values.size(); I != E; ++I) {
6255 ConstantInt *CaseVal = Values[I].first;
6256 Constant *CaseRes = Values[I].second;
6257 assert(CaseRes->getType() == ValueType);
6258
6259 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6260 TableContents[Idx] = CaseRes;
6261
6262 if (CaseRes != SingleValue)
6263 SingleValue = nullptr;
6264 }
6265
6266 // Fill in any holes in the table with the default result.
6267 if (Values.size() < TableSize) {
6268 assert(DefaultValue &&
6269 "Need a default value to fill the lookup table holes.");
6270 assert(DefaultValue->getType() == ValueType);
6271 for (uint64_t I = 0; I < TableSize; ++I) {
6272 if (!TableContents[I])
6273 TableContents[I] = DefaultValue;
6274 }
6275
6276 if (DefaultValue != SingleValue)
6277 SingleValue = nullptr;
6278 }
6279
6280 // If each element in the table contains the same value, we only need to store
6281 // that single value.
6282 if (SingleValue) {
6283 Kind = SingleValueKind;
6284 return;
6285 }
6286
6287 // Check if we can derive the value with a linear transformation from the
6288 // table index.
6289 if (isa<IntegerType>(ValueType)) {
6290 bool LinearMappingPossible = true;
6291 APInt PrevVal;
6292 APInt DistToPrev;
6293 // When linear map is monotonic and signed overflow doesn't happen on
6294 // maximum index, we can attach nsw on Add and Mul.
6295 bool NonMonotonic = false;
6296 assert(TableSize >= 2 && "Should be a SingleValue table.");
6297 // Check if there is the same distance between two consecutive values.
6298 for (uint64_t I = 0; I < TableSize; ++I) {
6299 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6300 if (!ConstVal) {
6301 // This is an undef. We could deal with it, but undefs in lookup tables
6302 // are very seldom. It's probably not worth the additional complexity.
6303 LinearMappingPossible = false;
6304 break;
6305 }
6306 const APInt &Val = ConstVal->getValue();
6307 if (I != 0) {
6308 APInt Dist = Val - PrevVal;
6309 if (I == 1) {
6310 DistToPrev = Dist;
6311 } else if (Dist != DistToPrev) {
6312 LinearMappingPossible = false;
6313 break;
6314 }
6315 NonMonotonic |=
6316 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6317 }
6318 PrevVal = Val;
6319 }
6320 if (LinearMappingPossible) {
6321 LinearOffset = cast<ConstantInt>(TableContents[0]);
6322 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6323 bool MayWrap = false;
6324 APInt M = LinearMultiplier->getValue();
6325 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6326 LinearMapValWrapped = NonMonotonic || MayWrap;
6327 Kind = LinearMapKind;
6328 ++NumLinearMaps;
6329 return;
6330 }
6331 }
6332
6333 // If the type is integer and the table fits in a register, build a bitmap.
6334 if (WouldFitInRegister(DL, TableSize, ValueType)) {
6335 IntegerType *IT = cast<IntegerType>(ValueType);
6336 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6337 for (uint64_t I = TableSize; I > 0; --I) {
6338 TableInt <<= IT->getBitWidth();
6339 // Insert values into the bitmap. Undef values are set to zero.
6340 if (!isa<UndefValue>(TableContents[I - 1])) {
6341 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6342 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6343 }
6344 }
6345 BitMap = ConstantInt::get(M.getContext(), TableInt);
6346 BitMapElementTy = IT;
6347 Kind = BitMapKind;
6348 ++NumBitMaps;
6349 return;
6350 }
6351
6352 // Store the table in an array.
6353 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
6354 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
6355
6356 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6357 GlobalVariable::PrivateLinkage, Initializer,
6358 "switch.table." + FuncName);
6359 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6360 // Set the alignment to that of an array items. We will be only loading one
6361 // value out of it.
6362 Array->setAlignment(DL.getPrefTypeAlign(ValueType));
6363 Kind = ArrayKind;
6364}
6365
6366Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
6367 switch (Kind) {
6368 case SingleValueKind:
6369 return SingleValue;
6370 case LinearMapKind: {
6371 // Derive the result value from the input value.
6372 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6373 false, "switch.idx.cast");
6374 if (!LinearMultiplier->isOne())
6375 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6376 /*HasNUW = */ false,
6377 /*HasNSW = */ !LinearMapValWrapped);
6378
6379 if (!LinearOffset->isZero())
6380 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6381 /*HasNUW = */ false,
6382 /*HasNSW = */ !LinearMapValWrapped);
6383 return Result;
6384 }
6385 case BitMapKind: {
6386 // Type of the bitmap (e.g. i59).
6387 IntegerType *MapTy = BitMap->getIntegerType();
6388
6389 // Cast Index to the same type as the bitmap.
6390 // Note: The Index is <= the number of elements in the table, so
6391 // truncating it to the width of the bitmask is safe.
6392 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6393
6394 // Multiply the shift amount by the element width. NUW/NSW can always be
6395 // set, because WouldFitInRegister guarantees Index * ShiftAmt is in
6396 // BitMap's bit width.
6397 ShiftAmt = Builder.CreateMul(
6398 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6399 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6400
6401 // Shift down.
6402 Value *DownShifted =
6403 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
6404 // Mask off.
6405 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
6406 }
6407 case ArrayKind: {
6408 // Make sure the table index will not overflow when treated as signed.
6409 IntegerType *IT = cast<IntegerType>(Index->getType());
6410 uint64_t TableSize =
6411 Array->getInitializer()->getType()->getArrayNumElements();
6412 if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
6413 Index = Builder.CreateZExt(
6414 Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
6415 "switch.tableidx.zext");
6416
6417 Value *GEPIndices[] = {Builder.getInt32(0), Index};
6418 Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
6419 GEPIndices, "switch.gep");
6420 return Builder.CreateLoad(
6421 cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
6422 "switch.load");
6423 }
6424 }
6425 llvm_unreachable("Unknown lookup table kind!");
6426}
6427
6428bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
6429 uint64_t TableSize,
6430 Type *ElementType) {
6431 auto *IT = dyn_cast<IntegerType>(ElementType);
6432 if (!IT)
6433 return false;
6434 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6435 // are <= 15, we could try to narrow the type.
6436
6437 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6438 if (TableSize >= UINT_MAX / IT->getBitWidth())
6439 return false;
6440 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6441}
6442
// NOTE(review): the first line of this definition (presumably
// "static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo
// &TTI," given the parameters used below) appears to have been dropped by
// the extraction of this file.
                                       const DataLayout &DL) {
  // Allow any legal type.
  if (TTI.isTypeLegal(Ty))
    return true;

  auto *IT = dyn_cast<IntegerType>(Ty);
  if (!IT)
    return false;

  // Also allow power of 2 integer types that have at least 8 bits and fit in
  // a register. These types are common in frontend languages and targets
  // usually support loads of these types.
  // TODO: We could relax this to any integer that fits in a register and rely
  // on ABI alignment and padding in the table to allow the load to be widened.
  // Or we could widen the constants and truncate the load.
  unsigned BitWidth = IT->getBitWidth();
  return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
         DL.fitsInLegalInteger(IT->getBitWidth());
}
6463
// Return true when NumCases cases spread over a value range of CaseRange are
// dense enough to justify a lookup table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t MinDensity = 40;

  // Reject huge ranges up front so the multiplications below cannot overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Integer form of: NumCases / CaseRange >= MinDensity%.
  return CaseRange * MinDensity <= NumCases * 100;
}
6475
// NOTE(review): this overload's signature line (presumably
// "static bool isSwitchDense(ArrayRef<int64_t> Values) {" given the
// back()/front()/size() usage below) appears to have been dropped by the
// extraction of this file.
  // Derive the covered range from the first and last elements only — this
  // assumes Values is sorted in ascending order; TODO confirm at callers.
  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
  uint64_t Range = Diff + 1;
  if (Range < Diff)
    return false; // Overflow.

  return isSwitchDense(Values.size(), Range);
}
6484
/// Determine whether a lookup table should be built for this switch, based on
/// the number of cases, size of the table, and the types of the results.
// TODO: We could support larger than legal types by limiting based on the
// number of loads required and/or table size. If the constants are small we
// could use smaller table entries and extend after the load.
static bool
// NOTE(review): the continuation line of this signature (presumably
// "ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,") appears to
// have been dropped by the extraction of this file.
                       const TargetTransformInfo &TTI, const DataLayout &DL,
                       const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
  if (SI->getNumCases() > TableSize)
    return false; // TableSize overflowed.

  bool AllTablesFitInRegister = true;
  bool HasIllegalType = false;
  for (const auto &I : ResultTypes) {
    Type *Ty = I.second;

    // Saturate this flag to true.
    HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);

    // Saturate this flag to false.
    AllTablesFitInRegister =
        AllTablesFitInRegister &&
        SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);

    // If both flags saturate, we're done. NOTE: This *only* works with
    // saturating flags, and all flags have to saturate first due to the
    // non-deterministic behavior of iterating over a dense map.
    if (HasIllegalType && !AllTablesFitInRegister)
      break;
  }

  // If each table would fit in a register, we should build it anyway.
  if (AllTablesFitInRegister)
    return true;

  // Don't build a table that doesn't fit in-register if it has illegal types.
  if (HasIllegalType)
    return false;

  // Otherwise, require the switch to be sufficiently dense for this table size.
  return isSwitchDense(SI->getNumCases(), TableSize);
}
6527
// NOTE(review): the first line of this definition (presumably
// "static bool ShouldUseSwitchConditionAsTableIndex(") appears to have been
// dropped by the extraction of this file.
    ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
    bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
    const DataLayout &DL, const TargetTransformInfo &TTI) {
  // A zero minimum means the switch condition already is the table index.
  if (MinCaseVal.isNullValue())
    return true;
  // Rule out: negative minima, a maximum whose table size would overflow
  // uint64_t, and the lack of default results (needed to fill the widened
  // table's holes).
  if (MinCaseVal.isNegative() ||
      MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
      !HasDefaultResults)
    return false;
  // Only widen the table to cover [0, MaxCaseVal] if every result table
  // still fits in a register at that size.
  return all_of(ResultTypes, [&](const auto &KV) {
    return SwitchLookupTable::WouldFitInRegister(
        DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
        KV.second /* ResultType */);
  });
}
6544
/// Try to reuse the switch table index compare. Following pattern:
/// \code
/// if (idx < tablesize)
///    r = table[idx]; // table does not contain default_value
/// else
///    r = default_value;
/// if (r != default_value)
///    ...
/// \endcode
/// Is optimized to:
/// \code
/// cond = idx < tablesize;
/// if (cond)
///    r = table[idx];
/// else
///    r = default_value;
/// if (cond)
///    ...
/// \endcode
/// Jump threading will then eliminate the second if(cond).
// NOTE(review): the signature line (presumably "static void
// reuseTableCompare(") appears to have been dropped by the extraction of
// this file.
    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  // Only a compare against a constant can be folded per table entry below.
  Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  // NOTE(review): the line beginning this constant-fold call (defining
  // DefaultConst) appears to have been dropped by the extraction of this
  // file.
      DefaultValue, CmpOp1, true);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result.
  for (auto ValuePair : Values) {
    // NOTE(review): the line beginning this constant-fold call (defining
    // CaseConst) appears to have been dropped by the extraction of this
    // file.
        ValuePair.second, CmpOp1, true);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do it
  // at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
        RangeCheckBranch->getIterator());
    CmpInst->replaceAllUsesWith(InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}
6625
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
// NOTE(review): the first line of this signature (presumably
// "static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,")
// appears to have been dropped by the extraction of this file.
                                DomTreeUpdater *DTU, const DataLayout &DL,
                                const TargetTransformInfo &TTI) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();
  // Only build lookup table when we have a target that supports it or the
  // attribute is not set.
  // NOTE(review): the first line of this condition (presumably
  // "if (!TTI.shouldBuildLookupTables() ||") appears to have been dropped by
  // the extraction of this file.
      (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
    return false;

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
  // NOTE(review): the declarations of the per-PHI containers used below
  // (ResultLists, DefaultResults, ResultTypes, PHIs) appear to have been
  // dropped by the extraction of this file.

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    // Track the extreme case values; they bound the table's index range.
    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    // NOTE(review): the "using ResultsTy = ..." alias declaration appears to
    // have been dropped by the extraction of this file.
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
                        Results, DL, TTI))
      return false;

    // Append the result from this case to the list for each phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      // Remember each PHI the first time it is seen so iteration order is
      // deterministic.
      if (!ResultLists.count(PHI))
        PHIs.push_back(PHI);
      ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
    }
  }

  // Keep track of the result types.
  for (PHINode *PHI : PHIs) {
    ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
  }

  uint64_t NumResults = ResultLists[PHIs[0]].size();

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
                     DefaultResultsList, DL, TTI);

  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  if (UseSwitchConditionAsTableIndex)
    TableSize = MaxCaseVal->getLimitedValue() + 1;
  else
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

  // A table with holes but no default result needs a hole-check bitmask.
  bool TableHasHoles = (NumResults < TableSize);
  bool NeedMask = (TableHasHoles && !HasDefaultResults);
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    if (!DL.fitsInLegalInteger(TableSize))
      return false;
  }

  if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  bool DefaultIsReachable =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);

  // Compute the table index value.
  Builder.SetInsertPoint(SI);
  Value *TableIndex;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
    TableIndex = SI->getCondition();
  } else {
    TableIndexOffset = MinCaseVal;
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
      (void)Res;
    }

    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
                                   "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  BranchInst *RangeCheckBranch = nullptr;

  // Grow the table to cover all possible index values to avoid the range check.
  // It will use the default result to fill in the table hole later, so make
  // sure it exist.
  if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
    ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
    // Grow the table shouldn't have any size impact by checking
    // WouldFitInRegister.
    // TODO: Consider growing the table also when it doesn't fit in a register
    // if no optsize is specified.
    const uint64_t UpperBound = CR.getUpper().getLimitedValue();
    if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
          return SwitchLookupTable::WouldFitInRegister(
              DL, UpperBound, KV.second /* ResultType */);
        })) {
      // There may be some case index larger than the UpperBound (unreachable
      // case), so make sure the table size does not get smaller.
      TableSize = std::max(UpperBound, TableSize);
      // The default branch is unreachable after we enlarge the lookup table.
      // Adjust DefaultIsReachable to reuse code path.
      DefaultIsReachable = false;
    }
  }

  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(LookupBB);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
    // Note: We call removeProdecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    Value *Cmp = Builder.CreateICmpULT(
        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                  CommonDest->getParent(), CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
      uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
    Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];

    // If using a bitmask, use any value to fill the lookup table holes.
    Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
                            DL, FuncName);

    Value *Result = Table.BuildLookup(TableIndex, Builder);

    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
      }
    }

    PHI->addIncoming(Result, LookupBB);
  }

  Builder.CreateBr(CommonDest);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(i);

    if (Succ == SI->getDefaultDest())
      continue;
    Succ->removePredecessor(BB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, BB, Succ});
  }
  SI->eraseFromParent();

  if (DTU)
    DTU->applyUpdates(Updates);

  ++NumLookupTables;
  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
6921
6922/// Try to transform a switch that has "holes" in it to a contiguous sequence
6923/// of cases.
6924///
6925/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
6926/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
6927///
6928/// This converts a sparse switch into a dense switch which allows better
6929/// lowering and could also allow transforming into a lookup table.
6930static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
6931 const DataLayout &DL,
6932 const TargetTransformInfo &TTI) {
6933 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
6934 if (CondTy->getIntegerBitWidth() > 64 ||
6935 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6936 return false;
6937 // Only bother with this optimization if there are more than 3 switch cases;
6938 // SDAG will only bother creating jump tables for 4 or more cases.
6939 if (SI->getNumCases() < 4)
6940 return false;
6941
6942 // This transform is agnostic to the signedness of the input or case values. We
6943 // can treat the case values as signed or unsigned. We can optimize more common
6944 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
6945 // as signed.
6947 for (const auto &C : SI->cases())
6948 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
6949 llvm::sort(Values);
6950
6951 // If the switch is already dense, there's nothing useful to do here.
6952 if (isSwitchDense(Values))
6953 return false;
6954
6955 // First, transform the values such that they start at zero and ascend.
6956 int64_t Base = Values[0];
6957 for (auto &V : Values)
6958 V -= (uint64_t)(Base);
6959
6960 // Now we have signed numbers that have been shifted so that, given enough
6961 // precision, there are no negative values. Since the rest of the transform
6962 // is bitwise only, we switch now to an unsigned representation.
6963
6964 // This transform can be done speculatively because it is so cheap - it
6965 // results in a single rotate operation being inserted.
6966
6967 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
6968 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
6969 // less than 64.
6970 unsigned Shift = 64;
6971 for (auto &V : Values)
6972 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
6973 assert(Shift < 64);
6974 if (Shift > 0)
6975 for (auto &V : Values)
6976 V = (int64_t)((uint64_t)V >> Shift);
6977
6978 if (!isSwitchDense(Values))
6979 // Transform didn't create a dense switch.
6980 return false;
6981
6982 // The obvious transform is to shift the switch condition right and emit a
6983 // check that the condition actually cleanly divided by GCD, i.e.
6984 // C & (1 << Shift - 1) == 0
6985 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
6986 //
6987 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
6988 // shift and puts the shifted-off bits in the uppermost bits. If any of these
6989 // are nonzero then the switch condition will be very large and will hit the
6990 // default case.
6991
6992 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
6993 Builder.SetInsertPoint(SI);
6994 Value *Sub =
6995 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
6996 Value *Rot = Builder.CreateIntrinsic(
6997 Ty, Intrinsic::fshl,
6998 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
6999 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7000
7001 for (auto Case : SI->cases()) {
7002 auto *Orig = Case.getCaseValue();
7003 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
7004 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7005 }
7006 return true;
7007}
7008
7009/// Tries to transform switch of powers of two to reduce switch range.
7010/// For example, switch like:
7011/// switch (C) { case 1: case 2: case 64: case 128: }
7012/// will be transformed to:
7013/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7014///
7015/// This transformation allows better lowering and could allow transforming into
7016/// a lookup table.
7018 const DataLayout &DL,
7019 const TargetTransformInfo &TTI) {
7020 Value *Condition = SI->getCondition();
7021 LLVMContext &Context = SI->getContext();
7022 auto *CondTy = cast<IntegerType>(Condition->getType());
7023
7024 if (CondTy->getIntegerBitWidth() > 64 ||
7025 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7026 return false;
7027
7028 const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7029 IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7030 {Condition, ConstantInt::getTrue(Context)}),
7032
7033 if (CttzIntrinsicCost > TTI::TCC_Basic)
7034 // Inserting intrinsic is too expensive.
7035 return false;
7036
7037 // Only bother with this optimization if there are more than 3 switch cases.
7038 // SDAG will only bother creating jump tables for 4 or more cases.
7039 if (SI->getNumCases() < 4)
7040 return false;
7041
7042 // We perform this optimization only for switches with
7043 // unreachable default case.
7044 // This assumtion will save us from checking if `Condition` is a power of two.
7045 if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7046 return false;
7047
7048 // Check that switch cases are powers of two.
7050 for (const auto &Case : SI->cases()) {
7051 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7052 if (llvm::has_single_bit(CaseValue))
7053 Values.push_back(CaseValue);
7054 else
7055 return false;
7056 }
7057
7058 // isSwichDense requires case values to be sorted.
7059 llvm::sort(Values);
7060 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7061 llvm::countr_zero(Values.front()) + 1))
7062 // Transform is unable to generate dense switch.
7063 return false;
7064
7065 Builder.SetInsertPoint(SI);
7066
7067 // Replace each case with its trailing zeros number.
7068 for (auto &Case : SI->cases()) {
7069 auto *OrigValue = Case.getCaseValue();
7070 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7071 OrigValue->getValue().countr_zero()));
7072 }
7073
7074 // Replace condition with its trailing zeros number.
7075 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7076 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7077
7078 SI->setCondition(ConditionTrailingZeros);
7079
7080 return true;
7081}
7082
/// Driver for all SwitchInst simplifications. Tries a fixed sequence of
/// transforms; as soon as one fires it returns requestResimplify() so the
/// whole block is re-simplified from scratch. The ordering of the transforms
/// below is deliberate (e.g. lookup-table conversion is gated on a late-
/// pipeline option because it obscures the switch from later analyses).
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
        return requestResimplify();

    // switch(select(c, A, B)) can be turned into a branch on c feeding two
    // simpler switches.
    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
      if (SimplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->instructionsWithoutDebug(false).begin())
      if (FoldValueComparisonIntoPredecessors(SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
  // The conversion from switch to comparison may lose information on
  // impossible switch values, so disable it early in the pipeline.
  if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
    return requestResimplify();

  // A two-destination switch may be expressible as a select.
  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion from switch to lookup tables results in difficult-to-analyze
  // code and makes pruning branches much harder. This is a problem if the
  // switch expression itself can still be restricted as a result of inlining or
  // CVP. Therefore, only apply this transformation during late stages of the
  // optimisation pipeline.
  if (Options.ConvertSwitchToLookupTable &&
      SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
    return requestResimplify();

  if (ReduceSwitchRange(SI, Builder, DL, TTI))
    return requestResimplify();

  // Finally, hoist any code common to all of the switch successors.
  if (HoistCommon &&
      hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
    return requestResimplify();

  return false;
}
7142
7143bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7144 BasicBlock *BB = IBI->getParent();
7145 bool Changed = false;
7146
7147 // Eliminate redundant destinations.
7150 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7151 BasicBlock *Dest = IBI->getDestination(i);
7152 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7153 if (!Dest->hasAddressTaken())
7154 RemovedSuccs.insert(Dest);
7155 Dest->removePredecessor(BB);
7156 IBI->removeDestination(i);
7157 --i;
7158 --e;
7159 Changed = true;
7160 }
7161 }
7162
7163 if (DTU) {
7164 std::vector<DominatorTree::UpdateType> Updates;
7165 Updates.reserve(RemovedSuccs.size());
7166 for (auto *RemovedSucc : RemovedSuccs)
7167 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7168 DTU->applyUpdates(Updates);
7169 }
7170
7171 if (IBI->getNumDestinations() == 0) {
7172 // If the indirectbr has no successors, change it to unreachable.
7173 new UnreachableInst(IBI->getContext(), IBI->getIterator());
7175 return true;
7176 }
7177
7178 if (IBI->getNumDestinations() == 1) {
7179 // If the indirectbr has one successor, change it to a direct branch.
7182 return true;
7183 }
7184
7185 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7186 if (SimplifyIndirectBrOnSelect(IBI, SI))
7187 return requestResimplify();
7188 }
7189 return Changed;
7190}
7191
7192/// Given an block with only a single landing pad and a unconditional branch
7193/// try to find another basic block which this one can be merged with. This
7194/// handles cases where we have multiple invokes with unique landing pads, but
7195/// a shared handler.
7196///
7197/// We specifically choose to not worry about merging non-empty blocks
7198/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7199/// practice, the optimizer produces empty landing pad blocks quite frequently
7200/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7201/// sinking in this file)
7202///
7203/// This is primarily a code size optimization. We need to avoid performing
7204/// any transform which might inhibit optimization (such as our ability to
7205/// specialize a particular handler via tail commoning). We do this by not
7206/// merging any blocks which require us to introduce a phi. Since the same
7207/// values are flowing through both blocks, we don't lose any ability to
7208/// specialize. If anything, we make such specialization more likely.
7209///
7210/// TODO - This transformation could remove entries from a phi in the target
7211/// block when the inputs in the phi are the same for the two blocks being
7212/// merged. In some cases, this could result in removal of the PHI entirely.
7214 BasicBlock *BB, DomTreeUpdater *DTU) {
7215 auto Succ = BB->getUniqueSuccessor();
7216 assert(Succ);
7217 // If there's a phi in the successor block, we'd likely have to introduce
7218 // a phi into the merged landing pad block.
7219 if (isa<PHINode>(*Succ->begin()))
7220 return false;
7221
7222 for (BasicBlock *OtherPred : predecessors(Succ)) {
7223 if (BB == OtherPred)
7224 continue;
7225 BasicBlock::iterator I = OtherPred->begin();
7226 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7227 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7228 continue;
7229 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7230 ;
7231 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7232 if (!BI2 || !BI2->isIdenticalTo(BI))
7233 continue;
7234
7235 std::vector<DominatorTree::UpdateType> Updates;
7236
7237 // We've found an identical block. Update our predecessors to take that
7238 // path instead and make ourselves dead.
7240 for (BasicBlock *Pred : UniquePreds) {
7241 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7242 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7243 "unexpected successor");
7244 II->setUnwindDest(OtherPred);
7245 if (DTU) {
7246 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7247 Updates.push_back({DominatorTree::Delete, Pred, BB});
7248 }
7249 }
7250
7251 // The debug info in OtherPred doesn't cover the merged control flow that
7252 // used to go through BB. We need to delete it or update it.
7253 for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7254 if (isa<DbgInfoIntrinsic>(Inst))
7255 Inst.eraseFromParent();
7256
7258 for (BasicBlock *Succ : UniqueSuccs) {
7259 Succ->removePredecessor(BB);
7260 if (DTU)
7261 Updates.push_back({DominatorTree::Delete, BB, Succ});
7262 }
7263
7264 IRBuilder<> Builder(BI);
7265 Builder.CreateUnreachable();
7266 BI->eraseFromParent();
7267 if (DTU)
7268 DTU->applyUpdates(Updates);
7269 return true;
7270 }
7271 return false;
7272}
7273
7274bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7275 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7276 : simplifyCondBranch(Branch, Builder);
7277}
7278
7279bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7280 IRBuilder<> &Builder) {
7281 BasicBlock *BB = BI->getParent();
7282 BasicBlock *Succ = BI->getSuccessor(0);
7283
7284 // If the Terminator is the only non-phi instruction, simplify the block.
7285 // If LoopHeader is provided, check if the block or its successor is a loop
7286 // header. (This is for early invocations before loop simplify and
7287 // vectorization to keep canonical loop forms for nested loops. These blocks
7288 // can be eliminated when the pass is invoked later in the back-end.)
7289 // Note that if BB has only one predecessor then we do not introduce new
7290 // backedge, so we can eliminate BB.
7291 bool NeedCanonicalLoop =
7292 Options.NeedCanonicalLoop &&
7293 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7294 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
7296 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7297 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7298 return true;
7299
7300 // If the only instruction in the block is a seteq/setne comparison against a
7301 // constant, try to simplify the block.
7302 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7303 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7304 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7305 ;
7306 if (I->isTerminator() &&
7307 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7308 return true;
7309 }
7310
7311 // See if we can merge an empty landing pad block with another which is
7312 // equivalent.
7313 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7314 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7315 ;
7316 if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
7317 return true;
7318 }
7319
7320 // If this basic block is ONLY a compare and a branch, and if a predecessor
7321 // branches to us and our successor, fold the comparison into the
7322 // predecessor and use logical operations to update the incoming value
7323 // for PHI nodes in common successor.
7324 if (Options.SpeculateBlocks &&
7325 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7326 Options.BonusInstThreshold))
7327 return requestResimplify();
7328 return false;
7329}
7330
7332 BasicBlock *PredPred = nullptr;
7333 for (auto *P : predecessors(BB)) {
7334 BasicBlock *PPred = P->getSinglePredecessor();
7335 if (!PPred || (PredPred && PredPred != PPred))
7336 return nullptr;
7337 PredPred = PPred;
7338 }
7339 return PredPred;
7340}
7341
7342bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
7343 assert(
7344 !isa<ConstantInt>(BI->getCondition()) &&
7345 BI->getSuccessor(0) != BI->getSuccessor(1) &&
7346 "Tautological conditional branch should have been eliminated already.");
7347
7348 BasicBlock *BB = BI->getParent();
7349 if (!Options.SimplifyCondBranch ||
7350 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
7351 return false;
7352
7353 // Conditional branch
7354 if (isValueEqualityComparison(BI)) {
7355 // If we only have one predecessor, and if it is a branch on this value,
7356 // see if that predecessor totally determines the outcome of this
7357 // switch.
7358 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7359 if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
7360 return requestResimplify();
7361
7362 // This block must be empty, except for the setcond inst, if it exists.
7363 // Ignore dbg and pseudo intrinsics.
7364 auto I = BB->instructionsWithoutDebug(true).begin();
7365 if (&*I == BI) {
7366 if (FoldValueComparisonIntoPredecessors(BI, Builder))
7367 return requestResimplify();
7368 } else if (&*I == cast<Instruction>(BI->getCondition())) {
7369 ++I;
7370 if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
7371 return requestResimplify();
7372 }
7373 }
7374
7375 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
7376 if (SimplifyBranchOnICmpChain(BI, Builder, DL))
7377 return true;
7378
7379 // If this basic block has dominating predecessor blocks and the dominating
7380 // blocks' conditions imply BI's condition, we know the direction of BI.
7381 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
7382 if (Imp) {
7383 // Turn this into a branch on constant.
7384 auto *OldCond = BI->getCondition();
7385 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
7386 : ConstantInt::getFalse(BB->getContext());
7387 BI->setCondition(TorF);
7389 return requestResimplify();
7390 }
7391
7392 // If this basic block is ONLY a compare and a branch, and if a predecessor
7393 // branches to us and one of our successors, fold the comparison into the
7394 // predecessor and use logical operations to pick the right destination.
7395 if (Options.SpeculateBlocks &&
7396 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7397 Options.BonusInstThreshold))
7398 return requestResimplify();
7399
7400 // We have a conditional branch to two blocks that are only reachable
7401 // from BI. We know that the condbr dominates the two blocks, so see if
7402 // there is any identical code in the "then" and "else" blocks. If so, we
7403 // can hoist it up to the branching block.
7404 if (BI->getSuccessor(0)->getSinglePredecessor()) {
7405 if (BI->getSuccessor(1)->getSinglePredecessor()) {
7406 if (HoistCommon && hoistCommonCodeFromSuccessors(
7407 BI->getParent(), !Options.HoistCommonInsts))
7408 return requestResimplify();
7409 } else {
7410 // If Successor #1 has multiple preds, we may be able to conditionally
7411 // execute Successor #0 if it branches to Successor #1.
7412 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
7413 if (Succ0TI->getNumSuccessors() == 1 &&
7414 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
7415 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
7416 return requestResimplify();
7417 }
7418 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
7419 // If Successor #0 has multiple preds, we may be able to conditionally
7420 // execute Successor #1 if it branches to Successor #0.
7421 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
7422 if (Succ1TI->getNumSuccessors() == 1 &&
7423 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
7424 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
7425 return requestResimplify();
7426 }
7427
7428 // If this is a branch on something for which we know the constant value in
7429 // predecessors (e.g. a phi node in the current block), thread control
7430 // through this block.
7432 return requestResimplify();
7433
7434 // Scan predecessor blocks for conditional branches.
7435 for (BasicBlock *Pred : predecessors(BB))
7436 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
7437 if (PBI != BI && PBI->isConditional())
7438 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
7439 return requestResimplify();
7440
7441 // Look for diamond patterns.
7442 if (MergeCondStores)
7444 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
7445 if (PBI != BI && PBI->isConditional())
7446 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
7447 return requestResimplify();
7448
7449 return false;
7450}
7451
7452/// Check if passing a value to an instruction will cause undefined behavior.
7453static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
7454 Constant *C = dyn_cast<Constant>(V);
7455 if (!C)
7456 return false;
7457
7458 if (I->use_empty())
7459 return false;
7460
7461 if (C->isNullValue() || isa<UndefValue>(C)) {
7462 // Only look at the first use, avoid hurting compile time with long uselists
7463 auto *Use = cast<Instruction>(*I->user_begin());
7464 // Bail out if Use is not in the same BB as I or Use == I or Use comes
7465 // before I in the block. The latter two can be the case if Use is a PHI
7466 // node.
7467 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
7468 return false;
7469
7470 // Now make sure that there are no instructions in between that can alter
7471 // control flow (eg. calls)
7472 auto InstrRange =
7473 make_range(std::next(I->getIterator()), Use->getIterator());
7474 if (any_of(InstrRange, [](Instruction &I) {
7476 }))
7477 return false;
7478
7479 // Look through GEPs. A load from a GEP derived from NULL is still undefined
7480 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
7481 if (GEP->getPointerOperand() == I) {
7482 // The current base address is null, there are four cases to consider:
7483 // getelementptr (TY, null, 0) -> null
7484 // getelementptr (TY, null, not zero) -> may be modified
7485 // getelementptr inbounds (TY, null, 0) -> null
7486 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
7487 // undefined?
7488 if (!GEP->hasAllZeroIndices() &&
7489 (!GEP->isInBounds() ||
7490 NullPointerIsDefined(GEP->getFunction(),
7491 GEP->getPointerAddressSpace())))
7492 PtrValueMayBeModified = true;
7493 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
7494 }
7495
7496 // Look through return.
7497 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Use)) {
7498 bool HasNoUndefAttr =
7499 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
7500 // Return undefined to a noundef return value is undefined.
7501 if (isa<UndefValue>(C) && HasNoUndefAttr)
7502 return true;
7503 // Return null to a nonnull+noundef return value is undefined.
7504 if (C->isNullValue() && HasNoUndefAttr &&
7505 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
7506 return !PtrValueMayBeModified;
7507 }
7508 }
7509
7510 // Look through bitcasts.
7511 if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
7512 return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);
7513
7514 // Load from null is undefined.
7515 if (LoadInst *LI = dyn_cast<LoadInst>(Use))
7516 if (!LI->isVolatile())
7517 return !NullPointerIsDefined(LI->getFunction(),
7518 LI->getPointerAddressSpace());
7519
7520 // Store to null is undefined.
7521 if (StoreInst *SI = dyn_cast<StoreInst>(Use))
7522 if (!SI->isVolatile())
7523 return (!NullPointerIsDefined(SI->getFunction(),
7524 SI->getPointerAddressSpace())) &&
7525 SI->getPointerOperand() == I;
7526
7527 if (auto *CB = dyn_cast<CallBase>(Use)) {
7528 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
7529 return false;
7530 // A call to null is undefined.
7531 if (CB->getCalledOperand() == I)
7532 return true;
7533
7534 if (C->isNullValue()) {
7535 for (const llvm::Use &Arg : CB->args())
7536 if (Arg == I) {
7537 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7538 if (CB->isPassingUndefUB(ArgIdx) &&
7539 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
7540 // Passing null to a nonnnull+noundef argument is undefined.
7541 return !PtrValueMayBeModified;
7542 }
7543 }
7544 } else if (isa<UndefValue>(C)) {
7545 // Passing undef to a noundef argument is undefined.
7546 for (const llvm::Use &Arg : CB->args())
7547 if (Arg == I) {
7548 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7549 if (CB->isPassingUndefUB(ArgIdx)) {
7550 // Passing undef to a noundef argument is undefined.
7551 return true;
7552 }
7553 }
7554 }
7555 }
7556 }
7557 return false;
7558}
7559
7560/// If BB has an incoming value that will always trigger undefined behavior
7561/// (eg. null pointer dereference), remove the branch leading here.
7563 DomTreeUpdater *DTU,
7564 AssumptionCache *AC) {
7565 for (PHINode &PHI : BB->phis())
7566 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
7567 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
7568 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
7569 Instruction *T = Predecessor->getTerminator();
7570 IRBuilder<> Builder(T);
7571 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
7572 BB->removePredecessor(Predecessor);
7573 // Turn unconditional branches into unreachables and remove the dead
7574 // destination from conditional branches.
7575 if (BI->isUnconditional())
7576 Builder.CreateUnreachable();
7577 else {
7578 // Preserve guarding condition in assume, because it might not be
7579 // inferrable from any dominating condition.
7580 Value *Cond = BI->getCondition();
7581 CallInst *Assumption;
7582 if (BI->getSuccessor(0) == BB)
7583 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
7584 else
7585 Assumption = Builder.CreateAssumption(Cond);
7586 if (AC)
7587 AC->registerAssumption(cast<AssumeInst>(Assumption));
7588 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
7589 : BI->getSuccessor(0));
7590 }
7591 BI->eraseFromParent();
7592 if (DTU)
7593 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
7594 return true;
7595 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
7596 // Redirect all branches leading to UB into
7597 // a newly created unreachable block.
7598 BasicBlock *Unreachable = BasicBlock::Create(
7599 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
7600 Builder.SetInsertPoint(Unreachable);
7601 // The new block contains only one instruction: Unreachable
7602 Builder.CreateUnreachable();
7603 for (const auto &Case : SI->cases())
7604 if (Case.getCaseSuccessor() == BB) {
7605 BB->removePredecessor(Predecessor);
7606 Case.setSuccessor(Unreachable);
7607 }
7608 if (SI->getDefaultDest() == BB) {
7609 BB->removePredecessor(Predecessor);
7610 SI->setDefaultDest(Unreachable);
7611 }
7612
7613 if (DTU)
7614 DTU->applyUpdates(
7615 { { DominatorTree::Insert, Predecessor, Unreachable },
7616 { DominatorTree::Delete, Predecessor, BB } });
7617 return true;
7618 }
7619 }
7620
7621 return false;
7622}
7623
7624bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
7625 bool Changed = false;
7626
7627 assert(BB && BB->getParent() && "Block not embedded in function!");
7628 assert(BB->getTerminator() && "Degenerate basic block encountered!");
7629
7630 // Remove basic blocks that have no predecessors (except the entry block)...
7631 // or that just have themself as a predecessor. These are unreachable.
7632 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
7633 BB->getSinglePredecessor() == BB) {
7634 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
7635 DeleteDeadBlock(BB, DTU);
7636 return true;
7637 }
7638
7639 // Check to see if we can constant propagate this terminator instruction
7640 // away...
7641 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
7642 /*TLI=*/nullptr, DTU);
7643
7644 // Check for and eliminate duplicate PHI nodes in this block.
7645 Changed |= EliminateDuplicatePHINodes(BB);
7646
7647 // Check for and remove branches that will always cause undefined behavior.
7649 return requestResimplify();
7650
7651 // Merge basic blocks into their predecessor if there is only one distinct
7652 // pred, and if there is only one distinct successor of the predecessor, and
7653 // if there are no PHI nodes.
7654 if (MergeBlockIntoPredecessor(BB, DTU))
7655 return true;
7656
7657 if (SinkCommon && Options.SinkCommonInsts)
7658 if (SinkCommonCodeFromPredecessors(BB, DTU) ||
7659 MergeCompatibleInvokes(BB, DTU)) {
7660 // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
7661 // so we may now how duplicate PHI's.
7662 // Let's rerun EliminateDuplicatePHINodes() first,
7663 // before FoldTwoEntryPHINode() potentially converts them into select's,
7664 // after which we'd need a whole EarlyCSE pass run to cleanup them.
7665 return true;
7666 }
7667
7668 IRBuilder<> Builder(BB);
7669
7670 if (Options.SpeculateBlocks &&
7671 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
7672 // If there is a trivial two-entry PHI node in this basic block, and we can
7673 // eliminate it, do so now.
7674 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
7675 if (PN->getNumIncomingValues() == 2)
7676 if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
7677 return true;
7678 }
7679
7681 Builder.SetInsertPoint(Terminator);
7682 switch (Terminator->getOpcode()) {
7683 case Instruction::Br:
7684 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
7685 break;
7686 case Instruction::Resume:
7687 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
7688 break;
7689 case Instruction::CleanupRet:
7690 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
7691 break;
7692 case Instruction::Switch:
7693 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
7694 break;
7695 case Instruction::Unreachable:
7696 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
7697 break;
7698 case Instruction::IndirectBr:
7699 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
7700 break;
7701 }
7702
7703 return Changed;
7704}
7705
7706bool SimplifyCFGOpt::run(BasicBlock *BB) {
7707 bool Changed = false;
7708
7709 // Repeated simplify BB as long as resimplification is requested.
7710 do {
7711 Resimplify = false;
7712
7713 // Perform one round of simplifcation. Resimplify flag will be set if
7714 // another iteration is requested.
7715 Changed |= simplifyOnce(BB);
7716 } while (Resimplify);
7717
7718 return Changed;
7719}
7720
7723 ArrayRef<WeakVH> LoopHeaders) {
7724 return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
7725 Options)
7726 .run(BB);
7727}
#define Fail
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1291
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
hexagon gen pred
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
#define P(N)
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Module * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static Constant * ConstantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static Constant * LookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static void GetBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static ConstantInt * GetConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static void EliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static std::optional< bool > FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static PHINode * FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool IncomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool ForwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static int ConstantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist or sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static void FitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static void EraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static unsigned skippedInstrFlags(Instruction *I)
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static bool ValuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< Instruction *, SmallVector< Value *, 4 > > &PHIOperands)
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static bool sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static void MergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static bool ShouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool CasesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch try to find another basic bl...
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool isLifeTimeMarker(const Instruction *I)
static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1144
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:334
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1911
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:174
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:335
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:443
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:499
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:247
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:640
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:360
const Instruction & front() const
Definition: BasicBlock.h:453
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:199
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:474
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:490
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:324
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:460
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:482
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:712
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:379
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:65
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:672
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:478
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:613
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:509
This class represents a no-op cast from one type to another.
The address of a basic block.
Definition: Constants.h:889
BasicBlock * getBasicBlock() const
Definition: Constants.h:918
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1809
bool cannotMerge() const
Determine if the call cannot be tail merged.
Definition: InstrTypes.h:2280
bool isIndirectCall() const
Return true if the callsite is an indirect call.
Value * getCalledOperand() const
Definition: InstrTypes.h:1735
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:983
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1017
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier...
Definition: Constants.cpp:2402
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2523
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isNegative() const
Definition: Constants.h:200
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:255
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:184
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:148
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
bool hasPostDomTree() const
Returns true if it holds a PostDominatorTree.
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
const BasicBlock & getEntryBlock() const
Definition: Function.h:783
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:701
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:678
iterator begin()
Definition: Function.h:799
size_t size() const
Definition: Function.h:804
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2257
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2039
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1263
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1437
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1876
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:233
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1143
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1790
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles=std::nullopt)
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:551
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1475
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1803
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2117
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1114
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1676
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2196
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1682
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const LLVM_READONLY
This function determines if the specified instruction executes the same operation as the current one.
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:84
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:83
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:149
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:87
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:359
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:255
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1636
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1707
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:935
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
Invoke instruction.
BasicBlock * getUnwindDest() const
void setNormalDest(BasicBlock *B)
void setUnwindDest(BasicBlock *B)
BasicBlock * getNormalDest() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:184
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:301
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
Align getAlign() const
Definition: Instructions.h:369
bool isSimple() const
Definition: Instructions.h:406
Value * getValueOperand()
Definition: Instructions.h:414
bool isUnordered() const
Definition: Instructions.h:408
Value * getPointerOperand()
Definition: Instructions.h:417
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void set(Value *Val)
Definition: Value.h:882
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
bool user_empty() const
Definition: Value.h:385
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:468
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:815
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use \ID as an operand.
Definition: DebugInfo.cpp:1898
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:238
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:31
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1715
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:130
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:40
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2165
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1768
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void RemapDbgVariableRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgVariableRecord V using the value map VM.
Definition: ValueMapper.h:285
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2059
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
Definition: BasicBlock.cpp:693
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1120
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2043
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void RemapDbgVariableRecord(Module *M, DbgVariableRecord *V, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgVariableRecord V using the value map VM.
Definition: ValueMapper.h:273
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1422
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:3160
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:264
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3341
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3587
@ And
Bitwise or logical AND of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4065
auto max_element(R &&Range)
Definition: STLExtras.h:1986
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
unsigned succ_size(const MachineBasicBlock *BB)
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1607
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1487
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254