LLVM 20.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
78#include <algorithm>
79#include <cassert>
80#include <climits>
81#include <cstddef>
82#include <cstdint>
83#include <iterator>
84#include <map>
85#include <optional>
86#include <set>
87#include <tuple>
88#include <utility>
89#include <vector>
90
91using namespace llvm;
92using namespace PatternMatch;
93
94#define DEBUG_TYPE "simplifycfg"
95
97 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
98
100 "Temporary development switch used to gradually uplift SimplifyCFG "
101 "into preserving DomTree,"));
102
103// Chosen as 2 so as to be cheap, but still to have enough power to fold
104// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
105// To catch this, we need to fold a compare and a select, hence '2' being the
106// minimum reasonable default.
108 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
109 cl::desc(
110 "Control the amount of phi node folding to perform (default = 2)"));
111
113 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
114 cl::desc("Control the maximal total instruction cost that we are willing "
115 "to speculatively execute to fold a 2-entry PHI node into a "
116 "select (default = 4)"));
117
118static cl::opt<bool>
119 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
120 cl::desc("Hoist common instructions up to the parent block"));
121
123 "simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden,
124 cl::init(true),
125 cl::desc("Hoist loads/stores if the target supports "
126 "conditional faulting"));
127
129 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
130 cl::desc("Control the maximal conditional load/store that we are willing "
131 "to speculatively execute to eliminate conditional branch "
132 "(default = 6)"));
133
135 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
136 cl::init(20),
137 cl::desc("Allow reordering across at most this many "
138 "instructions when hoisting"));
139
140static cl::opt<bool>
141 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
142 cl::desc("Sink common instructions down to the end block"));
143
145 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
146 cl::desc("Hoist conditional stores if an unconditional store precedes"));
147
149 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
150 cl::desc("Hoist conditional stores even if an unconditional store does not "
151 "precede - hoist multiple conditional stores into a single "
152 "predicated store"));
153
155 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
156 cl::desc("When merging conditional stores, do so even if the resultant "
157 "basic blocks are unlikely to be if-converted as a result"));
158
160 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
161 cl::desc("Allow exactly one expensive instruction to be speculatively "
162 "executed"));
163
165 "max-speculation-depth", cl::Hidden, cl::init(10),
166 cl::desc("Limit maximum recursion depth when calculating costs of "
167 "speculatively executed instructions"));
168
169static cl::opt<int>
170 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
171 cl::init(10),
172 cl::desc("Max size of a block which is still considered "
173 "small enough to thread through"));
174
175// Two is chosen to allow one negation and a logical combine.
177 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
178 cl::init(2),
179 cl::desc("Maximum cost of combining conditions when "
180 "folding branches"));
181
183 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
184 cl::init(2),
185 cl::desc("Multiplier to apply to threshold when determining whether or not "
186 "to fold branch to common destination when vector operations are "
187 "present"));
188
190 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
191 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
192
194 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
195 cl::desc("Limit cases to analyze when converting a switch to select"));
196
197STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
198STATISTIC(NumLinearMaps,
199 "Number of switch instructions turned into linear mapping");
200STATISTIC(NumLookupTables,
201 "Number of switch instructions turned into lookup tables");
203 NumLookupTablesHoles,
204 "Number of switch instructions turned into lookup tables (holes checked)");
205STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
206STATISTIC(NumFoldValueComparisonIntoPredecessors,
207 "Number of value comparisons folded into predecessor basic blocks");
208STATISTIC(NumFoldBranchToCommonDest,
209 "Number of branches folded into predecessor basic block");
211 NumHoistCommonCode,
212 "Number of common instruction 'blocks' hoisted up to the begin block");
213STATISTIC(NumHoistCommonInstrs,
214 "Number of common instructions hoisted up to the begin block");
215STATISTIC(NumSinkCommonCode,
216 "Number of common instruction 'blocks' sunk down to the end block");
217STATISTIC(NumSinkCommonInstrs,
218 "Number of common instructions sunk down to the end block");
219STATISTIC(NumSpeculations, "Number of speculative executed instructions");
220STATISTIC(NumInvokes,
221 "Number of invokes with empty resume blocks simplified into calls");
222STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
223STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
224
225namespace {
226
227// The first field contains the value that the switch produces when a certain
228// case group is selected, and the second field is a vector containing the
229// cases composing the case group.
230using SwitchCaseResultVectorTy =
232
233// The first field contains the phi node that generates a result of the switch
234// and the second field contains the value generated for a certain case in the
235// switch for that PHI.
236using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
237
238/// ValueEqualityComparisonCase - Represents a case of a switch.
239struct ValueEqualityComparisonCase {
241 BasicBlock *Dest;
242
243 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
244 : Value(Value), Dest(Dest) {}
245
246 bool operator<(ValueEqualityComparisonCase RHS) const {
247 // Comparing pointers is ok as we only rely on the order for uniquing.
248 return Value < RHS.Value;
249 }
250
251 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
252};
253
254class SimplifyCFGOpt {
256 DomTreeUpdater *DTU;
257 const DataLayout &DL;
258 ArrayRef<WeakVH> LoopHeaders;
259 const SimplifyCFGOptions &Options;
260 bool Resimplify;
261
262 Value *isValueEqualityComparison(Instruction *TI);
263 BasicBlock *getValueEqualityComparisonCases(
264 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
265 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
266 BasicBlock *Pred,
267 IRBuilder<> &Builder);
268 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
269 Instruction *PTI,
270 IRBuilder<> &Builder);
271 bool foldValueComparisonIntoPredecessors(Instruction *TI,
272 IRBuilder<> &Builder);
273
274 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
275 bool simplifySingleResume(ResumeInst *RI);
276 bool simplifyCommonResume(ResumeInst *RI);
277 bool simplifyCleanupReturn(CleanupReturnInst *RI);
278 bool simplifyUnreachable(UnreachableInst *UI);
279 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
280 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
281 bool simplifyIndirectBr(IndirectBrInst *IBI);
282 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
283 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
284 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
285
286 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
287 IRBuilder<> &Builder);
288
289 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
290 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
291 Instruction *TI, Instruction *I1,
292 SmallVectorImpl<Instruction *> &OtherSuccTIs);
293 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
294 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
295 BasicBlock *TrueBB, BasicBlock *FalseBB,
296 uint32_t TrueWeight, uint32_t FalseWeight);
297 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
298 const DataLayout &DL);
299 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
300 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
301 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
302
303public:
304 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
305 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
306 const SimplifyCFGOptions &Opts)
307 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
308 assert((!DTU || !DTU->hasPostDomTree()) &&
309 "SimplifyCFG is not yet capable of maintaining validity of a "
310 "PostDomTree, so don't ask for it.");
311 }
312
313 bool simplifyOnce(BasicBlock *BB);
314 bool run(BasicBlock *BB);
315
316 // Helper to set Resimplify and return change indication.
317 bool requestResimplify() {
318 Resimplify = true;
319 return true;
320 }
321};
322
323} // end anonymous namespace
324
325/// Return true if all the PHI nodes in the basic block \p BB
326/// receive compatible (identical) incoming values when coming from
327/// all of the predecessor blocks that are specified in \p IncomingBlocks.
328///
329/// Note that if the values aren't exactly identical, but \p EquivalenceSet
330/// is provided, and *both* of the values are present in the set,
331/// then they are considered equal.
333 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
334 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
335 assert(IncomingBlocks.size() == 2 &&
336 "Only for a pair of incoming blocks at the time!");
337
338 // FIXME: it is okay if one of the incoming values is an `undef` value,
339 // iff the other incoming value is guaranteed to be a non-poison value.
340 // FIXME: it is okay if one of the incoming values is a `poison` value.
341 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
342 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
343 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
344 if (IV0 == IV1)
345 return true;
346 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
347 EquivalenceSet->contains(IV1))
348 return true;
349 return false;
350 });
351}
352
353/// Return true if it is safe to merge these two
354/// terminator instructions together.
355static bool
357 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
358 if (SI1 == SI2)
359 return false; // Can't merge with self!
360
361 // It is not safe to merge these two switch instructions if they have a common
362 // successor, and if that successor has a PHI node, and if *that* PHI node has
363 // conflicting incoming values from the two switch blocks.
364 BasicBlock *SI1BB = SI1->getParent();
365 BasicBlock *SI2BB = SI2->getParent();
366
367 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
368 bool Fail = false;
369 for (BasicBlock *Succ : successors(SI2BB)) {
370 if (!SI1Succs.count(Succ))
371 continue;
372 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
373 continue;
374 Fail = true;
375 if (FailBlocks)
376 FailBlocks->insert(Succ);
377 else
378 break;
379 }
380
381 return !Fail;
382}
383
384/// Update PHI nodes in Succ to indicate that there will now be entries in it
385/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
386/// will be the same as those coming in from ExistPred, an existing predecessor
387/// of Succ.
388static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
389 BasicBlock *ExistPred,
390 MemorySSAUpdater *MSSAU = nullptr) {
391 for (PHINode &PN : Succ->phis())
392 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
393 if (MSSAU)
394 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
395 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
396}
397
398/// Compute an abstract "cost" of speculating the given instruction,
399/// which is assumed to be safe to speculate. TCC_Free means cheap,
400/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
401/// expensive.
403 const TargetTransformInfo &TTI) {
405}
406
407/// If we have a merge point of an "if condition" as accepted above,
408/// return true if the specified value dominates the block. We don't handle
409/// the true generality of domination here, just a special case which works
410/// well enough for us.
411///
412/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
413/// see if V (which must be an instruction) and its recursive operands
414/// that do not dominate BB have a combined cost lower than Budget and
415/// are non-trapping. If both are true, the instruction is inserted into the
416/// set and true is returned.
417///
418/// The cost for most non-trapping instructions is defined as 1 except for
419/// Select whose cost is 2.
420///
421/// After this function returns, Cost is increased by the cost of
422/// V plus its non-dominating operands. If that cost is greater than
423/// Budget, false is returned and Cost is undefined.
424static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
425 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
428 AssumptionCache *AC, unsigned Depth = 0) {
429 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
430 // so limit the recursion depth.
431 // TODO: While this recursion limit does prevent pathological behavior, it
432 // would be better to track visited instructions to avoid cycles.
434 return false;
435
436 Instruction *I = dyn_cast<Instruction>(V);
437 if (!I) {
438 // Non-instructions dominate all instructions and can be executed
439 // unconditionally.
440 return true;
441 }
442 BasicBlock *PBB = I->getParent();
443
444 // We don't want to allow weird loops that might have the "if condition" in
445 // the bottom of this block.
446 if (PBB == BB)
447 return false;
448
449 // If this instruction is defined in a block that contains an unconditional
450 // branch to BB, then it must be in the 'conditional' part of the "if
451 // statement". If not, it definitely dominates the region.
452 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
453 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
454 return true;
455
456 // If we have seen this instruction before, don't count it again.
457 if (AggressiveInsts.count(I))
458 return true;
459
460 // Okay, it looks like the instruction IS in the "condition". Check to
461 // see if it's a cheap instruction to unconditionally compute, and if it
462 // only uses stuff defined outside of the condition. If so, hoist it out.
463 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
464 return false;
465
467
468 // Allow exactly one instruction to be speculated regardless of its cost
469 // (as long as it is safe to do so).
470 // This is intended to flatten the CFG even if the instruction is a division
471 // or other expensive operation. The speculation of an expensive instruction
472 // is expected to be undone in CodeGenPrepare if the speculation has not
473 // enabled further IR optimizations.
474 if (Cost > Budget &&
475 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
476 !Cost.isValid()))
477 return false;
478
479 // Okay, we can only really hoist these out if their operands do
480 // not take us over the cost threshold.
481 for (Use &Op : I->operands())
482 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
483 TTI, AC, Depth + 1))
484 return false;
485 // Okay, it's safe to do this! Remember this instruction.
486 AggressiveInsts.insert(I);
487 return true;
488}
489
490/// Extract ConstantInt from value, looking through IntToPtr
491/// and PointerNullValue. Return NULL if value is not a constant int.
493 // Normal constant int.
494 ConstantInt *CI = dyn_cast<ConstantInt>(V);
495 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
496 DL.isNonIntegralPointerType(V->getType()))
497 return CI;
498
499 // This is some kind of pointer constant. Turn it into a pointer-sized
500 // ConstantInt if possible.
501 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
502
503 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
504 if (isa<ConstantPointerNull>(V))
505 return ConstantInt::get(PtrTy, 0);
506
507 // IntToPtr const int.
508 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
509 if (CE->getOpcode() == Instruction::IntToPtr)
510 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
511 // The constant is very likely to have the right type already.
512 if (CI->getType() == PtrTy)
513 return CI;
514 else
515 return cast<ConstantInt>(
516 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
517 }
518 return nullptr;
519}
520
521namespace {
522
523/// Given a chain of or (||) or and (&&) comparison of a value against a
524/// constant, this will try to recover the information required for a switch
525/// structure.
526/// It will depth-first traverse the chain of comparison, seeking for patterns
527/// like %a == 12 or %a < 4 and combine them to produce a set of integer
528/// representing the different cases for the switch.
529/// Note that if the chain is composed of '||' it will build the set of elements
530/// that matches the comparisons (i.e. any of this value validate the chain)
531/// while for a chain of '&&' it will build the set elements that make the test
532/// fail.
533struct ConstantComparesGatherer {
534 const DataLayout &DL;
535
536 /// Value found for the switch comparison
537 Value *CompValue = nullptr;
538
539 /// Extra clause to be checked before the switch
540 Value *Extra = nullptr;
541
542 /// Set of integers to match in switch
544
545 /// Number of comparisons matched in the and/or chain
546 unsigned UsedICmps = 0;
547
548 /// Construct and compute the result for the comparison instruction Cond
549 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
550 gather(Cond);
551 }
552
553 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
554 ConstantComparesGatherer &
555 operator=(const ConstantComparesGatherer &) = delete;
556
557private:
558 /// Try to set the current value used for the comparison, it succeeds only if
559 /// it wasn't set before or if the new value is the same as the old one
560 bool setValueOnce(Value *NewVal) {
561 if (CompValue && CompValue != NewVal)
562 return false;
563 CompValue = NewVal;
564 return (CompValue != nullptr);
565 }
566
567 /// Try to match Instruction "I" as a comparison against a constant and
568 /// populates the array Vals with the set of values that match (or do not
569 /// match depending on isEQ).
570 /// Return false on failure. On success, the Value the comparison matched
571 /// against is placed in CompValue.
572 /// If CompValue is already set, the function is expected to fail if a match
573 /// is found but the value compared to is different.
574 bool matchInstruction(Instruction *I, bool isEQ) {
575 // If this is an icmp against a constant, handle this as one of the cases.
576 ICmpInst *ICI;
577 ConstantInt *C;
578 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
579 (C = getConstantInt(I->getOperand(1), DL)))) {
580 return false;
581 }
582
583 Value *RHSVal;
584 const APInt *RHSC;
585
586 // Pattern match a special case
587 // (x & ~2^z) == y --> x == y || x == y|2^z
588 // This undoes a transformation done by instcombine to fuse 2 compares.
589 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
590 // It's a little bit hard to see why the following transformations are
591 // correct. Here is a CVC3 program to verify them for 64-bit values:
592
593 /*
594 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
595 x : BITVECTOR(64);
596 y : BITVECTOR(64);
597 z : BITVECTOR(64);
598 mask : BITVECTOR(64) = BVSHL(ONE, z);
599 QUERY( (y & ~mask = y) =>
600 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
601 );
602 QUERY( (y | mask = y) =>
603 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
604 );
605 */
606
607 // Please note that each pattern must be a dual implication (<--> or
608 // iff). One directional implication can create spurious matches. If the
609 // implication is only one-way, an unsatisfiable condition on the left
610 // side can imply a satisfiable condition on the right side. Dual
611 // implication ensures that satisfiable conditions are transformed to
612 // other satisfiable conditions and unsatisfiable conditions are
613 // transformed to other unsatisfiable conditions.
614
615 // Here is a concrete example of a unsatisfiable condition on the left
616 // implying a satisfiable condition on the right:
617 //
618 // mask = (1 << z)
619 // (x & ~mask) == y --> (x == y || x == (y | mask))
620 //
621 // Substituting y = 3, z = 0 yields:
622 // (x & -2) == 3 --> (x == 3 || x == 2)
623
624 // Pattern match a special case:
625 /*
626 QUERY( (y & ~mask = y) =>
627 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
628 );
629 */
630 if (match(ICI->getOperand(0),
631 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
632 APInt Mask = ~*RHSC;
633 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
634 // If we already have a value for the switch, it has to match!
635 if (!setValueOnce(RHSVal))
636 return false;
637
638 Vals.push_back(C);
639 Vals.push_back(
640 ConstantInt::get(C->getContext(),
641 C->getValue() | Mask));
642 UsedICmps++;
643 return true;
644 }
645 }
646
647 // Pattern match a special case:
648 /*
649 QUERY( (y | mask = y) =>
650 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
651 );
652 */
653 if (match(ICI->getOperand(0),
654 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
655 APInt Mask = *RHSC;
656 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
657 // If we already have a value for the switch, it has to match!
658 if (!setValueOnce(RHSVal))
659 return false;
660
661 Vals.push_back(C);
662 Vals.push_back(ConstantInt::get(C->getContext(),
663 C->getValue() & ~Mask));
664 UsedICmps++;
665 return true;
666 }
667 }
668
669 // If we already have a value for the switch, it has to match!
670 if (!setValueOnce(ICI->getOperand(0)))
671 return false;
672
673 UsedICmps++;
674 Vals.push_back(C);
675 return ICI->getOperand(0);
676 }
677
678 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
679 ConstantRange Span =
681
682 // Shift the range if the compare is fed by an add. This is the range
683 // compare idiom as emitted by instcombine.
684 Value *CandidateVal = I->getOperand(0);
685 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
686 Span = Span.subtract(*RHSC);
687 CandidateVal = RHSVal;
688 }
689
690 // If this is an and/!= check, then we are looking to build the set of
691 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
692 // x != 0 && x != 1.
693 if (!isEQ)
694 Span = Span.inverse();
695
696 // If there are a ton of values, we don't want to make a ginormous switch.
697 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
698 return false;
699 }
700
701 // If we already have a value for the switch, it has to match!
702 if (!setValueOnce(CandidateVal))
703 return false;
704
705 // Add all values from the range to the set
706 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
707 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
708
709 UsedICmps++;
710 return true;
711 }
712
713 /// Given a potentially 'or'd or 'and'd together collection of icmp
714 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
715 /// the value being compared, and stick the list constants into the Vals
716 /// vector.
717 /// One "Extra" case is allowed to differ from the other.
718 void gather(Value *V) {
719 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
720
721 // Keep a stack (SmallVector for efficiency) for depth-first traversal
724
725 // Initialize
726 Visited.insert(V);
727 DFT.push_back(V);
728
729 while (!DFT.empty()) {
730 V = DFT.pop_back_val();
731
732 if (Instruction *I = dyn_cast<Instruction>(V)) {
733 // If it is a || (or && depending on isEQ), process the operands.
734 Value *Op0, *Op1;
735 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
736 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
737 if (Visited.insert(Op1).second)
738 DFT.push_back(Op1);
739 if (Visited.insert(Op0).second)
740 DFT.push_back(Op0);
741
742 continue;
743 }
744
745 // Try to match the current instruction
746 if (matchInstruction(I, isEQ))
747 // Match succeed, continue the loop
748 continue;
749 }
750
751 // One element of the sequence of || (or &&) could not be match as a
752 // comparison against the same value as the others.
753 // We allow only one "Extra" case to be checked before the switch
754 if (!Extra) {
755 Extra = V;
756 continue;
757 }
758 // Failed to parse a proper sequence, abort now
759 CompValue = nullptr;
760 break;
761 }
762 }
763};
764
765} // end anonymous namespace
766
768 MemorySSAUpdater *MSSAU = nullptr) {
769 Instruction *Cond = nullptr;
770 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
771 Cond = dyn_cast<Instruction>(SI->getCondition());
772 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
773 if (BI->isConditional())
774 Cond = dyn_cast<Instruction>(BI->getCondition());
775 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
776 Cond = dyn_cast<Instruction>(IBI->getAddress());
777 }
778
779 TI->eraseFromParent();
780 if (Cond)
782}
783
784/// Return true if the specified terminator checks
785/// to see if a value is equal to constant integer value.
786Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
787 Value *CV = nullptr;
788 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
789 // Do not permit merging of large switch instructions into their
790 // predecessors unless there is only one predecessor.
791 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
792 CV = SI->getCondition();
793 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
794 if (BI->isConditional() && BI->getCondition()->hasOneUse())
795 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
796 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
797 CV = ICI->getOperand(0);
798 }
799
800 // Unwrap any lossless ptrtoint cast.
801 if (CV) {
802 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
803 Value *Ptr = PTII->getPointerOperand();
804 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
805 CV = Ptr;
806 }
807 }
808 return CV;
809}
810
811/// Given a value comparison instruction,
812/// decode all of the 'cases' that it represents and return the 'default' block.
813BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
814 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
815 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
816 Cases.reserve(SI->getNumCases());
817 for (auto Case : SI->cases())
818 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
819 Case.getCaseSuccessor()));
820 return SI->getDefaultDest();
821 }
822
823 BranchInst *BI = cast<BranchInst>(TI);
824 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
825 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
826 Cases.push_back(ValueEqualityComparisonCase(
827 getConstantInt(ICI->getOperand(1), DL), Succ));
828 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
829}
830
831/// Given a vector of bb/value pairs, remove any entries
832/// in the list that match the specified block.
833static void
835 std::vector<ValueEqualityComparisonCase> &Cases) {
836 llvm::erase(Cases, BB);
837}
838
839/// Return true if there are any keys in C1 that exist in C2 as well.
840static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
841 std::vector<ValueEqualityComparisonCase> &C2) {
842 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
843
844 // Make V1 be smaller than V2.
845 if (V1->size() > V2->size())
846 std::swap(V1, V2);
847
848 if (V1->empty())
849 return false;
850 if (V1->size() == 1) {
851 // Just scan V2.
852 ConstantInt *TheVal = (*V1)[0].Value;
853 for (const ValueEqualityComparisonCase &VECC : *V2)
854 if (TheVal == VECC.Value)
855 return true;
856 }
857
858 // Otherwise, just sort both lists and compare element by element.
859 array_pod_sort(V1->begin(), V1->end());
860 array_pod_sort(V2->begin(), V2->end());
861 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
862 while (i1 != e1 && i2 != e2) {
863 if ((*V1)[i1].Value == (*V2)[i2].Value)
864 return true;
865 if ((*V1)[i1].Value < (*V2)[i2].Value)
866 ++i1;
867 else
868 ++i2;
869 }
870 return false;
871}
872
873// Set branch weights on SwitchInst. This sets the metadata if there is at
874// least one non-zero weight.
876 bool IsExpected) {
877 // Check that there is at least one non-zero weight. Otherwise, pass
878 // nullptr to setMetadata which will erase the existing metadata.
879 MDNode *N = nullptr;
880 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
881 N = MDBuilder(SI->getParent()->getContext())
882 .createBranchWeights(Weights, IsExpected);
883 SI->setMetadata(LLVMContext::MD_prof, N);
884}
885
886// Similar to the above, but for branch and select instructions that take
887// exactly 2 weights.
888static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
889 uint32_t FalseWeight, bool IsExpected) {
890 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
891 // Check that there is at least one non-zero weight. Otherwise, pass
892 // nullptr to setMetadata which will erase the existing metadata.
893 MDNode *N = nullptr;
894 if (TrueWeight || FalseWeight)
895 N = MDBuilder(I->getParent()->getContext())
896 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
897 I->setMetadata(LLVMContext::MD_prof, N);
898}
899
900/// If TI is known to be a terminator instruction and its block is known to
901/// only have a single predecessor block, check to see if that predecessor is
902/// also a value comparison with the same value, and if that comparison
903/// determines the outcome of this comparison. If so, simplify TI. This does a
904/// very limited form of jump threading.
905bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
906 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
907 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
908 if (!PredVal)
909 return false; // Not a value comparison in predecessor.
910
911 Value *ThisVal = isValueEqualityComparison(TI);
912 assert(ThisVal && "This isn't a value comparison!!");
913 if (ThisVal != PredVal)
914 return false; // Different predicates.
915
916 // TODO: Preserve branch weight metadata, similarly to how
917 // foldValueComparisonIntoPredecessors preserves it.
918
919 // Find out information about when control will move from Pred to TI's block.
920 std::vector<ValueEqualityComparisonCase> PredCases;
921 BasicBlock *PredDef =
922 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
923 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
924
925 // Find information about how control leaves this block.
926 std::vector<ValueEqualityComparisonCase> ThisCases;
927 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
928 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
929
930 // If TI's block is the default block from Pred's comparison, potentially
931 // simplify TI based on this knowledge.
932 if (PredDef == TI->getParent()) {
933 // If we are here, we know that the value is none of those cases listed in
934 // PredCases. If there are any cases in ThisCases that are in PredCases, we
935 // can simplify TI.
936 if (!valuesOverlap(PredCases, ThisCases))
937 return false;
938
939 if (isa<BranchInst>(TI)) {
940 // Okay, one of the successors of this condbr is dead. Convert it to a
941 // uncond br.
942 assert(ThisCases.size() == 1 && "Branch can only have one case!");
943 // Insert the new branch.
944 Instruction *NI = Builder.CreateBr(ThisDef);
945 (void)NI;
946
947 // Remove PHI node entries for the dead edge.
948 ThisCases[0].Dest->removePredecessor(PredDef);
949
950 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
951 << "Through successor TI: " << *TI << "Leaving: " << *NI
952 << "\n");
953
955
956 if (DTU)
957 DTU->applyUpdates(
958 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
959
960 return true;
961 }
962
963 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
964 // Okay, TI has cases that are statically dead, prune them away.
966 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
967 DeadCases.insert(PredCases[i].Value);
968
969 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
970 << "Through successor TI: " << *TI);
971
972 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
973 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
974 --i;
975 auto *Successor = i->getCaseSuccessor();
976 if (DTU)
977 ++NumPerSuccessorCases[Successor];
978 if (DeadCases.count(i->getCaseValue())) {
979 Successor->removePredecessor(PredDef);
980 SI.removeCase(i);
981 if (DTU)
982 --NumPerSuccessorCases[Successor];
983 }
984 }
985
986 if (DTU) {
987 std::vector<DominatorTree::UpdateType> Updates;
988 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
989 if (I.second == 0)
990 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
991 DTU->applyUpdates(Updates);
992 }
993
994 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
995 return true;
996 }
997
998 // Otherwise, TI's block must correspond to some matched value. Find out
999 // which value (or set of values) this is.
1000 ConstantInt *TIV = nullptr;
1001 BasicBlock *TIBB = TI->getParent();
1002 for (const auto &[Value, Dest] : PredCases)
1003 if (Dest == TIBB) {
1004 if (TIV)
1005 return false; // Cannot handle multiple values coming to this block.
1006 TIV = Value;
1007 }
1008 assert(TIV && "No edge from pred to succ?");
1009
1010 // Okay, we found the one constant that our value can be if we get into TI's
1011 // BB. Find out which successor will unconditionally be branched to.
1012 BasicBlock *TheRealDest = nullptr;
1013 for (const auto &[Value, Dest] : ThisCases)
1014 if (Value == TIV) {
1015 TheRealDest = Dest;
1016 break;
1017 }
1018
1019 // If not handled by any explicit cases, it is handled by the default case.
1020 if (!TheRealDest)
1021 TheRealDest = ThisDef;
1022
1023 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1024
1025 // Remove PHI node entries for dead edges.
1026 BasicBlock *CheckEdge = TheRealDest;
1027 for (BasicBlock *Succ : successors(TIBB))
1028 if (Succ != CheckEdge) {
1029 if (Succ != TheRealDest)
1030 RemovedSuccs.insert(Succ);
1031 Succ->removePredecessor(TIBB);
1032 } else
1033 CheckEdge = nullptr;
1034
1035 // Insert the new branch.
1036 Instruction *NI = Builder.CreateBr(TheRealDest);
1037 (void)NI;
1038
1039 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1040 << "Through successor TI: " << *TI << "Leaving: " << *NI
1041 << "\n");
1042
1044 if (DTU) {
1046 Updates.reserve(RemovedSuccs.size());
1047 for (auto *RemovedSucc : RemovedSuccs)
1048 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1049 DTU->applyUpdates(Updates);
1050 }
1051 return true;
1052}
1053
1054namespace {
1055
1056/// This class implements a stable ordering of constant
1057/// integers that does not depend on their address. This is important for
1058/// applications that sort ConstantInt's to ensure uniqueness.
1059struct ConstantIntOrdering {
1060 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1061 return LHS->getValue().ult(RHS->getValue());
1062 }
1063};
1064
1065} // end anonymous namespace
1066
1068 ConstantInt *const *P2) {
1069 const ConstantInt *LHS = *P1;
1070 const ConstantInt *RHS = *P2;
1071 if (LHS == RHS)
1072 return 0;
1073 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1074}
1075
1076/// Get Weights of a given terminator, the default weight is at the front
1077/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1078/// metadata.
1080 SmallVectorImpl<uint64_t> &Weights) {
1081 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1082 assert(MD && "Invalid branch-weight metadata");
1083 extractFromBranchWeightMD64(MD, Weights);
1084
1085 // If TI is a conditional eq, the default case is the false case,
1086 // and the corresponding branch-weight data is at index 2. We swap the
1087 // default weight to be the first entry.
1088 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1089 assert(Weights.size() == 2);
1090 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1091 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1092 std::swap(Weights.front(), Weights.back());
1093 }
1094}
1095
1096/// Keep halving the weights until all can fit in uint32_t.
1098 uint64_t Max = *llvm::max_element(Weights);
1099 if (Max > UINT_MAX) {
1100 unsigned Offset = 32 - llvm::countl_zero(Max);
1101 for (uint64_t &I : Weights)
1102 I >>= Offset;
1103 }
1104}
1105
1107 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1108 Instruction *PTI = PredBlock->getTerminator();
1109
1110 // If we have bonus instructions, clone them into the predecessor block.
1111 // Note that there may be multiple predecessor blocks, so we cannot move
1112 // bonus instructions to a predecessor block.
1113 for (Instruction &BonusInst : *BB) {
1114 if (BonusInst.isTerminator())
1115 continue;
1116
1117 Instruction *NewBonusInst = BonusInst.clone();
1118
1119 if (!isa<DbgInfoIntrinsic>(BonusInst) &&
1120 PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
1121 // Unless the instruction has the same !dbg location as the original
1122 // branch, drop it. When we fold the bonus instructions we want to make
1123 // sure we reset their debug locations in order to avoid stepping on
1124 // dead code caused by folding dead branches.
1125 NewBonusInst->setDebugLoc(DebugLoc());
1126 }
1127
1128 RemapInstruction(NewBonusInst, VMap,
1130
1131 // If we speculated an instruction, we need to drop any metadata that may
1132 // result in undefined behavior, as the metadata might have been valid
1133 // only given the branch precondition.
1134 // Similarly strip attributes on call parameters that may cause UB in
1135 // location the call is moved to.
1136 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1137
1138 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1139 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1140 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1142
1143 if (isa<DbgInfoIntrinsic>(BonusInst))
1144 continue;
1145
1146 NewBonusInst->takeName(&BonusInst);
1147 BonusInst.setName(NewBonusInst->getName() + ".old");
1148 VMap[&BonusInst] = NewBonusInst;
1149
1150 // Update (liveout) uses of bonus instructions,
1151 // now that the bonus instruction has been cloned into predecessor.
1152 // Note that we expect to be in a block-closed SSA form for this to work!
1153 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1154 auto *UI = cast<Instruction>(U.getUser());
1155 auto *PN = dyn_cast<PHINode>(UI);
1156 if (!PN) {
1157 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1158 "If the user is not a PHI node, then it should be in the same "
1159 "block as, and come after, the original bonus instruction.");
1160 continue; // Keep using the original bonus instruction.
1161 }
1162 // Is this the block-closed SSA form PHI node?
1163 if (PN->getIncomingBlock(U) == BB)
1164 continue; // Great, keep using the original bonus instruction.
1165 // The only other alternative is an "use" when coming from
1166 // the predecessor block - here we should refer to the cloned bonus instr.
1167 assert(PN->getIncomingBlock(U) == PredBlock &&
1168 "Not in block-closed SSA form?");
1169 U.set(NewBonusInst);
1170 }
1171 }
1172}
1173
1174bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1175 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1176 BasicBlock *BB = TI->getParent();
1177 BasicBlock *Pred = PTI->getParent();
1178
1180
1181 // Figure out which 'cases' to copy from SI to PSI.
1182 std::vector<ValueEqualityComparisonCase> BBCases;
1183 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1184
1185 std::vector<ValueEqualityComparisonCase> PredCases;
1186 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1187
1188 // Based on whether the default edge from PTI goes to BB or not, fill in
1189 // PredCases and PredDefault with the new switch cases we would like to
1190 // build.
1192
1193 // Update the branch weight metadata along the way
1195 bool PredHasWeights = hasBranchWeightMD(*PTI);
1196 bool SuccHasWeights = hasBranchWeightMD(*TI);
1197
1198 if (PredHasWeights) {
1199 getBranchWeights(PTI, Weights);
1200 // branch-weight metadata is inconsistent here.
1201 if (Weights.size() != 1 + PredCases.size())
1202 PredHasWeights = SuccHasWeights = false;
1203 } else if (SuccHasWeights)
1204 // If there are no predecessor weights but there are successor weights,
1205 // populate Weights with 1, which will later be scaled to the sum of
1206 // successor's weights
1207 Weights.assign(1 + PredCases.size(), 1);
1208
1209 SmallVector<uint64_t, 8> SuccWeights;
1210 if (SuccHasWeights) {
1211 getBranchWeights(TI, SuccWeights);
1212 // branch-weight metadata is inconsistent here.
1213 if (SuccWeights.size() != 1 + BBCases.size())
1214 PredHasWeights = SuccHasWeights = false;
1215 } else if (PredHasWeights)
1216 SuccWeights.assign(1 + BBCases.size(), 1);
1217
1218 if (PredDefault == BB) {
1219 // If this is the default destination from PTI, only the edges in TI
1220 // that don't occur in PTI, or that branch to BB will be activated.
1221 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1222 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1223 if (PredCases[i].Dest != BB)
1224 PTIHandled.insert(PredCases[i].Value);
1225 else {
1226 // The default destination is BB, we don't need explicit targets.
1227 std::swap(PredCases[i], PredCases.back());
1228
1229 if (PredHasWeights || SuccHasWeights) {
1230 // Increase weight for the default case.
1231 Weights[0] += Weights[i + 1];
1232 std::swap(Weights[i + 1], Weights.back());
1233 Weights.pop_back();
1234 }
1235
1236 PredCases.pop_back();
1237 --i;
1238 --e;
1239 }
1240
1241 // Reconstruct the new switch statement we will be building.
1242 if (PredDefault != BBDefault) {
1243 PredDefault->removePredecessor(Pred);
1244 if (DTU && PredDefault != BB)
1245 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1246 PredDefault = BBDefault;
1247 ++NewSuccessors[BBDefault];
1248 }
1249
1250 unsigned CasesFromPred = Weights.size();
1251 uint64_t ValidTotalSuccWeight = 0;
1252 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1253 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1254 PredCases.push_back(BBCases[i]);
1255 ++NewSuccessors[BBCases[i].Dest];
1256 if (SuccHasWeights || PredHasWeights) {
1257 // The default weight is at index 0, so weight for the ith case
1258 // should be at index i+1. Scale the cases from successor by
1259 // PredDefaultWeight (Weights[0]).
1260 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1261 ValidTotalSuccWeight += SuccWeights[i + 1];
1262 }
1263 }
1264
1265 if (SuccHasWeights || PredHasWeights) {
1266 ValidTotalSuccWeight += SuccWeights[0];
1267 // Scale the cases from predecessor by ValidTotalSuccWeight.
1268 for (unsigned i = 1; i < CasesFromPred; ++i)
1269 Weights[i] *= ValidTotalSuccWeight;
1270 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1271 Weights[0] *= SuccWeights[0];
1272 }
1273 } else {
1274 // If this is not the default destination from PSI, only the edges
1275 // in SI that occur in PSI with a destination of BB will be
1276 // activated.
1277 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1278 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1279 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1280 if (PredCases[i].Dest == BB) {
1281 PTIHandled.insert(PredCases[i].Value);
1282
1283 if (PredHasWeights || SuccHasWeights) {
1284 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1285 std::swap(Weights[i + 1], Weights.back());
1286 Weights.pop_back();
1287 }
1288
1289 std::swap(PredCases[i], PredCases.back());
1290 PredCases.pop_back();
1291 --i;
1292 --e;
1293 }
1294
1295 // Okay, now we know which constants were sent to BB from the
1296 // predecessor. Figure out where they will all go now.
1297 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1298 if (PTIHandled.count(BBCases[i].Value)) {
1299 // If this is one we are capable of getting...
1300 if (PredHasWeights || SuccHasWeights)
1301 Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1302 PredCases.push_back(BBCases[i]);
1303 ++NewSuccessors[BBCases[i].Dest];
1304 PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
1305 }
1306
1307 // If there are any constants vectored to BB that TI doesn't handle,
1308 // they must go to the default destination of TI.
1309 for (ConstantInt *I : PTIHandled) {
1310 if (PredHasWeights || SuccHasWeights)
1311 Weights.push_back(WeightsForHandled[I]);
1312 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1313 ++NewSuccessors[BBDefault];
1314 }
1315 }
1316
1317 // Okay, at this point, we know which new successor Pred will get. Make
1318 // sure we update the number of entries in the PHI nodes for these
1319 // successors.
1320 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1321 if (DTU) {
1322 SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
1323 Updates.reserve(Updates.size() + NewSuccessors.size());
1324 }
1325 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1326 NewSuccessors) {
1327 for (auto I : seq(NewSuccessor.second)) {
1328 (void)I;
1329 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1330 }
1331 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1332 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1333 }
1334
1335 Builder.SetInsertPoint(PTI);
1336 // Convert pointer to int before we switch.
1337 if (CV->getType()->isPointerTy()) {
1338 CV =
1339 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1340 }
1341
1342 // Now that the successors are updated, create the new Switch instruction.
1343 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1344 NewSI->setDebugLoc(PTI->getDebugLoc());
1345 for (ValueEqualityComparisonCase &V : PredCases)
1346 NewSI->addCase(V.Value, V.Dest);
1347
1348 if (PredHasWeights || SuccHasWeights) {
1349 // Halve the weights if any of them cannot fit in an uint32_t
1350 fitWeights(Weights);
1351
1352 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1353
1354 setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
1355 }
1356
1358
1359 // Okay, last check. If BB is still a successor of PSI, then we must
1360 // have an infinite loop case. If so, add an infinitely looping block
1361 // to handle the case to preserve the behavior of the code.
1362 BasicBlock *InfLoopBlock = nullptr;
1363 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1364 if (NewSI->getSuccessor(i) == BB) {
1365 if (!InfLoopBlock) {
1366 // Insert it at the end of the function, because it's either code,
1367 // or it won't matter if it's hot. :)
1368 InfLoopBlock =
1369 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1370 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1371 if (DTU)
1372 Updates.push_back(
1373 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1374 }
1375 NewSI->setSuccessor(i, InfLoopBlock);
1376 }
1377
1378 if (DTU) {
1379 if (InfLoopBlock)
1380 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1381
1382 Updates.push_back({DominatorTree::Delete, Pred, BB});
1383
1384 DTU->applyUpdates(Updates);
1385 }
1386
1387 ++NumFoldValueComparisonIntoPredecessors;
1388 return true;
1389}
1390
1391/// The specified terminator is a value equality comparison instruction
1392/// (either a switch or a branch on "X == c").
1393/// See if any of the predecessors of the terminator block are value comparisons
1394/// on the same value. If so, and if safe to do so, fold them together.
1395bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1396 IRBuilder<> &Builder) {
1397 BasicBlock *BB = TI->getParent();
1398 Value *CV = isValueEqualityComparison(TI); // CondVal
1399 assert(CV && "Not a comparison?");
1400
1401 bool Changed = false;
1402
1404 while (!Preds.empty()) {
1405 BasicBlock *Pred = Preds.pop_back_val();
1406 Instruction *PTI = Pred->getTerminator();
1407
1408 // Don't try to fold into itself.
1409 if (Pred == BB)
1410 continue;
1411
1412 // See if the predecessor is a comparison with the same value.
1413 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1414 if (PCV != CV)
1415 continue;
1416
1418 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1419 for (auto *Succ : FailBlocks) {
1420 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1421 return false;
1422 }
1423 }
1424
1425 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1426 Changed = true;
1427 }
1428 return Changed;
1429}
1430
1431// If we would need to insert a select that uses the value of this invoke
1432// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1433// need to do this), we can't hoist the invoke, as there is nowhere to put the
1434// select in this case.
1436 Instruction *I1, Instruction *I2) {
1437 for (BasicBlock *Succ : successors(BB1)) {
1438 for (const PHINode &PN : Succ->phis()) {
1439 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1440 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1441 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1442 return false;
1443 }
1444 }
1445 }
1446 return true;
1447}
1448
1449// Get interesting characteristics of instructions that
1450// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1451// instructions can be reordered across.
1457
1459 unsigned Flags = 0;
1460 if (I->mayReadFromMemory())
1461 Flags |= SkipReadMem;
1462 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1463 // inalloca) across stacksave/stackrestore boundaries.
1464 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1465 Flags |= SkipSideEffect;
1467 Flags |= SkipImplicitControlFlow;
1468 return Flags;
1469}
1470
1471// Returns true if it is safe to reorder an instruction across preceding
1472// instructions in a basic block.
1473static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1474 // Don't reorder a store over a load.
1475 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1476 return false;
1477
1478 // If we have seen an instruction with side effects, it's unsafe to reorder an
1479 // instruction which reads memory or itself has side effects.
1480 if ((Flags & SkipSideEffect) &&
1481 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1482 return false;
1483
1484 // Reordering across an instruction which does not necessarily transfer
1485 // control to the next instruction is speculation.
1487 return false;
1488
1489 // Hoisting of llvm.deoptimize is only legal together with the next return
1490 // instruction, which this pass is not always able to do.
1491 if (auto *CB = dyn_cast<CallBase>(I))
1492 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1493 return false;
1494
1495 // It's also unsafe/illegal to hoist an instruction above its instruction
1496 // operands
1497 BasicBlock *BB = I->getParent();
1498 for (Value *Op : I->operands()) {
1499 if (auto *J = dyn_cast<Instruction>(Op))
1500 if (J->getParent() == BB)
1501 return false;
1502 }
1503
1504 return true;
1505}
1506
1507static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1508
1509/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1510/// instructions \p I1 and \p I2 can and should be hoisted.
1512 const TargetTransformInfo &TTI) {
1513 // If we're going to hoist a call, make sure that the two instructions
1514 // we're commoning/hoisting are both marked with musttail, or neither of
1515 // them is marked as such. Otherwise, we might end up in a situation where
1516 // we hoist from a block where the terminator is a `ret` to a block where
1517 // the terminator is a `br`, and `musttail` calls expect to be followed by
1518 // a return.
1519 auto *C1 = dyn_cast<CallInst>(I1);
1520 auto *C2 = dyn_cast<CallInst>(I2);
1521 if (C1 && C2)
1522 if (C1->isMustTailCall() != C2->isMustTailCall())
1523 return false;
1524
1526 return false;
1527
1528 // If any of the two call sites has nomerge or convergent attribute, stop
1529 // hoisting.
1530 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1531 if (CB1->cannotMerge() || CB1->isConvergent())
1532 return false;
1533 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1534 if (CB2->cannotMerge() || CB2->isConvergent())
1535 return false;
1536
1537 return true;
1538}
1539
1540/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1541/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1542/// hoistCommonCodeFromSuccessors. e.g. The input:
1543/// I1 DVRs: { x, z },
1544/// OtherInsts: { I2 DVRs: { x, y, z } }
1545/// would result in hoisting only DbgVariableRecord x.
1547 Instruction *TI, Instruction *I1,
1548 SmallVectorImpl<Instruction *> &OtherInsts) {
1549 if (!I1->hasDbgRecords())
1550 return;
1551 using CurrentAndEndIt =
1552 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1553 // Vector of {Current, End} iterators.
1555 Itrs.reserve(OtherInsts.size() + 1);
1556 // Helper lambdas for lock-step checks:
1557 // Return true if this Current == End.
1558 auto atEnd = [](const CurrentAndEndIt &Pair) {
1559 return Pair.first == Pair.second;
1560 };
1561 // Return true if all Current are identical.
1562 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1563 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1565 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1566 });
1567 };
1568
1569 // Collect the iterators.
1570 Itrs.push_back(
1571 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1572 for (Instruction *Other : OtherInsts) {
1573 if (!Other->hasDbgRecords())
1574 return;
1575 Itrs.push_back(
1576 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1577 }
1578
1579 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1580 // the lock-step DbgRecord are identical, hoist all of them to TI.
1581 // This replicates the dbg.* intrinsic behaviour in
1582 // hoistCommonCodeFromSuccessors.
1583 while (none_of(Itrs, atEnd)) {
1584 bool HoistDVRs = allIdentical(Itrs);
1585 for (CurrentAndEndIt &Pair : Itrs) {
1586 // Increment Current iterator now as we may be about to move the
1587 // DbgRecord.
1588 DbgRecord &DR = *Pair.first++;
1589 if (HoistDVRs) {
1590 DR.removeFromParent();
1591 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1592 }
1593 }
1594 }
1595}
1596
1598 const Instruction *I2) {
1599 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1600 return true;
1601
1602 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1603 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1604 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1605 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1606 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1607
1608 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1609 return I1->getOperand(0) == I2->getOperand(1) &&
1610 I1->getOperand(1) == I2->getOperand(0) &&
1611 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1612 }
1613
1614 return false;
1615}
1616
1617/// If the target supports conditional faulting,
1618/// we look for the following pattern:
1619/// \code
1620/// BB:
1621/// ...
1622/// %cond = icmp ult %x, %y
1623/// br i1 %cond, label %TrueBB, label %FalseBB
1624/// FalseBB:
1625/// store i32 1, ptr %q, align 4
1626/// ...
1627/// TrueBB:
1628/// %maskedloadstore = load i32, ptr %b, align 4
1629/// store i32 %maskedloadstore, ptr %p, align 4
1630/// ...
1631/// \endcode
1632///
1633/// and transform it into:
1634///
1635/// \code
1636/// BB:
1637/// ...
1638/// %cond = icmp ult %x, %y
1639/// %maskedloadstore = cload i32, ptr %b, %cond
1640/// cstore i32 %maskedloadstore, ptr %p, %cond
1641/// cstore i32 1, ptr %q, ~%cond
1642/// br i1 %cond, label %TrueBB, label %FalseBB
1643/// FalseBB:
1644/// ...
1645/// TrueBB:
1646/// ...
1647/// \endcode
1648///
1649/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1650/// e.g.
1651///
1652/// \code
1653/// %vcond = bitcast i1 %cond to <1 x i1>
1654/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1655/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1656/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1657/// call void @llvm.masked.store.v1i32.p0
1658/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1659/// %cond.not = xor i1 %cond, true
1660/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1661/// call void @llvm.masked.store.v1i32.p0
1662/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1663/// \endcode
1664///
1665/// So we need to turn hoisted load/store into cload/cstore.
1666///
1667/// \param BI The branch instruction.
1668/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1669/// will be speculated.
1670/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
1672 BranchInst *BI,
1673 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1674 std::optional<bool> Invert) {
1675 auto &Context = BI->getParent()->getContext();
1676 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1677 auto *Cond = BI->getOperand(0);
1678 // Construct the condition if needed.
1679 BasicBlock *BB = BI->getParent();
1680 IRBuilder<> Builder(
1681 Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI);
1682 Value *Mask = nullptr;
1683 Value *MaskFalse = nullptr;
1684 Value *MaskTrue = nullptr;
1685 if (Invert.has_value()) {
1686 Mask = Builder.CreateBitCast(
1687 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1688 VCondTy);
1689 } else {
1690 MaskFalse = Builder.CreateBitCast(
1691 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1692 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1693 }
1694 auto PeekThroughBitcasts = [](Value *V) {
1695 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1696 V = BitCast->getOperand(0);
1697 return V;
1698 };
1699 for (auto *I : SpeculatedConditionalLoadsStores) {
1700 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1701 if (!Invert.has_value())
1702 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1703 // We currently assume conditional faulting load/store is supported for
1704 // scalar types only when creating new instructions. This can be easily
1705 // extended for vector types in the future.
1706 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1707 auto *Op0 = I->getOperand(0);
1708 CallInst *MaskedLoadStore = nullptr;
1709 if (auto *LI = dyn_cast<LoadInst>(I)) {
1710 // Handle Load.
1711 auto *Ty = I->getType();
1712 PHINode *PN = nullptr;
1713 Value *PassThru = nullptr;
1714 if (Invert.has_value())
1715 for (User *U : I->users())
1716 if ((PN = dyn_cast<PHINode>(U))) {
1717 PassThru = Builder.CreateBitCast(
1718 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1719 FixedVectorType::get(Ty, 1));
1720 break;
1721 }
1722 MaskedLoadStore = Builder.CreateMaskedLoad(
1723 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1724 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1725 if (PN)
1726 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1727 I->replaceAllUsesWith(NewLoadStore);
1728 } else {
1729 // Handle Store.
1730 auto *StoredVal = Builder.CreateBitCast(
1731 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1732 MaskedLoadStore = Builder.CreateMaskedStore(
1733 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1734 }
1735 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1736 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1737 //
1738 // !nonnull, !align : Not support pointer type, no need to keep.
1739 // !range: Load type is changed from scalar to vector, but the metadata on
1740 // vector specifies a per-element range, so the semantics stay the
1741 // same. Keep it.
1742 // !annotation: Not impact semantics. Keep it.
1743 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1744 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1745 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1746 // FIXME: DIAssignID is not supported for masked store yet.
1747 // (Verifier::visitDIAssignIDMetadata)
1749 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1750 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1751 });
1752 MaskedLoadStore->copyMetadata(*I);
1753 I->eraseFromParent();
1754 }
1755}
1756
1758 const TargetTransformInfo &TTI) {
     // Screens the instruction I: only simple loads and simple stores get past
     // the checks below; every other instruction is rejected with false.
     // NOTE(review): the first line of the signature and the final return
     // statement are absent from this listing, so the declaration of I and the
     // closing condition (presumably the TTI/alignment test that the comment
     // at the end alludes to) must be confirmed against the complete file.
1759 // Volatile and atomic accesses are not handled.
1760 if (auto *L = dyn_cast<LoadInst>(I)) {
1761 if (!L->isSimple())
1762 return false;
1763 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1764 if (!S->isSimple())
1765 return false;
1766 } else
     // Neither a load nor a store.
1767 return false;
1768
1769 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1770 // That's why we have the alignment limitation.
1771 // FIXME: Update the prototype of the intrinsics?
1774}
1775
1776namespace {
1777
1778// LockstepReverseIterator - Iterates through instructions
1779// in a set of blocks in reverse order from the first non-terminator.
1780// For example (assume all blocks have size n):
1781// LockstepReverseIterator I([B1, B2, B3]);
1782// *I-- = [B1[n], B2[n], B3[n]];
1783// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
1784// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
1785// ...
     //
     // Debug-info intrinsics (DbgInfoIntrinsic) are stepped over in both
     // directions. As soon as one block has no further instruction to visit the
     // iterator becomes invalid (isValid() returns false) and both step
     // operators turn into no-ops until reset() is called.
1786class LockstepReverseIterator {
     // NOTE(review): the declarations of the Blocks and Insts members used
     // below are not present in this listing; confirm them against the
     // complete file.
     // Set once some block has run out of instructions to visit.
1789 bool Fail;
1790
1791public:
     // Remembers the blocks and positions the iterator on the last
     // non-debug instruction before each block's terminator.
1792 LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
1793 reset();
1794 }
1795
     // Clears the failure flag and re-seeds Insts with, for every block, the
     // closest non-debug instruction preceding the terminator. Fails (and stops
     // early, leaving Insts only partially filled) if some block has none.
1796 void reset() {
1797 Fail = false;
1798 Insts.clear();
1799 for (auto *BB : Blocks) {
1800 Instruction *Inst = BB->getTerminator();
     // Walk backwards past the terminator and any debug intrinsics.
1801 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1802 Inst = Inst->getPrevNode();
1803 if (!Inst) {
1804 // Block wasn't big enough.
1805 Fail = true;
1806 return;
1807 }
1808 Insts.push_back(Inst);
1809 }
1810 }
1811
     // True while operator*() yields one usable instruction per block.
1812 bool isValid() const { return !Fail; }
1813
     // Moves every cursor one non-debug instruction towards the start of its
     // block. Insts is updated in place, so on failure the entry that ran off
     // its block is left null and earlier entries have already moved; callers
     // are expected to check isValid() before dereferencing.
1814 void operator--() {
1815 if (Fail)
1816 return;
1817 for (auto *&Inst : Insts) {
1818 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1819 Inst = Inst->getPrevNode();
1820 // Already at beginning of block.
1821 if (!Inst) {
1822 Fail = true;
1823 return;
1824 }
1825 }
1826 }
1827
     // Mirror image of operator--: moves every cursor one non-debug
     // instruction towards the end of its block, failing when a cursor has no
     // successor instruction.
1828 void operator++() {
1829 if (Fail)
1830 return;
1831 for (auto *&Inst : Insts) {
1832 for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1833 Inst = Inst->getNextNode();
1834 // Already at end of block.
1835 if (!Inst) {
1836 Fail = true;
1837 return;
1838 }
1839 }
1840 }
1841
     // The current instruction of each block, in the order the blocks were
     // passed to the constructor.
1842 ArrayRef<Instruction *> operator*() const { return Insts; }
1843};
1844
1845} // end anonymous namespace
1846
1847/// Hoist any common code in the successor blocks up into the block. This
1848/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1849/// given, only perform hoisting in case all successors blocks contain matching
1850/// instructions only. In that case, all instructions can be hoisted and the
1851/// original branch will be replaced and selects for PHIs are added.
     ///
     /// TI is the terminator whose successors are scanned; hoisted instructions
     /// are moved immediately before it. Returns true if anything was hoisted
     /// (when the scan reaches the successors' terminators the result of
     /// hoistSuccIdenticalTerminatorToSwitchOrIf is folded in as well).
1852bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1853 bool AllInstsEqOnly) {
1854 // This does very trivial matching, with limited scanning, to find identical
1855 // instructions in the two blocks. In particular, we don't want to get into
1856 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1857 // such, we currently just scan for obviously identical instructions in an
1858 // identical order, possibly separated by the same number of non-identical
1859 // instructions.
1860 BasicBlock *BB = TI->getParent();
1861 unsigned int SuccSize = succ_size(BB);
1862 if (SuccSize < 2)
1863 return false;
1864
1865 // If either of the blocks has its address taken, then we can't do this fold,
1866 // because the code we'd hoist would no longer run when we jump into the block
1867 // by its address.
     // Successors with more than one predecessor are rejected here as well.
1868 for (auto *Succ : successors(BB))
1869 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1870 return false;
1871
1872 // The second of pair is a SkipFlags bitmask.
     // The first of pair is the scan position inside the successor.
1873 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1874 SmallVector<SuccIterPair, 8> SuccIterPairs;
1875 for (auto *Succ : successors(BB)) {
1876 BasicBlock::iterator SuccItr = Succ->begin();
     // A successor that starts with a PHI node blocks the whole transform.
1877 if (isa<PHINode>(*SuccItr))
1878 return false;
1879 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1880 }
1881
1882 if (AllInstsEqOnly) {
1883 // Check if all instructions in the successor blocks match. This allows
1884 // hoisting all instructions and removing the blocks we are hoisting from,
1885 // so does not add any new instructions.
     // NOTE(review): the declaration of Succs used below is missing from this
     // listing; confirm it against the complete file.
1887 // Check if sizes and terminators of all successors match.
1888 bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
1889 Instruction *Term0 = Succs[0]->getTerminator();
1890 Instruction *Term = Succ->getTerminator();
1891 return !Term->isSameOperationAs(Term0) ||
1892 !equal(Term->operands(), Term0->operands()) ||
1893 Succs[0]->size() != Succ->size();
1894 });
1895 if (!AllSame)
1896 return false;
     // Always true at this point: the !AllSame case returned just above.
1897 if (AllSame) {
     // Compare the successors bottom-up, one instruction per block at a time.
1898 LockstepReverseIterator LRI(Succs);
1899 while (LRI.isValid()) {
1900 Instruction *I0 = (*LRI)[0];
1901 if (any_of(*LRI, [I0](Instruction *I) {
1902 return !areIdenticalUpToCommutativity(I0, I);
1903 })) {
1904 return false;
1905 }
1906 --LRI;
1907 }
1908 }
1909 // Now we know that all instructions in all successors can be hoisted. Let
1910 // the loop below handle the hoisting.
1911 }
1912
1913 // Count how many instructions were not hoisted so far. There's a limit on how
1914 // many instructions we skip, serving as a compilation time control as well as
1915 // preventing excessive increase of life ranges.
1916 unsigned NumSkipped = 0;
1917 // If we find an unreachable instruction at the beginning of a basic block, we
1918 // can still hoist instructions from the rest of the basic blocks.
     // This pruning is only attempted when there are more than two successors,
     // and at least two candidates must remain afterwards.
1919 if (SuccIterPairs.size() > 2) {
1920 erase_if(SuccIterPairs,
1921 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1922 if (SuccIterPairs.size() < 2)
1923 return false;
1924 }
1925
1926 bool Changed = false;
1927
     // Main lockstep scan. Each iteration looks at the current instruction of
     // every remaining successor; the loop is left only through one of the
     // return statements below.
1928 for (;;) {
     // Split the candidates into the first successor (BB1) and all others.
1929 auto *SuccIterPairBegin = SuccIterPairs.begin();
1930 auto &BB1ItrPair = *SuccIterPairBegin++;
1931 auto OtherSuccIterPairRange =
1932 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1933 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1934
1935 Instruction *I1 = &*BB1ItrPair.first;
1936
1937 // Skip debug info if it is not identical.
1938 bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
1939 Instruction *I2 = &*Iter;
1940 return I1->isIdenticalToWhenDefined(I2);
1941 });
1942 if (!AllDbgInstsAreIdentical) {
     // Advance every cursor past its leading debug intrinsics. SuccIter is
     // taken by reference, so the stored iterators move as well.
1943 while (isa<DbgInfoIntrinsic>(I1))
1944 I1 = &*++BB1ItrPair.first;
1945 for (auto &SuccIter : OtherSuccIterRange) {
1946 Instruction *I2 = &*SuccIter;
1947 while (isa<DbgInfoIntrinsic>(I2))
1948 I2 = &*++SuccIter;
1949 }
1950 }
1951
     // Compare I1 against the current instruction of every other successor and
     // note whether any of the instructions is a terminator.
1952 bool AllInstsAreIdentical = true;
1953 bool HasTerminator = I1->isTerminator();
1954 for (auto &SuccIter : OtherSuccIterRange) {
1955 Instruction *I2 = &*SuccIter;
1956 HasTerminator |= I2->isTerminator();
     // Differing MMRA metadata counts as a mismatch too.
1957 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1958 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1959 AllInstsAreIdentical = false;
1960 }
1961
     // NOTE(review): the declaration of OtherInsts used below is missing from
     // this listing; confirm it against the complete file.
1963 for (auto &SuccIter : OtherSuccIterRange)
1964 OtherInsts.push_back(&*SuccIter);
1965
1966 // If we are hoisting the terminator instruction, don't move one (making a
1967 // broken BB), instead clone it, and remove BI.
1968 if (HasTerminator) {
1969 // Even if BB, which contains only one unreachable instruction, is ignored
1970 // at the beginning of the loop, we can hoist the terminator instruction.
1971 // If any instructions remain in the block, we cannot hoist terminators.
1972 if (NumSkipped || !AllInstsAreIdentical) {
1973 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1974 return Changed;
1975 }
1976
1977 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1978 Changed;
1979 }
1980
1981 if (AllInstsAreIdentical) {
     // Identical is not enough: every copy must also be safe to hoist given
     // what has been skipped in its block so far.
1982 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1983 AllInstsAreIdentical =
1984 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1985 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1986 Instruction *I2 = &*Pair.first;
1987 unsigned SkipFlagsBB2 = Pair.second;
1988 // Even if the instructions are identical, it may not
1989 // be safe to hoist them if we have skipped over
1990 // instructions with side effects or their operands
1991 // weren't hoisted.
1992 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
     // NOTE(review): the right-hand operand of this && is missing from this
     // listing; confirm it against the complete file.
1994 });
1995 }
1996
1997 if (AllInstsAreIdentical) {
     // Step BB1's cursor past I1 before I1 is moved out of its block.
1998 BB1ItrPair.first++;
1999 if (isa<DbgInfoIntrinsic>(I1)) {
2000 // The debug location is an integral part of a debug info intrinsic
2001 // and can't be separated from it or replaced. Instead of attempting
2002 // to merge locations, simply hoist both copies of the intrinsic.
2003 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2004 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2005 // and leave any that were not hoisted behind (by calling moveBefore
2006 // rather than moveBeforePreserving).
2007 I1->moveBefore(TI);
2008 for (auto &SuccIter : OtherSuccIterRange) {
2009 auto *I2 = &*SuccIter++;
2010 assert(isa<DbgInfoIntrinsic>(I2));
2011 I2->moveBefore(TI);
2012 }
2013 } else {
2014 // For a normal instruction, we just move one to right before the
2015 // branch, then replace all uses of the other with the first. Finally,
2016 // we remove the now redundant second instruction.
2017 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2018 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2019 // and leave any that were not hoisted behind (by calling moveBefore
2020 // rather than moveBeforePreserving).
2021 I1->moveBefore(TI);
2022 for (auto &SuccIter : OtherSuccIterRange) {
     // Advance the cursor first; I2 is erased at the end of this iteration.
2023 Instruction *I2 = &*SuccIter++;
2024 assert(I2 != I1);
2025 if (!I2->use_empty())
2026 I2->replaceAllUsesWith(I1);
     // Fold I2's IR flags, call attributes and metadata into the surviving I1.
2027 I1->andIRFlags(I2);
2028 if (auto *CB = dyn_cast<CallBase>(I1)) {
2029 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2030 assert(Success && "We should not be trying to hoist callbases "
2031 "with non-intersectable attributes");
2032 // For NDEBUG Compile.
2033 (void)Success;
2034 }
2035
2036 combineMetadataForCSE(I1, I2, true);
2037 // I1 and I2 are being combined into a single instruction. Its debug
2038 // location is the merged locations of the original instructions.
2039 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2040 I2->eraseFromParent();
2041 }
2042 }
     // NumHoistCommonCode is bumped only for the first hoist of this call,
     // NumHoistCommonInstrs for every hoist.
2043 if (!Changed)
2044 NumHoistCommonCode += SuccIterPairs.size();
2045 Changed = true;
2046 NumHoistCommonInstrs += SuccIterPairs.size();
2047 } else {
     // Mismatch (or unsafe to hoist): give up once the skip budget is used up.
2048 if (NumSkipped >= HoistCommonSkipLimit) {
2049 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2050 return Changed;
2051 }
2052 // We are about to skip over a pair of non-identical instructions. Record
2053 // if any have characteristics that would prevent reordering instructions
2054 // across them.
2055 for (auto &SuccIterPair : SuccIterPairs) {
2056 Instruction *I = &*SuccIterPair.first++;
2057 SuccIterPair.second |= skippedInstrFlags(I);
2058 }
2059 ++NumSkipped;
2060 }
2061 }
2062}
2063
     // Hoists the terminator shared by the successors of TI into TI's block.
     // I1 is the terminator of the first successor and OtherSuccTIs holds the
     // terminators of the remaining ones. A clone of I1 is inserted before TI
     // and PHI nodes in the successors of I1's block gain TI's block as a
     // predecessor. When TI is a BranchInst, PHI inputs that disagree between
     // the two arms are merged with a select on the branch condition. Returns
     // false, without changing anything, when the terminator cannot be
     // hoisted; returns true otherwise.
2064bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2065 Instruction *TI, Instruction *I1,
2066 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
2067
     // Null when TI is not a BranchInst.
2068 auto *BI = dyn_cast<BranchInst>(TI);
2069
2070 bool Changed = false;
2071 BasicBlock *TIParent = TI->getParent();
2072 BasicBlock *BB1 = I1->getParent();
2073
2074 // Use only for an if statement.
2075 auto *I2 = *OtherSuccTIs.begin();
2076 auto *BB2 = I2->getParent();
2077 if (BI) {
     // With a branch there is exactly one other successor, and BB1/BB2 are the
     // branch's successors 0 and 1 respectively.
2078 assert(OtherSuccTIs.size() == 1);
2079 assert(BI->getSuccessor(0) == I1->getParent());
2080 assert(BI->getSuccessor(1) == I2->getParent());
2081 }
2082
2083 // In the case of an if statement, we try to hoist an invoke.
2084 // FIXME: Can we define a safety predicate for CallBr?
2085 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2086 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2087 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2088 return false;
2089
2090 // TODO: callbr hoisting currently disabled pending further study.
2091 if (isa<CallBrInst>(I1))
2092 return false;
2093
     // Legality scan over the PHI nodes of BB1's successors: if the value
     // coming from BB1 differs from the one coming from another hoisting
     // candidate, the hoist is only possible in the branch case, and even then
     // not under the conditions tested below.
2094 for (BasicBlock *Succ : successors(BB1)) {
2095 for (PHINode &PN : Succ->phis()) {
2096 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2097 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2098 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2099 if (BB1V == BB2V)
2100 continue;
2101
2102 // In the case of an if statement, check for
2103 // passingValueIsAlwaysUndefined here because we would rather eliminate
2104 // undefined control flow than converting it to a select.
2105 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
     // NOTE(review): the last operand of this condition is missing from this
     // listing; confirm it against the complete file.
2107 return false;
2108 }
2109 }
2110 }
2111
2112 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2113 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2114 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2115 // Clone the terminator and hoist it into the pred, without any debug info.
2116 Instruction *NT = I1->clone();
2117 NT->insertInto(TIParent, TI->getIterator());
     // A value-producing terminator: redirect every user of the originals to
     // the clone and let the clone take over I1's name.
2118 if (!NT->getType()->isVoidTy()) {
2119 I1->replaceAllUsesWith(NT);
2120 for (Instruction *OtherSuccTI : OtherSuccTIs)
2121 OtherSuccTI->replaceAllUsesWith(NT);
2122 NT->takeName(I1);
2123 }
2124 Changed = true;
2125 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2126
2127 // Ensure terminator gets a debug location, even an unknown one, in case
2128 // it involves inlinable calls.
     // NOTE(review): the declaration of Locs used below is missing from this
     // listing; confirm it against the complete file.
2130 Locs.push_back(I1->getDebugLoc());
2131 for (auto *OtherSuccTI : OtherSuccTIs)
2132 Locs.push_back(OtherSuccTI->getDebugLoc());
2133 NT->setDebugLoc(DILocation::getMergedLocations(Locs));
2134
2135 // PHIs created below will adopt NT's merged DebugLoc.
2136 IRBuilder<NoFolder> Builder(NT);
2137
2138 // In the case of an if statement, hoisting one of the terminators from our
2139 // successor is a great thing. Unfortunately, the successors of the if/else
2140 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2141 // must agree for all PHI nodes, so we insert select instruction to compute
2142 // the final result.
2143 if (BI) {
     // One select per distinct (BB1 value, BB2 value) pair, reused across PHIs.
2144 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2145 for (BasicBlock *Succ : successors(BB1)) {
2146 for (PHINode &PN : Succ->phis()) {
2147 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2148 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2149 if (BB1V == BB2V)
2150 continue;
2151
2152 // These values do not agree. Insert a select instruction before NT
2153 // that determines the right value.
2154 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2155 if (!SI) {
2156 // Propagate fast-math-flags from phi node to its replacement select.
2157 SI = cast<SelectInst>(Builder.CreateSelectFMF(
2158 BI->getCondition(), BB1V, BB2V,
2159 isa<FPMathOperator>(PN) ? &PN : nullptr,
2160 BB1V->getName() + "." + BB2V->getName(), BI));
2161 }
2162
2163 // Make the PHI node use the select for all incoming values for BB1/BB2
2164 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2165 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2166 PN.setIncomingValue(i, SI);
2167 }
2168 }
2169 }
2170
     // NOTE(review): the declaration of Updates used below is missing from
     // this listing; confirm it against the complete file.
2172
2173 // Update any PHI nodes in our new successors.
2174 for (BasicBlock *Succ : successors(BB1)) {
2175 addPredecessorToBlock(Succ, TIParent, BB1);
2176 if (DTU)
2177 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2178 }
2179
     // The edges leaving through the old terminator TI are queued for deletion.
2180 if (DTU)
2181 for (BasicBlock *Succ : successors(TI))
2182 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2183
     // NOTE(review): one statement is missing from this listing here
     // (presumably the removal of the old terminator TI); confirm it against
     // the complete file.
2185 if (DTU)
2186 DTU->applyUpdates(Updates);
2187 return Changed;
2188}
2189
2190// Check lifetime markers.
2191static bool isLifeTimeMarker(const Instruction *I) {
2192 if (auto II = dyn_cast<IntrinsicInst>(I)) {
2193 switch (II->getIntrinsicID()) {
2194 default:
2195 break;
2196 case Intrinsic::lifetime_start:
2197 case Intrinsic::lifetime_end:
2198 return true;
2199 }
2200 }
2201 return false;
2202}
2203
     // Tells whether operand OpIdx of instruction I may be replaced by a
     // variable: for an integer division/remainder every operand except
     // operand 1 qualifies; for any other instruction the answer is yes unless
     // I is an IntrinsicInst.
     // NOTE(review): the first line of the signature (return type, name and
     // the declaration of I) is missing from this listing; confirm it against
     // the complete file.
2204// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2205// into variables.
2207 int OpIdx) {
2208 // Divide/Remainder by constant is typically much cheaper than by variable.
2209 if (I->isIntDivRem())
2210 return OpIdx != 1;
2211 return !isa<IntrinsicInst>(I);
2212}
2213
2214// All instructions in Insts belong to different blocks that all unconditionally
2215// branch to a common successor. Analyze each instruction and return true if it
2216// would be possible to sink them into their successor, creating one common
2217// instruction instead. For every value that would be required to be provided by
2218// a PHI node (because an operand varies in each input block), add to PHIOperands.
     //
     // PHIOperands is both read (to validate the uses of the first instruction)
     // and extended (with one entry per operand that would need a new PHI).
     // NOTE(review): the first lines of the signature (return type, name and
     // the declaration of Insts) are missing from this listing; confirm them
     // against the complete file.
2221 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2222 // Prune out obviously bad instructions to move. Each instruction must have
2223 // the same number of uses, and we check later that the uses are consistent.
2224 std::optional<unsigned> NumUses;
2225 for (auto *I : Insts) {
2226 // These instructions may change or break semantics if moved.
2227 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2228 I->getType()->isTokenTy())
2229 return false;
2230
2231 // Do not try to sink an instruction in an infinite loop - it can cause
2232 // this algorithm to infinite loop.
2233 if (I->getParent()->getSingleSuccessor() == I->getParent())
2234 return false;
2235
2236 // Conservatively return false if I is an inline-asm instruction. Sinking
2237 // and merging inline-asm instructions can potentially create arguments
2238 // that cannot satisfy the inline-asm constraints.
2239 // If the instruction has nomerge or convergent attribute, return false.
2240 if (const auto *C = dyn_cast<CallBase>(I))
2241 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2242 return false;
2243
     // The first instruction fixes the expected use count for all others.
2244 if (!NumUses)
2245 NumUses = I->getNumUses();
2246 else if (NumUses != I->getNumUses())
2247 return false;
2248 }
2249
     // Every instruction is compared against the first one, I0.
2250 const Instruction *I0 = Insts.front();
2251 const auto I0MMRA = MMRAMetadata(*I0);
2252 for (auto *I : Insts) {
2253 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2254 return false;
2255
2256 // swifterror pointers can only be used by a load or store; sinking a load
2257 // or store would require introducing a select for the pointer operand,
2258 // which isn't allowed for swifterror pointers.
2259 if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
2260 return false;
2261 if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
2262 return false;
2263
2264 // Treat MMRAs conservatively. This pass can be quite aggressive and
2265 // could drop a lot of MMRAs otherwise.
2266 if (MMRAMetadata(*I) != I0MMRA)
2267 return false;
2268 }
2269
2270 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2271 // then the other phi operands must match the instructions from Insts. This
2272 // also has to hold true for any phi nodes that would be created as a result
2273 // of sinking. Both of these cases are represented by PHIOperands.
2274 for (const Use &U : I0->uses()) {
2275 auto It = PHIOperands.find(&U);
2276 if (It == PHIOperands.end())
2277 // There may be uses in other blocks when sinking into a loop header.
2278 return false;
2279 if (!equal(Insts, It->second))
2280 return false;
2281 }
2282
2283 // For calls to be sinkable, they must all be indirect, or have same callee.
2284 // I.e. if we have two direct calls to different callees, we don't want to
2285 // turn that into an indirect call. Likewise, if we have an indirect call,
2286 // and a direct call, we don't actually want to have a single indirect call.
2287 if (isa<CallBase>(I0)) {
2288 auto IsIndirectCall = [](const Instruction *I) {
2289 return cast<CallBase>(I)->isIndirectCall();
2290 };
2291 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2292 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2293 if (HaveIndirectCalls) {
     // A mix of direct and indirect calls is rejected.
2294 if (!AllCallsAreIndirect)
2295 return false;
2296 } else {
2297 // All callees must be identical.
2298 Value *Callee = nullptr;
2299 for (const Instruction *I : Insts) {
2300 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2301 if (!Callee)
2302 Callee = CurrCallee;
2303 else if (Callee != CurrCallee)
2304 return false;
2305 }
2306 }
2307 }
2308
     // Operand by operand: an operand that is not the same value in every
     // instruction would need a PHI, which is recorded in PHIOperands unless
     // one of the restrictions below forbids it.
2309 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2310 Value *Op = I0->getOperand(OI);
2311 if (Op->getType()->isTokenTy())
2312 // Don't touch any operand of token type.
2313 return false;
2314
2315 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2316 assert(I->getNumOperands() == I0->getNumOperands());
2317 return I->getOperand(OI) == I0->getOperand(OI);
2318 };
2319 if (!all_of(Insts, SameAsI0)) {
2320 // SROA can't speculate lifetime markers of selects/phis, and the
2321 // backend may handle such lifetimes incorrectly as well (#104776).
2322 // Don't sink lifetimes if it would introduce a phi on the pointer
2323 // argument.
2324 if (isLifeTimeMarker(I0) && OI == 1 &&
2325 any_of(Insts, [](const Instruction *I) {
2326 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2327 }))
2328 return false;
2329
2330 if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
     // NOTE(review): the second operand of this || (the check the comment
     // below refers to) is missing from this listing; confirm it against the
     // complete file.
2332 // We can't create a PHI from this GEP.
2333 return false;
     // Keyed by I0's operand use; the values are listed in Insts order.
2334 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2335 for (auto *I : Insts)
2336 Ops.push_back(I->getOperand(OI));
2337 }
2338 }
2339 return true;
2340}
2341
2342// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2343// instruction of every block in Blocks to their common successor, commoning
2344// into one instruction.
     // NOTE(review): the function signature is missing from this listing;
     // confirm it against the complete file.
     // The sink target: successor 0 of the first block's terminator.
2346 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2347
2348 // canSinkInstructions returning true guarantees that every block has at
2349 // least one non-terminator instruction.
     // NOTE(review): the declaration of Insts used below is missing from this
     // listing; confirm it against the complete file.
2351 for (auto *BB : Blocks) {
     // Find the last instruction before the terminator that is not a debug
     // intrinsic; a block holding nothing else contributes no instruction.
2352 Instruction *I = BB->getTerminator();
2353 do {
2354 I = I->getPrevNode();
2355 } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
2356 if (!isa<DbgInfoIntrinsic>(I))
2357 Insts.push_back(I);
2358 }
2359
2360 // We don't need to do any more checking here; canSinkInstructions should
2361 // have done it all for us.
2362 SmallVector<Value*, 4> NewOperands;
2363 Instruction *I0 = Insts.front();
2364 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2365 // This check is different to that in canSinkInstructions. There, we
2366 // cared about the global view once simplifycfg (and instcombine) have
2367 // completed - it takes into account PHIs that become trivially
2368 // simplifiable. However here we need a more local view; if an operand
2369 // differs we create a PHI and rely on instcombine to clean up the very
2370 // small mess we may make.
2371 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2372 return I->getOperand(O) != I0->getOperand(O);
2373 });
2374 if (!NeedPHI) {
     // Same value everywhere: reuse it as is.
2375 NewOperands.push_back(I0->getOperand(O));
2376 continue;
2377 }
2378
2379 // Create a new PHI in the successor block and populate it.
2380 auto *Op = I0->getOperand(O);
2381 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2382 auto *PN =
2383 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2384 PN->insertBefore(BBEnd->begin());
2385 for (auto *I : Insts)
2386 PN->addIncoming(I->getOperand(O), I->getParent());
2387 NewOperands.push_back(PN);
2388 }
2389
2390 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2391 // and move it to the start of the successor block.
2392 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2393 I0->getOperandUse(O).set(NewOperands[O]);
2394
2395 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2396
2397 // Update metadata and IR flags, and merge debug locations.
2398 for (auto *I : Insts)
2399 if (I != I0) {
2400 // The debug location for the "common" instruction is the merged locations
2401 // of all the commoned instructions. We start with the original location
2402 // of the "common" instruction and iteratively merge each location in the
2403 // loop below.
2404 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2405 // However, since an N-way merge for CallInst is rare, we use the simplified
2406 // API instead of the complex API for N-way merge.
2407 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2408 combineMetadataForCSE(I0, I, true);
2409 I0->andIRFlags(I);
2410 if (auto *CB = dyn_cast<CallBase>(I0)) {
2411 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2412 assert(Success && "We should not be trying to sink callbases "
2413 "with non-intersectable attributes");
2414 // For NDEBUG Compile.
2415 (void)Success;
2416 }
2417 }
2418
     // Early-increment iteration: each user PHI is erased inside the loop.
2419 for (User *U : make_early_inc_range(I0->users())) {
2420 // canSinkInstructions checked that all instructions are only used by
2421 // phi nodes in a way that allows replacing the phi node with the common
2422 // instruction.
2423 auto *PN = cast<PHINode>(U);
2424 PN->replaceAllUsesWith(I0);
2425 PN->eraseFromParent();
2426 }
2427
2428 // Finally nuke all instructions apart from the common instruction.
2429 for (auto *I : Insts) {
2430 if (I == I0)
2431 continue;
2432 // The remaining uses are debug users, replace those with the common inst.
2433 // In most (all?) cases this just introduces a use-before-def.
2434 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2435 I->replaceAllUsesWith(I0);
2436 I->eraseFromParent();
2437 }
2438}
2439
2440/// Check whether BB's predecessors end with unconditional branches. If it is
2441/// true, sink any common code from the predecessors to BB.
2443 DomTreeUpdater *DTU) {
2444 // We support two situations:
2445 // (1) all incoming arcs are unconditional
2446 // (2) there are non-unconditional incoming arcs
2447 //
2448 // (2) is very common in switch defaults and
2449 // else-if patterns;
2450 //
2451 // if (a) f(1);
2452 // else if (b) f(2);
2453 //
2454 // produces:
2455 //
2456 // [if]
2457 // / \
2458 // [f(1)] [if]
2459 // | | \
2460 // | | |
2461 // | [f(2)]|
2462 // \ | /
2463 // [ end ]
2464 //
2465 // [end] has two unconditional predecessor arcs and one conditional. The
2466 // conditional refers to the implicit empty 'else' arc. This conditional
2467 // arc can also be caused by an empty default block in a switch.
2468 //
2469 // In this case, we attempt to sink code from all *unconditional* arcs.
2470 // If we can sink instructions from these arcs (determined during the scan
2471 // phase below) we insert a common successor for all unconditional arcs and
2472 // connect that to [end], to enable sinking:
2473 //
2474 // [if]
2475 // / \
2476 // [x(1)] [if]
2477 // | | \
2478 // | | \
2479 // | [x(2)] |
2480 // \ / |
2481 // [sink.split] |
2482 // \ /
2483 // [ end ]
2484 //
2485 SmallVector<BasicBlock*,4> UnconditionalPreds;
2486 bool HaveNonUnconditionalPredecessors = false;
2487 for (auto *PredBB : predecessors(BB)) {
2488 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2489 if (PredBr && PredBr->isUnconditional())
2490 UnconditionalPreds.push_back(PredBB);
2491 else
2492 HaveNonUnconditionalPredecessors = true;
2493 }
2494 if (UnconditionalPreds.size() < 2)
2495 return false;
2496
2497 // We take a two-step approach to tail sinking. First we scan from the end of
2498 // each block upwards in lockstep. If the n'th instruction from the end of each
2499 // block can be sunk, those instructions are added to ValuesToSink and we
2500 // carry on. If we can sink an instruction but need to PHI-merge some operands
2501 // (because they're not identical in each instruction) we add these to
2502 // PHIOperands.
2503 // We prepopulate PHIOperands with the phis that already exist in BB.
2505 for (PHINode &PN : BB->phis()) {
2507 for (const Use &U : PN.incoming_values())
2508 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2509 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2510 for (BasicBlock *Pred : UnconditionalPreds)
2511 Ops.push_back(*IncomingVals[Pred]);
2512 }
2513
2514 int ScanIdx = 0;
2515 SmallPtrSet<Value*,4> InstructionsToSink;
2516 LockstepReverseIterator LRI(UnconditionalPreds);
2517 while (LRI.isValid() &&
2518 canSinkInstructions(*LRI, PHIOperands)) {
2519 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2520 << "\n");
2521 InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2522 ++ScanIdx;
2523 --LRI;
2524 }
2525
2526 // If no instructions can be sunk, early-return.
2527 if (ScanIdx == 0)
2528 return false;
2529
2530 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2531
2532 if (!followedByDeoptOrUnreachable) {
2533 // Check whether this is the pointer operand of a load/store.
2534 auto IsMemOperand = [](Use &U) {
2535 auto *I = cast<Instruction>(U.getUser());
2536 if (isa<LoadInst>(I))
2537 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2538 if (isa<StoreInst>(I))
2539 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2540 return false;
2541 };
2542
2543 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2544 // actually sink before encountering instruction that is unprofitable to
2545 // sink?
2546 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2547 unsigned NumPHIInsts = 0;
2548 for (Use &U : (*LRI)[0]->operands()) {
2549 auto It = PHIOperands.find(&U);
2550 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2551 return InstructionsToSink.contains(V);
2552 })) {
2553 ++NumPHIInsts;
2554 // Do not separate a load/store from the gep producing the address.
2555 // The gep can likely be folded into the load/store as an addressing
2556 // mode. Additionally, a load of a gep is easier to analyze than a
2557 // load of a phi.
2558 if (IsMemOperand(U) &&
2559 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2560 return false;
2561 // FIXME: this check is overly optimistic. We may end up not sinking
2562 // said instruction, due to the very same profitability check.
2563 // See @creating_too_many_phis in sink-common-code.ll.
2564 }
2565 }
2566 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2567 return NumPHIInsts <= 1;
2568 };
2569
2570 // We've determined that we are going to sink last ScanIdx instructions,
2571 // and recorded them in InstructionsToSink. Now, some instructions may be
2572 // unprofitable to sink. But that determination depends on the instructions
2573 // that we are going to sink.
2574
2575 // First, forward scan: find the first instruction unprofitable to sink,
2576 // recording all the ones that are profitable to sink.
2577 // FIXME: would it be better, after we detect that not all are profitable.
2578 // to either record the profitable ones, or erase the unprofitable ones?
2579 // Maybe we need to choose (at runtime) the one that will touch least
2580 // instrs?
2581 LRI.reset();
2582 int Idx = 0;
2583 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2584 while (Idx < ScanIdx) {
2585 if (!ProfitableToSinkInstruction(LRI)) {
2586 // Too many PHIs would be created.
2587 LLVM_DEBUG(
2588 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2589 break;
2590 }
2591 InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2592 --LRI;
2593 ++Idx;
2594 }
2595
2596 // If no instructions can be sunk, early-return.
2597 if (Idx == 0)
2598 return false;
2599
2600 // Did we determine that (only) some instructions are unprofitable to sink?
2601 if (Idx < ScanIdx) {
2602 // Okay, some instructions are unprofitable.
2603 ScanIdx = Idx;
2604 InstructionsToSink = InstructionsProfitableToSink;
2605
2606 // But, that may make other instructions unprofitable, too.
2607 // So, do a backward scan, do any earlier instructions become
2608 // unprofitable?
2609 assert(
2610 !ProfitableToSinkInstruction(LRI) &&
2611 "We already know that the last instruction is unprofitable to sink");
2612 ++LRI;
2613 --Idx;
2614 while (Idx >= 0) {
2615 // If we detect that an instruction becomes unprofitable to sink,
2616 // all earlier instructions won't be sunk either,
2617 // so preemptively keep InstructionsProfitableToSink in sync.
2618 // FIXME: is this the most performant approach?
2619 for (auto *I : *LRI)
2620 InstructionsProfitableToSink.erase(I);
2621 if (!ProfitableToSinkInstruction(LRI)) {
2622 // Everything starting with this instruction won't be sunk.
2623 ScanIdx = Idx;
2624 InstructionsToSink = InstructionsProfitableToSink;
2625 }
2626 ++LRI;
2627 --Idx;
2628 }
2629 }
2630
2631 // If no instructions can be sunk, early-return.
2632 if (ScanIdx == 0)
2633 return false;
2634 }
2635
2636 bool Changed = false;
2637
2638 if (HaveNonUnconditionalPredecessors) {
2639 if (!followedByDeoptOrUnreachable) {
2640 // It is always legal to sink common instructions from unconditional
2641 // predecessors. However, if not all predecessors are unconditional,
2642 // this transformation might be pessimizing. So as a rule of thumb,
2643 // don't do it unless we'd sink at least one non-speculatable instruction.
2644 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2645 LRI.reset();
2646 int Idx = 0;
2647 bool Profitable = false;
2648 while (Idx < ScanIdx) {
2649 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2650 Profitable = true;
2651 break;
2652 }
2653 --LRI;
2654 ++Idx;
2655 }
2656 if (!Profitable)
2657 return false;
2658 }
2659
2660 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2661 // We have a conditional edge and we're going to sink some instructions.
2662 // Insert a new block postdominating all blocks we're going to sink from.
2663 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2664 // Edges couldn't be split.
2665 return false;
2666 Changed = true;
2667 }
2668
2669 // Now that we've analyzed all potential sinking candidates, perform the
2670 // actual sink. We iteratively sink the last non-terminator of the source
2671 // blocks into their common successor unless doing so would require too
2672 // many PHI instructions to be generated (currently only one PHI is allowed
2673 // per sunk instruction).
2674 //
2675 // We can use InstructionsToSink to discount values needing PHI-merging that will
2676 // actually be sunk in a later iteration. This allows us to be more
2677 // aggressive in what we sink. This does allow a false positive where we
2678 // sink presuming a later value will also be sunk, but stop half way through
2679 // and never actually sink it which means we produce more PHIs than intended.
2680 // This is unlikely in practice though.
2681 int SinkIdx = 0;
2682 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2683 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2684 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2685 << "\n");
2686
2687 // Because we've sunk every instruction in turn, the current instruction to
2688 // sink is always at index 0.
2689 LRI.reset();
2690
2691 sinkLastInstruction(UnconditionalPreds);
2692 NumSinkCommonInstrs++;
2693 Changed = true;
2694 }
2695 if (SinkIdx != 0)
2696 ++NumSinkCommonCode;
2697 return Changed;
2698}
2699
2700namespace {
2701
2702struct CompatibleSets {
2703 using SetTy = SmallVector<InvokeInst *, 2>;
2704
2706
2707 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2708
2709 SetTy &getCompatibleSet(InvokeInst *II);
2710
2711 void insert(InvokeInst *II);
2712};
2713
2714CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2715 // Perform a linear scan over all the existing sets, see if the new `invoke`
2716 // is compatible with any particular set. Since we know that all the `invokes`
2717 // within a set are compatible, only check the first `invoke` in each set.
2718 // WARNING: at worst, this has quadratic complexity.
2719 for (CompatibleSets::SetTy &Set : Sets) {
2720 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2721 return Set;
2722 }
2723
2724 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2725 return Sets.emplace_back();
2726}
2727
2728void CompatibleSets::insert(InvokeInst *II) {
2729 getCompatibleSet(II).emplace_back(II);
2730}
2731
2732bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2733 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2734
2735 // Can we theoretically merge these `invoke`s?
2736 auto IsIllegalToMerge = [](InvokeInst *II) {
2737 return II->cannotMerge() || II->isInlineAsm();
2738 };
2739 if (any_of(Invokes, IsIllegalToMerge))
2740 return false;
2741
2742 // Either both `invoke`s must be direct,
2743 // or both `invoke`s must be indirect.
2744 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2745 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2746 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2747 if (HaveIndirectCalls) {
2748 if (!AllCallsAreIndirect)
2749 return false;
2750 } else {
2751 // All callees must be identical.
2752 Value *Callee = nullptr;
2753 for (InvokeInst *II : Invokes) {
2754 Value *CurrCallee = II->getCalledOperand();
2755 assert(CurrCallee && "There is always a called operand.");
2756 if (!Callee)
2757 Callee = CurrCallee;
2758 else if (Callee != CurrCallee)
2759 return false;
2760 }
2761 }
2762
2763 // Either both `invoke`s must not have a normal destination,
2764 // or both `invoke`s must have a normal destination,
2765 auto HasNormalDest = [](InvokeInst *II) {
2766 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2767 };
2768 if (any_of(Invokes, HasNormalDest)) {
2769 // Do not merge `invoke` that does not have a normal destination with one
2770 // that does have a normal destination, even though doing so would be legal.
2771 if (!all_of(Invokes, HasNormalDest))
2772 return false;
2773
2774 // All normal destinations must be identical.
2775 BasicBlock *NormalBB = nullptr;
2776 for (InvokeInst *II : Invokes) {
2777 BasicBlock *CurrNormalBB = II->getNormalDest();
2778 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2779 if (!NormalBB)
2780 NormalBB = CurrNormalBB;
2781 else if (NormalBB != CurrNormalBB)
2782 return false;
2783 }
2784
2785 // In the normal destination, the incoming values for these two `invoke`s
2786 // must be compatible.
2787 SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
2789 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2790 &EquivalenceSet))
2791 return false;
2792 }
2793
2794#ifndef NDEBUG
2795 // All unwind destinations must be identical.
2796 // We know that because we have started from said unwind destination.
2797 BasicBlock *UnwindBB = nullptr;
2798 for (InvokeInst *II : Invokes) {
2799 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2800 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2801 if (!UnwindBB)
2802 UnwindBB = CurrUnwindBB;
2803 else
2804 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2805 }
2806#endif
2807
2808 // In the unwind destination, the incoming values for these two `invoke`s
2809 // must be compatible.
2811 Invokes.front()->getUnwindDest(),
2812 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2813 return false;
2814
2815 // Ignoring arguments, these `invoke`s must be identical,
2816 // including operand bundles.
2817 const InvokeInst *II0 = Invokes.front();
2818 for (auto *II : Invokes.drop_front())
2819 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2820 return false;
2821
2822 // Can we theoretically form the data operands for the merged `invoke`?
2823 auto IsIllegalToMergeArguments = [](auto Ops) {
2824 Use &U0 = std::get<0>(Ops);
2825 Use &U1 = std::get<1>(Ops);
2826 if (U0 == U1)
2827 return false;
2828 return U0->getType()->isTokenTy() ||
2829 !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
2830 U0.getOperandNo());
2831 };
2832 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2833 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2834 IsIllegalToMergeArguments))
2835 return false;
2836
2837 return true;
2838}
2839
2840} // namespace
2841
2842// Merge all invokes in the provided set, all of which are compatible
2843// as per the `CompatibleSets::shouldBelongToSameSet()`.
2845 DomTreeUpdater *DTU) {
2846 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2847
2849 if (DTU)
2850 Updates.reserve(2 + 3 * Invokes.size());
2851
2852 bool HasNormalDest =
2853 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2854
2855 // Clone one of the invokes into a new basic block.
2856 // Since they are all compatible, it doesn't matter which invoke is cloned.
2857 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2858 InvokeInst *II0 = Invokes.front();
2859 BasicBlock *II0BB = II0->getParent();
2860 BasicBlock *InsertBeforeBlock =
2861 II0->getParent()->getIterator()->getNextNode();
2862 Function *Func = II0BB->getParent();
2863 LLVMContext &Ctx = II0->getContext();
2864
2865 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2866 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2867
2868 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2869 // NOTE: all invokes have the same attributes, so no handling needed.
2870 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2871
2872 if (!HasNormalDest) {
2873 // This set does not have a normal destination,
2874 // so just form a new block with unreachable terminator.
2875 BasicBlock *MergedNormalDest = BasicBlock::Create(
2876 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2877 new UnreachableInst(Ctx, MergedNormalDest);
2878 MergedInvoke->setNormalDest(MergedNormalDest);
2879 }
2880
2881 // The unwind destination, however, remainds identical for all invokes here.
2882
2883 return MergedInvoke;
2884 }();
2885
2886 if (DTU) {
2887 // Predecessor blocks that contained these invokes will now branch to
2888 // the new block that contains the merged invoke, ...
2889 for (InvokeInst *II : Invokes)
2890 Updates.push_back(
2891 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2892
2893 // ... which has the new `unreachable` block as normal destination,
2894 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2895 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2896 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2897 SuccBBOfMergedInvoke});
2898
2899 // Since predecessor blocks now unconditionally branch to a new block,
2900 // they no longer branch to their original successors.
2901 for (InvokeInst *II : Invokes)
2902 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2903 Updates.push_back(
2904 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2905 }
2906
2907 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2908
2909 // Form the merged operands for the merged invoke.
2910 for (Use &U : MergedInvoke->operands()) {
2911 // Only PHI together the indirect callees and data operands.
2912 if (MergedInvoke->isCallee(&U)) {
2913 if (!IsIndirectCall)
2914 continue;
2915 } else if (!MergedInvoke->isDataOperand(&U))
2916 continue;
2917
2918 // Don't create trivial PHI's with all-identical incoming values.
2919 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2920 return II->getOperand(U.getOperandNo()) != U.get();
2921 });
2922 if (!NeedPHI)
2923 continue;
2924
2925 // Form a PHI out of all the data ops under this index.
2927 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2928 for (InvokeInst *II : Invokes)
2929 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2930
2931 U.set(PN);
2932 }
2933
2934 // We've ensured that each PHI node has compatible (identical) incoming values
2935 // when coming from each of the `invoke`s in the current merge set,
2936 // so update the PHI nodes accordingly.
2937 for (BasicBlock *Succ : successors(MergedInvoke))
2938 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2939 /*ExistPred=*/Invokes.front()->getParent());
2940
2941 // And finally, replace the original `invoke`s with an unconditional branch
2942 // to the block with the merged `invoke`. Also, give that merged `invoke`
2943 // the merged debugloc of all the original `invoke`s.
2944 DILocation *MergedDebugLoc = nullptr;
2945 for (InvokeInst *II : Invokes) {
2946 // Compute the debug location common to all the original `invoke`s.
2947 if (!MergedDebugLoc)
2948 MergedDebugLoc = II->getDebugLoc();
2949 else
2950 MergedDebugLoc =
2951 DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2952
2953 // And replace the old `invoke` with an unconditionally branch
2954 // to the block with the merged `invoke`.
2955 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2956 OrigSuccBB->removePredecessor(II->getParent());
2957 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2958 // The unconditional branch is part of the replacement for the original
2959 // invoke, so should use its DebugLoc.
2960 BI->setDebugLoc(II->getDebugLoc());
2961 bool Success = MergedInvoke->tryIntersectAttributes(II);
2962 assert(Success && "Merged invokes with incompatible attributes");
2963 // For NDEBUG Compile
2964 (void)Success;
2965 II->replaceAllUsesWith(MergedInvoke);
2966 II->eraseFromParent();
2967 ++NumInvokesMerged;
2968 }
2969 MergedInvoke->setDebugLoc(MergedDebugLoc);
2970 ++NumInvokeSetsFormed;
2971
2972 if (DTU)
2973 DTU->applyUpdates(Updates);
2974}
2975
2976/// If this block is a `landingpad` exception handling block, categorize all
2977/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2978/// being "mergeable" together, and then merge invokes in each set together.
2979///
2980/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2981/// [...] [...]
2982/// | |
2983/// [invoke0] [invoke1]
2984/// / \ / \
2985/// [cont0] [landingpad] [cont1]
2986/// to:
2987/// [...] [...]
2988/// \ /
2989/// [invoke]
2990/// / \
2991/// [cont] [landingpad]
2992///
2993/// But of course we can only do that if the invokes share the `landingpad`,
2994/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2995/// and the invoked functions are "compatible".
2998 return false;
2999
3000 bool Changed = false;
3001
3002 // FIXME: generalize to all exception handling blocks?
3003 if (!BB->isLandingPad())
3004 return Changed;
3005
3006 CompatibleSets Grouper;
3007
3008 // Record all the predecessors of this `landingpad`. As per verifier,
3009 // the only allowed predecessor is the unwind edge of an `invoke`.
3010 // We want to group "compatible" `invokes` into the same set to be merged.
3011 for (BasicBlock *PredBB : predecessors(BB))
3012 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
3013
3014 // And now, merge `invoke`s that were grouped togeter.
3015 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
3016 if (Invokes.size() < 2)
3017 continue;
3018 Changed = true;
3019 mergeCompatibleInvokesImpl(Invokes, DTU);
3020 }
3021
3022 return Changed;
3023}
3024
3025namespace {
3026/// Track ephemeral values, which should be ignored for cost-modelling
3027/// purposes. Requires walking instructions in reverse order.
3028class EphemeralValueTracker {
3030
3031 bool isEphemeral(const Instruction *I) {
3032 if (isa<AssumeInst>(I))
3033 return true;
3034 return !I->mayHaveSideEffects() && !I->isTerminator() &&
3035 all_of(I->users(), [&](const User *U) {
3036 return EphValues.count(cast<Instruction>(U));
3037 });
3038 }
3039
3040public:
3041 bool track(const Instruction *I) {
3042 if (isEphemeral(I)) {
3043 EphValues.insert(I);
3044 return true;
3045 }
3046 return false;
3047 }
3048
3049 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3050};
3051} // namespace
3052
3053/// Determine if we can hoist sink a sole store instruction out of a
3054/// conditional block.
3055///
3056/// We are looking for code like the following:
3057/// BrBB:
3058/// store i32 %add, i32* %arrayidx2
3059/// ... // No other stores or function calls (we could be calling a memory
3060/// ... // function).
3061/// %cmp = icmp ult %x, %y
3062/// br i1 %cmp, label %EndBB, label %ThenBB
3063/// ThenBB:
3064/// store i32 %add5, i32* %arrayidx2
3065/// br label EndBB
3066/// EndBB:
3067/// ...
3068/// We are going to transform this into:
3069/// BrBB:
3070/// store i32 %add, i32* %arrayidx2
3071/// ... //
3072/// %cmp = icmp ult %x, %y
3073/// %add.add5 = select i1 %cmp, i32 %add, %add5
3074/// store i32 %add.add5, i32* %arrayidx2
3075/// ...
3076///
3077/// \return The pointer to the value of the previous store if the store can be
3078/// hoisted into the predecessor block. 0 otherwise.
3080 BasicBlock *StoreBB, BasicBlock *EndBB) {
3081 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3082 if (!StoreToHoist)
3083 return nullptr;
3084
3085 // Volatile or atomic.
3086 if (!StoreToHoist->isSimple())
3087 return nullptr;
3088
3089 Value *StorePtr = StoreToHoist->getPointerOperand();
3090 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3091
3092 // Look for a store to the same pointer in BrBB.
3093 unsigned MaxNumInstToLookAt = 9;
3094 // Skip pseudo probe intrinsic calls which are not really killing any memory
3095 // accesses.
3096 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3097 if (!MaxNumInstToLookAt)
3098 break;
3099 --MaxNumInstToLookAt;
3100
3101 // Could be calling an instruction that affects memory like free().
3102 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3103 return nullptr;
3104
3105 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3106 // Found the previous store to same location and type. Make sure it is
3107 // simple, to avoid introducing a spurious non-atomic write after an
3108 // atomic write.
3109 if (SI->getPointerOperand() == StorePtr &&
3110 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3111 SI->getAlign() >= StoreToHoist->getAlign())
3112 // Found the previous store, return its value operand.
3113 return SI->getValueOperand();
3114 return nullptr; // Unknown store.
3115 }
3116
3117 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3118 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3119 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3120 Value *Obj = getUnderlyingObject(StorePtr);
3121 bool ExplicitlyDereferenceableOnly;
3122 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3123 !PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3124 /*StoreCaptures=*/true) &&
3125 (!ExplicitlyDereferenceableOnly ||
3126 isDereferenceablePointer(StorePtr, StoreTy,
3127 LI->getDataLayout()))) {
3128 // Found a previous load, return it.
3129 return LI;
3130 }
3131 }
3132 // The load didn't work out, but we may still find a store.
3133 }
3134 }
3135
3136 return nullptr;
3137}
3138
3139/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3140/// converted to selects.
3142 BasicBlock *EndBB,
3143 unsigned &SpeculatedInstructions,
3145 const TargetTransformInfo &TTI) {
3147 BB->getParent()->hasMinSize()
3150
3151 bool HaveRewritablePHIs = false;
3152 for (PHINode &PN : EndBB->phis()) {
3153 Value *OrigV = PN.getIncomingValueForBlock(BB);
3154 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3155
3156 // FIXME: Try to remove some of the duplication with
3157 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3158 if (ThenV == OrigV)
3159 continue;
3160
3161 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
3163
3164 // Don't convert to selects if we could remove undefined behavior instead.
3165 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3167 return false;
3168
3169 HaveRewritablePHIs = true;
3170 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3171 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3172 if (!OrigCE && !ThenCE)
3173 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3174
3175 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3176 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3177 InstructionCost MaxCost =
3179 if (OrigCost + ThenCost > MaxCost)
3180 return false;
3181
3182 // Account for the cost of an unfolded ConstantExpr which could end up
3183 // getting expanded into Instructions.
3184 // FIXME: This doesn't account for how many operations are combined in the
3185 // constant expression.
3186 ++SpeculatedInstructions;
3187 if (SpeculatedInstructions > 1)
3188 return false;
3189 }
3190
3191 return HaveRewritablePHIs;
3192}
3193
3195 std::optional<bool> Invert,
3196 const TargetTransformInfo &TTI) {
3197 // If the branch is non-unpredictable, and is predicted to *not* branch to
3198 // the `then` block, then avoid speculating it.
3199 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3200 return true;
3201
3202 uint64_t TWeight, FWeight;
3203 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3204 return true;
3205
3206 if (!Invert.has_value())
3207 return false;
3208
3209 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3210 BranchProbability BIEndProb =
3211 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3213 return BIEndProb < Likely;
3214}
3215
3216/// Speculate a conditional basic block flattening the CFG.
3217///
3218/// Note that this is a very risky transform currently. Speculating
3219/// instructions like this is most often not desirable. Instead, there is an MI
3220/// pass which can do it with full awareness of the resource constraints.
3221/// However, some cases are "obvious" and we should do directly. An example of
3222/// this is speculating a single, reasonably cheap instruction.
3223///
3224/// There is only one distinct advantage to flattening the CFG at the IR level:
3225/// it makes very common but simplistic optimizations such as are common in
3226/// instcombine and the DAG combiner more powerful by removing CFG edges and
3227/// modeling their effects with easier to reason about SSA value graphs.
3228///
3229///
3230/// An illustration of this transform is turning this IR:
3231/// \code
3232/// BB:
3233/// %cmp = icmp ult %x, %y
3234/// br i1 %cmp, label %EndBB, label %ThenBB
3235/// ThenBB:
3236/// %sub = sub %x, %y
3237/// br label BB2
3238/// EndBB:
3239/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3240/// ...
3241/// \endcode
3242///
3243/// Into this IR:
3244/// \code
3245/// BB:
3246/// %cmp = icmp ult %x, %y
3247/// %sub = sub %x, %y
3248/// %cond = select i1 %cmp, 0, %sub
3249/// ...
3250/// \endcode
3251///
3252/// \returns true if the conditional block is removed.
3253bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3254 BasicBlock *ThenBB) {
3255 if (!Options.SpeculateBlocks)
3256 return false;
3257
3258 // Be conservative for now. FP select instruction can often be expensive.
3259 Value *BrCond = BI->getCondition();
3260 if (isa<FCmpInst>(BrCond))
3261 return false;
3262
3263 BasicBlock *BB = BI->getParent();
3264 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3265 InstructionCost Budget =
3267
3268 // If ThenBB is actually on the false edge of the conditional branch, remember
3269 // to swap the select operands later.
3270 bool Invert = false;
3271 if (ThenBB != BI->getSuccessor(0)) {
3272 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3273 Invert = true;
3274 }
3275 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3276
3277 if (!isProfitableToSpeculate(BI, Invert, TTI))
3278 return false;
3279
3280 // Keep a count of how many times instructions are used within ThenBB when
3281 // they are candidates for sinking into ThenBB. Specifically:
3282 // - They are defined in BB, and
3283 // - They have no side effects, and
3284 // - All of their uses are in ThenBB.
3285 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3286
3287 SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
3288
3289 unsigned SpeculatedInstructions = 0;
3290 bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
3291 Options.HoistLoadsStoresWithCondFaulting;
3292 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3293 Value *SpeculatedStoreValue = nullptr;
3294 StoreInst *SpeculatedStore = nullptr;
3295 EphemeralValueTracker EphTracker;
3296 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3297 // Skip debug info.
3298 if (isa<DbgInfoIntrinsic>(I)) {
3299 SpeculatedDbgIntrinsics.push_back(&I);
3300 continue;
3301 }
3302
3303 // Skip pseudo probes. The consequence is we lose track of the branch
3304 // probability for ThenBB, which is fine since the optimization here takes
3305 // place regardless of the branch probability.
3306 if (isa<PseudoProbeInst>(I)) {
3307 // The probe should be deleted so that it will not be over-counted when
3308 // the samples collected on the non-conditional path are counted towards
3309 // the conditional path. We leave it for the counts inference algorithm to
3310 // figure out a proper count for an unknown probe.
3311 SpeculatedDbgIntrinsics.push_back(&I);
3312 continue;
3313 }
3314
3315 // Ignore ephemeral values, they will be dropped by the transform.
3316 if (EphTracker.track(&I))
3317 continue;
3318
3319 // Only speculatively execute a single instruction (not counting the
3320 // terminator) for now.
3321 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3323 SpeculatedConditionalLoadsStores.size() <
3325 // Not count load/store into cost if target supports conditional faulting
3326 // b/c it's cheap to speculate it.
3327 if (IsSafeCheapLoadStore)
3328 SpeculatedConditionalLoadsStores.push_back(&I);
3329 else
3330 ++SpeculatedInstructions;
3331
3332 if (SpeculatedInstructions > 1)
3333 return false;
3334
3335 // Don't hoist the instruction if it's unsafe or expensive.
3336 if (!IsSafeCheapLoadStore &&
3338 !(HoistCondStores && !SpeculatedStoreValue &&
3339 (SpeculatedStoreValue =
3340 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3341 return false;
3342 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3345 return false;
3346
3347 // Store the store speculation candidate.
3348 if (!SpeculatedStore && SpeculatedStoreValue)
3349 SpeculatedStore = cast<StoreInst>(&I);
3350
3351 // Do not hoist the instruction if any of its operands are defined but not
3352 // used in BB. The transformation will prevent the operand from
3353 // being sunk into the use block.
3354 for (Use &Op : I.operands()) {
3355 Instruction *OpI = dyn_cast<Instruction>(Op);
3356 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3357 continue; // Not a candidate for sinking.
3358
3359 ++SinkCandidateUseCounts[OpI];
3360 }
3361 }
3362
3363 // Consider any sink candidates which are only used in ThenBB as costs for
3364 // speculation. Note, while we iterate over a DenseMap here, we are summing
3365 // and so iteration order isn't significant.
3366 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3367 if (Inst->hasNUses(Count)) {
3368 ++SpeculatedInstructions;
3369 if (SpeculatedInstructions > 1)
3370 return false;
3371 }
3372
3373 // Check that we can insert the selects and that it's not too expensive to do
3374 // so.
3375 bool Convert =
3376 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3378 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3379 SpeculatedInstructions, Cost, TTI);
3380 if (!Convert || Cost > Budget)
3381 return false;
3382
3383 // If we get here, we can hoist the instruction and if-convert.
3384 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3385
3386 // Insert a select of the value of the speculated store.
3387 if (SpeculatedStoreValue) {
3388 IRBuilder<NoFolder> Builder(BI);
3389 Value *OrigV = SpeculatedStore->getValueOperand();
3390 Value *TrueV = SpeculatedStore->getValueOperand();
3391 Value *FalseV = SpeculatedStoreValue;
3392 if (Invert)
3393 std::swap(TrueV, FalseV);
3394 Value *S = Builder.CreateSelect(
3395 BrCond, TrueV, FalseV, "spec.store.select", BI);
3396 SpeculatedStore->setOperand(0, S);
3397 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3398 SpeculatedStore->getDebugLoc());
3399 // The value stored is still conditional, but the store itself is now
3400 // unconditonally executed, so we must be sure that any linked dbg.assign
3401 // intrinsics are tracking the new stored value (the result of the
3402 // select). If we don't, and the store were to be removed by another pass
3403 // (e.g. DSE), then we'd eventually end up emitting a location describing
3404 // the conditional value, unconditionally.
3405 //
3406 // === Before this transformation ===
3407 // pred:
3408 // store %one, %x.dest, !DIAssignID !1
3409 // dbg.assign %one, "x", ..., !1, ...
3410 // br %cond if.then
3411 //
3412 // if.then:
3413 // store %two, %x.dest, !DIAssignID !2
3414 // dbg.assign %two, "x", ..., !2, ...
3415 //
3416 // === After this transformation ===
3417 // pred:
3418 // store %one, %x.dest, !DIAssignID !1
3419 // dbg.assign %one, "x", ..., !1
3420 /// ...
3421 // %merge = select %cond, %two, %one
3422 // store %merge, %x.dest, !DIAssignID !2
3423 // dbg.assign %merge, "x", ..., !2
3424 auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3425 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3426 DbgAssign->replaceVariableLocationOp(OrigV, S);
3427 };
3428 for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
3429 for_each(at::getDVRAssignmentMarkers(SpeculatedStore), replaceVariable);
3430 }
3431
3432 // Metadata can be dependent on the condition we are hoisting above.
3433 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3434 // to avoid making it appear as if the condition is a constant, which would
3435 // be misleading while debugging.
3436 // Similarly strip attributes that maybe dependent on condition we are
3437 // hoisting above.
3438 for (auto &I : make_early_inc_range(*ThenBB)) {
3439 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3440 // Don't update the DILocation of dbg.assign intrinsics.
3441 if (!isa<DbgAssignIntrinsic>(&I))
3442 I.setDebugLoc(DebugLoc());
3443 }
3444 I.dropUBImplyingAttrsAndMetadata();
3445
3446 // Drop ephemeral values.
3447 if (EphTracker.contains(&I)) {
3448 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3449 I.eraseFromParent();
3450 }
3451 }
3452
3453 // Hoist the instructions.
3454 // In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached
3455 // to these instructions, in the same way that dbg.value intrinsics are
3456 // dropped at the end of this block.
3457 for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
3458 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3459 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3460 // equivalent).
3461 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3462 !DVR || !DVR->isDbgAssign())
3463 It.dropOneDbgRecord(&DR);
3464 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3465 std::prev(ThenBB->end()));
3466
3467 if (!SpeculatedConditionalLoadsStores.empty())
3468 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert);
3469
3470 // Insert selects and rewrite the PHI operands.
3471 IRBuilder<NoFolder> Builder(BI);
3472 for (PHINode &PN : EndBB->phis()) {
3473 unsigned OrigI = PN.getBasicBlockIndex(BB);
3474 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3475 Value *OrigV = PN.getIncomingValue(OrigI);
3476 Value *ThenV = PN.getIncomingValue(ThenI);
3477
3478 // Skip PHIs which are trivial.
3479 if (OrigV == ThenV)
3480 continue;
3481
3482 // Create a select whose true value is the speculatively executed value and
3483 // false value is the pre-existing value. Swap them if the branch
3484 // destinations were inverted.
3485 Value *TrueV = ThenV, *FalseV = OrigV;
3486 if (Invert)
3487 std::swap(TrueV, FalseV);
3488 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3489 PN.setIncomingValue(OrigI, V);
3490 PN.setIncomingValue(ThenI, V);
3491 }
3492
3493 // Remove speculated dbg intrinsics.
3494 // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
3495 // dbg value for the different flows and inserting it after the select.
3496 for (Instruction *I : SpeculatedDbgIntrinsics) {
3497 // We still want to know that an assignment took place so don't remove
3498 // dbg.assign intrinsics.
3499 if (!isa<DbgAssignIntrinsic>(I))
3500 I->eraseFromParent();
3501 }
3502
3503 ++NumSpeculations;
3504 return true;
3505}
3506
3507/// Return true if we can thread a branch across this block.
3509 int Size = 0;
3510 EphemeralValueTracker EphTracker;
3511
3512 // Walk the loop in reverse so that we can identify ephemeral values properly
3513 // (values only feeding assumes).
3514 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3515 // Can't fold blocks that contain noduplicate or convergent calls.
3516 if (CallInst *CI = dyn_cast<CallInst>(&I))
3517 if (CI->cannotDuplicate() || CI->isConvergent())
3518 return false;
3519
3520 // Ignore ephemeral values which are deleted during codegen.
3521 // We will delete Phis while threading, so Phis should not be accounted in
3522 // block's size.
3523 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3524 if (Size++ > MaxSmallBlockSize)
3525 return false; // Don't clone large BB's.
3526 }
3527
3528 // We can only support instructions that do not define values that are
3529 // live outside of the current basic block.
3530 for (User *U : I.users()) {
3531 Instruction *UI = cast<Instruction>(U);
3532 if (UI->getParent() != BB || isa<PHINode>(UI))
3533 return false;
3534 }
3535
3536 // Looks ok, continue checking.
3537 }
3538
3539 return true;
3540}
3541
3543 BasicBlock *To) {
3544 // Don't look past the block defining the value, we might get the value from
3545 // a previous loop iteration.
3546 auto *I = dyn_cast<Instruction>(V);
3547 if (I && I->getParent() == To)
3548 return nullptr;
3549
3550 // We know the value if the From block branches on it.
3551 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3552 if (BI && BI->isConditional() && BI->getCondition() == V &&
3553 BI->getSuccessor(0) != BI->getSuccessor(1))
3554 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3556
3557 return nullptr;
3558}
3559
3560/// If we have a conditional branch on something for which we know the constant
3561/// value in predecessors (e.g. a phi node in the current block), thread edges
3562/// from the predecessor to their ultimate destination.
///
/// Result protocol, as visible in the body: std::nullopt after one group of
/// predecessors has been threaded (the caller is expected to call again),
/// true in the degenerate single-entry-PHI case, and false when no threading
/// opportunity was found or taken.
3563static std::optional<bool>
// NOTE(review): the embedded listing numbers skip 3564 here; the line carrying
// the function name and its leading parameters (BI and DTU are used below) is
// missing from this extract.
3565 const DataLayout &DL,
3566 AssumptionCache *AC) {
// NOTE(review): listing line 3567 is absent; KnownValues, used below as a map
// from a constant to a set of predecessor blocks, is presumably declared there.
3568 BasicBlock *BB = BI->getParent();
3569 Value *Cond = BI->getCondition();
3570 PHINode *PN = dyn_cast<PHINode>(Cond);
3571 if (PN && PN->getParent() == BB) {
3572 // Degenerate case of a single entry PHI.
3573 if (PN->getNumIncomingValues() == 1) {
// NOTE(review): listing line 3574 is absent; whatever handles the
// single-entry PHI before returning is not visible here.
3575 return true;
3576 }
3577
// Every constant incoming value of the PHI gives a known condition value for
// the corresponding incoming block.
3578 for (Use &U : PN->incoming_values())
3579 if (auto *CB = dyn_cast<ConstantInt>(U))
3580 KnownValues[CB].insert(PN->getIncomingBlock(U));
3581 } else {
// Otherwise ask, per predecessor, whether the edge into BB pins the condition.
3582 for (BasicBlock *Pred : predecessors(BB)) {
3583 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3584 KnownValues[CB].insert(Pred);
3585 }
3586 }
3587
3588 if (KnownValues.empty())
3589 return false;
3590
3591 // Now we know that this block has multiple preds and two succs.
3592 // Check that the block is small enough and values defined in the block are
3593 // not used outside of it.
// NOTE(review): listing line 3594 is absent; the condition guarding the
// following return is not visible in this extract.
3595 return false;
3596
3597 for (const auto &Pair : KnownValues) {
3598 // Okay, we now know that all edges from PredBB should be revectored to
3599 // branch to RealDest.
3600 ConstantInt *CB = Pair.first;
3601 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
// A non-zero (true) constant selects successor 0, zero selects successor 1.
3602 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3603
3604 if (RealDest == BB)
3605 continue; // Skip self loops.
3606
3607 // Skip if the predecessor's terminator is an indirect branch.
3608 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3609 return isa<IndirectBrInst>(PredBB->getTerminator());
3610 }))
3611 continue;
3612
3613 LLVM_DEBUG({
3614 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3615 << " has value " << *Pair.first << " in predecessors:\n";
3616 for (const BasicBlock *PredBB : Pair.second)
3617 dbgs() << " " << PredBB->getName() << "\n";
3618 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3619 });
3620
3621 // Split the predecessors we are threading into a new edge block. We'll
3622 // clone the instructions into this block, and then redirect it to RealDest.
3623 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3624
3625 // TODO: These just exist to reduce test diff, we can drop them if we like.
3626 EdgeBB->setName(RealDest->getName() + ".critedge");
3627 EdgeBB->moveBefore(RealDest);
3628
3629 // Update PHI nodes.
3630 addPredecessorToBlock(RealDest, EdgeBB, BB);
3631
3632 // BB may have instructions that are being threaded over. Clone these
3633 // instructions into EdgeBB. We know that there will be no uses of the
3634 // cloned instructions outside of EdgeBB.
3635 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3636 DenseMap<Value *, Value *> TranslateMap; // Track translated values.
// Inside EdgeBB the branch condition is the constant we are threading on.
3637 TranslateMap[Cond] = CB;
3638
3639 // RemoveDIs: track instructions that we optimise away while folding, so
3640 // that we can copy DbgVariableRecords from them later.
3641 BasicBlock::iterator SrcDbgCursor = BB->begin();
3642 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
// PHIs are not cloned; they translate to their incoming value from EdgeBB.
3643 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3644 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3645 continue;
3646 }
3647 // Clone the instruction.
3648 Instruction *N = BBI->clone();
3649 // Insert the new instruction into its new home.
3650 N->insertInto(EdgeBB, InsertPt);
3651
3652 if (BBI->hasName())
3653 N->setName(BBI->getName() + ".c");
3654
3655 // Update operands due to translation.
3656 for (Use &Op : N->operands()) {
3657 DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
3658 if (PI != TranslateMap.end())
3659 Op = PI->second;
3660 }
3661
3662 // Check for trivial simplification.
3663 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3664 if (!BBI->use_empty())
3665 TranslateMap[&*BBI] = V;
// The clone is only erased when it has no side effects; otherwise it stays in
// EdgeBB even though its value simplified.
3666 if (!N->mayHaveSideEffects()) {
3667 N->eraseFromParent(); // Instruction folded away, don't need actual
3668 // inst
3669 N = nullptr;
3670 }
3671 } else {
3672 if (!BBI->use_empty())
3673 TranslateMap[&*BBI] = N;
3674 }
3675 if (N) {
3676 // Copy all debug-info attached to instructions from the last we
3677 // successfully clone, up to this instruction (they might have been
3678 // folded away).
3679 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3680 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3681 SrcDbgCursor = std::next(BBI);
3682 // Clone debug-info on this instruction too.
3683 N->cloneDebugInfoFrom(&*BBI);
3684
3685 // Register the new instruction with the assumption cache if necessary.
3686 if (auto *Assume = dyn_cast<AssumeInst>(N))
3687 if (AC)
3688 AC->registerAssumption(Assume);
3689 }
3690 }
3691
// Debug info on any trailing instructions that were not cloned, and on the
// branch itself, is attached at the insertion point in EdgeBB.
3692 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3693 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3694 InsertPt->cloneDebugInfoFrom(BI);
3695
// Redirect EdgeBB from BB to RealDest.
3696 BB->removePredecessor(EdgeBB);
3697 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3698 EdgeBI->setSuccessor(0, RealDest);
3699 EdgeBI->setDebugLoc(BI->getDebugLoc());
3700
3701 if (DTU) {
// NOTE(review): listing line 3702 is absent; Updates is presumably declared
// there.
3703 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3704 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3705 DTU->applyUpdates(Updates);
3706 }
3707
3708 // For simplicity, we created a separate basic block for the edge. Merge
3709 // it back into the predecessor if possible. This not only avoids
3710 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3711 // bypass the check for trivial cycles above.
3712 MergeBlockIntoPredecessor(EdgeBB, DTU);
3713
3714 // Signal repeat, simplifying any other constants.
3715 return std::nullopt;
3716 }
3717
3718 return false;
3719}
3720
3722 DomTreeUpdater *DTU,
3723 const DataLayout &DL,
3724 AssumptionCache *AC) {
3725 std::optional<bool> Result;
3726 bool EverChanged = false;
3727 do {
3728 // Note that None means "we changed things, but recurse further."
3729 Result = foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3730 EverChanged |= Result == std::nullopt || *Result;
3731 } while (Result == std::nullopt);
3732 return EverChanged;
3733}
3734
3735/// Given a BB that starts with the specified two-entry PHI node,
3736/// see if we can eliminate it.
///
/// On success every PHI in the block is replaced by a select placed before
/// the dominating conditional branch, the conditional instructions are
/// hoisted into the dominating block, and that branch becomes unconditional.
/// The visible return paths yield true whenever the IR was modified (including
/// the case where PHIs were merely simplified away) and false otherwise.
// NOTE(review): the embedded listing numbers skip 3737-3738 here; the lines
// carrying the function name and its leading parameters (PN, TTI, DTU and AC
// are used below) are missing from this extract.
3739 const DataLayout &DL,
3740 bool SpeculateUnpredictables) {
3741 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3742 // statement", which has a very simple dominance structure. Basically, we
3743 // are trying to find the condition that is being branched on, which
3744 // subsequently causes this merge to happen. We really want control
3745 // dependence information for this check, but simplifycfg can't keep it up
3746 // to date, and this catches most of the cases we care about anyway.
3747 BasicBlock *BB = PN->getParent();
3748
3749 BasicBlock *IfTrue, *IfFalse;
3750 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3751 if (!DomBI)
3752 return false;
3753 Value *IfCond = DomBI->getCondition();
3754 // Don't bother if the branch will be constant folded trivially.
3755 if (isa<ConstantInt>(IfCond))
3756 return false;
3757
3758 BasicBlock *DomBlock = DomBI->getParent();
// NOTE(review): listing lines 3759-3760 are absent; IfBlocks is presumably
// declared there, together with the start of the call whose arguments follow.
// The predicate keeps the PHI's incoming blocks that end in an unconditional
// branch.
3761 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3762 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3763 });
3764 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3765 "Will have either one or two blocks to speculate.");
3766
3767 // If the branch is non-unpredictable, see if we either predictably jump to
3768 // the merge bb (if we have only a single 'then' block), or if we predictably
3769 // jump to one specific 'then' block (if we have two of them).
3770 // It isn't beneficial to speculatively execute the code
3771 // from the block that we know is predictably not entered.
3772 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3773 if (!IsUnpredictable) {
3774 uint64_t TWeight, FWeight;
3775 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3776 (TWeight + FWeight) != 0) {
3777 BranchProbability BITrueProb =
3778 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
// NOTE(review): listing line 3779 is absent; Likely, compared against below,
// is presumably defined there.
3780 BranchProbability BIFalseProb = BITrueProb.getCompl();
3781 if (IfBlocks.size() == 1) {
3782 BranchProbability BIBBProb =
3783 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3784 if (BIBBProb >= Likely)
3785 return false;
3786 } else {
3787 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3788 return false;
3789 }
3790 }
3791 }
3792
3793 // Don't try to fold an unreachable block. For example, the phi node itself
3794 // can't be the candidate if-condition for a select that we want to form.
3795 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3796 if (IfCondPhiInst->getParent() == BB)
3797 return false;
3798
3799 // Okay, we found that we can merge this two-entry phi node into a select.
3800 // Doing so would require us to fold *all* two entry phi nodes in this block.
3801 // At some point this becomes non-profitable (particularly if the target
3802 // doesn't support cmov's). Only do this transformation if there are two or
3803 // fewer PHI nodes in this block.
// NOTE(review): NumPhis is tested before it is incremented, so as written the
// loop only bails out on a fourth PHI, i.e. three PHIs pass this check.
3804 unsigned NumPhis = 0;
3805 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3806 if (NumPhis > 2)
3807 return false;
3808
3809 // Loop over the PHI's seeing if we can promote them all to select
3810 // instructions. While we are at it, keep track of the instructions
3811 // that need to be moved to the dominating block.
3812 SmallPtrSet<Instruction *, 4> AggressiveInsts;
// NOTE(review): listing lines 3813 and 3815 are absent; Cost (used below) is
// presumably declared on the first, and the initial value of Budget is on the
// second.
3814 InstructionCost Budget =
// Unpredictable branches get extra budget equal to the target's mispredict
// penalty, but only when the caller opted in via SpeculateUnpredictables.
3816 if (SpeculateUnpredictables && IsUnpredictable)
3817 Budget += TTI.getBranchMispredictPenalty();
3818
3819 bool Changed = false;
3820 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
// Advance II before PN may be erased below.
3821 PHINode *PN = cast<PHINode>(II++);
3822 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3823 PN->replaceAllUsesWith(V);
3824 PN->eraseFromParent();
3825 Changed = true;
3826 continue;
3827 }
3828
3829 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3830 AggressiveInsts, Cost, Budget, TTI, AC) ||
3831 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3832 AggressiveInsts, Cost, Budget, TTI, AC))
3833 return Changed;
3834 }
3835
3836 // If we folded the first phi, PN dangles at this point. Refresh it. If
3837 // we ran out of PHIs then we simplified them all.
3838 PN = dyn_cast<PHINode>(BB->begin());
3839 if (!PN)
3840 return true;
3841
3842 // Return true if at least one of these is a 'not', and another is either
3843 // a 'not' too, or a constant.
3844 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3845 if (!match(V0, m_Not(m_Value())))
3846 std::swap(V0, V1);
3847 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3848 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3849 };
3850
3851 // Don't fold i1 branches on PHIs which contain binary operators or
3852 // (possibly inverted) select form of or/ands, unless one of
3853 // the incoming values is a 'not' and another one is freely invertible.
3854 // These can often be turned into switches and other things.
3855 auto IsBinOpOrAnd = [](Value *V) {
3856 return match(
// NOTE(review): listing line 3857 is absent; the pattern passed to match() is
// not visible in this extract.
3858 };
3859 if (PN->getType()->isIntegerTy(1) &&
3860 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3861 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3862 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3863 PN->getIncomingValue(1)))
3864 return Changed;
3865
3866 // If all PHI nodes are promotable, check to make sure that all instructions
3867 // in the predecessor blocks can be promoted as well. If not, we won't be able
3868 // to get rid of the control flow, so it's not worth promoting to select
3869 // instructions.
3870 for (BasicBlock *IfBlock : IfBlocks)
3871 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3872 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3873 // This is not an aggressive instruction that we can promote.
3874 // Because of this, we won't be able to get rid of the control flow, so
3875 // the xform is not worth it.
3876 return Changed;
3877 }
3878
3879 // If either of the blocks has its address taken, we can't do this fold.
3880 if (any_of(IfBlocks,
3881 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3882 return Changed;
3883
3884 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3885 if (IsUnpredictable) dbgs() << " (unpredictable)";
3886 dbgs() << " T: " << IfTrue->getName()
3887 << " F: " << IfFalse->getName() << "\n");
3888
3889 // If we can still promote the PHI nodes after this gauntlet of tests,
3890 // do all of the PHI's now.
3891
3892 // Move all 'aggressive' instructions, which are defined in the
3893 // conditional parts of the if's up to the dominating block.
3894 for (BasicBlock *IfBlock : IfBlocks)
3895 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3896
3897 IRBuilder<NoFolder> Builder(DomBI);
3898 // Propagate fast-math-flags from phi nodes to replacement selects.
3899 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3900 // Change the PHI node into a select instruction.
3901 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3902 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3903
3904 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3905 isa<FPMathOperator>(PN) ? PN : nullptr,
3906 "", DomBI);
3907 PN->replaceAllUsesWith(Sel);
3908 Sel->takeName(PN);
3909 PN->eraseFromParent();
3910 }
3911
3912 // At this point, all IfBlocks are empty, so our if statement
3913 // has been flattened. Change DomBlock to jump directly to our new block to
3914 // avoid other simplifycfg's kicking in on the diamond.
3915 Builder.CreateBr(BB);
3916
// NOTE(review): listing line 3917 is absent; Updates is presumably declared
// there.
// The successor list is read while the old conditional branch is still in
// place; the updates are applied only after it has been erased.
3918 if (DTU) {
3919 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3920 for (auto *Successor : successors(DomBlock))
3921 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3922 }
3923
3924 DomBI->eraseFromParent();
3925 if (DTU)
3926 DTU->applyUpdates(Updates);
3927
3928 return true;
3929}
3930
3932 Instruction::BinaryOps Opc, Value *LHS,
3933 Value *RHS, const Twine &Name = "") {
3934 // Try to relax logical op to binary op.
3935 if (impliesPoison(RHS, LHS))
3936 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3937 if (Opc == Instruction::And)
3938 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3939 if (Opc == Instruction::Or)
3940 return Builder.CreateLogicalOr(LHS, RHS, Name);
3941 llvm_unreachable("Invalid logical opcode");
3942}
3943
3944/// Return true if either PBI or BI has branch weight available, and store
3945/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3946/// not have branch weight, use 1:1 as its weight.
3948 uint64_t &PredTrueWeight,
3949 uint64_t &PredFalseWeight,
3950 uint64_t &SuccTrueWeight,
3951 uint64_t &SuccFalseWeight) {
3952 bool PredHasWeights =
3953 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3954 bool SuccHasWeights =
3955 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3956 if (PredHasWeights || SuccHasWeights) {
3957 if (!PredHasWeights)
3958 PredTrueWeight = PredFalseWeight = 1;
3959 if (!SuccHasWeights)
3960 SuccTrueWeight = SuccFalseWeight = 1;
3961 return true;
3962 } else {
3963 return false;
3964 }
3965}
3966
3967/// Determine if the two branches share a common destination and deduce a glue
3968/// that joins the branches' conditions to arrive at the common destination if
3969/// that would be profitable.
3970static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3972 const TargetTransformInfo *TTI) {
3973 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3974 "Both blocks must end with a conditional branches.");
3976 "PredBB must be a predecessor of BB.");
3977
3978 // We have the potential to fold the conditions together, but if the
3979 // predecessor branch is predictable, we may not want to merge them.
3980 uint64_t PTWeight, PFWeight;
3981 BranchProbability PBITrueProb, Likely;
3982 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3983 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3984 (PTWeight + PFWeight) != 0) {
3985 PBITrueProb =
3986 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3988 }
3989
3990 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3991 // Speculate the 2nd condition unless the 1st is probably true.
3992 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3993 return {{BI->getSuccessor(0), Instruction::Or, false}};
3994 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3995 // Speculate the 2nd condition unless the 1st is probably false.
3996 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3997 return {{BI->getSuccessor(1), Instruction::And, false}};
3998 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3999 // Speculate the 2nd condition unless the 1st is probably true.
4000 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4001 return {{BI->getSuccessor(1), Instruction::And, true}};
4002 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4003 // Speculate the 2nd condition unless the 1st is probably false.
4004 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4005 return {{BI->getSuccessor(0), Instruction::Or, true}};
4006 }
4007 return std::nullopt;
4008}
4009
// NOTE(review): the embedded listing numbers skip 4010 just before this line;
// the line carrying the function name and its leading parameters (BI and PBI
// are used below) is missing from this extract.
//
// Purpose, as far as the visible body shows: fold BI's block into the
// predecessor ending in PBI by retargeting PBI at the non-shared successor of
// BI, merging branch weights, and combining the two conditions with a logical
// and/or. Always returns true.
4011 DomTreeUpdater *DTU,
4012 MemorySSAUpdater *MSSAU,
4013 const TargetTransformInfo *TTI) {
4014 BasicBlock *BB = BI->getParent();
4015 BasicBlock *PredBlock = PBI->getParent();
4016
4017 // Determine if the two branches share a common destination.
4018 BasicBlock *CommonSucc;
// NOTE(review): listing line 4019 is absent; Opc is presumably declared there.
4020 bool InvertPredCond;
4021 std::tie(CommonSucc, Opc, InvertPredCond) =
// NOTE(review): listing line 4022 is absent; the right-hand side of this
// assignment is not visible in this extract.
4023
4024 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4025
4026 IRBuilder<> Builder(PBI);
4027 // The builder is used to create instructions to eliminate the branch in BB.
4028 // If BB's terminator has !annotation metadata, add it to the new
4029 // instructions.
// NOTE(review): listing line 4030 is absent; the call whose trailing argument
// follows is not visible in this extract.
4031 {LLVMContext::MD_annotation});
4032
4033 // If we need to invert the condition in the pred block to match, do so now.
4034 if (InvertPredCond) {
4035 InvertBranch(PBI, Builder);
4036 }
4037
// The successor of BI on the same side (true/false) as PBI's edge into BB.
4038 BasicBlock *UniqueSucc =
4039 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4040
4041 // Before cloning instructions, notify the successor basic block that it
4042 // is about to have a new predecessor. This will update PHI nodes,
4043 // which will allow us to update live-out uses of bonus instructions.
4044 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4045
4046 // Try to update branch weights.
4047 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4048 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4049 SuccTrueWeight, SuccFalseWeight)) {
4050 SmallVector<uint64_t, 8> NewWeights;
4051
4052 if (PBI->getSuccessor(0) == BB) {
4053 // PBI: br i1 %x, BB, FalseDest
4054 // BI: br i1 %y, UniqueSucc, FalseDest
4055 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4056 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
4057 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4058 // TrueWeight for PBI * FalseWeight for BI.
4059 // We assume that total weights of a BranchInst can fit into 32 bits.
4060 // Therefore, we will not have overflow using 64-bit arithmetic.
4061 NewWeights.push_back(PredFalseWeight *
4062 (SuccFalseWeight + SuccTrueWeight) +
4063 PredTrueWeight * SuccFalseWeight);
4064 } else {
4065 // PBI: br i1 %x, TrueDest, BB
4066 // BI: br i1 %y, TrueDest, UniqueSucc
4067 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4068 // FalseWeight for PBI * TrueWeight for BI.
4069 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4070 PredFalseWeight * SuccTrueWeight);
4071 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4072 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
4073 }
4074
4075 // Halve the weights if any of them cannot fit in an uint32_t
4076 fitWeights(NewWeights);
4077
4078 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
4079 setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
4080
4081 // TODO: If BB is reachable from all paths through PredBlock, then we
4082 // could replace PBI's branch probabilities with BI's.
4083 } else
// Neither branch carried weights: drop any profile metadata on PBI.
4084 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4085
4086 // Now, update the CFG.
// The boolean converts to the index of the successor slot that pointed at BB.
4087 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4088
4089 if (DTU)
4090 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4091 {DominatorTree::Delete, PredBlock, BB}});
4092
4093 // If BI was a loop latch, it may have had associated loop metadata.
4094 // We need to copy it to the new latch, that is, PBI.
4095 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4096 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4097
4098 ValueToValueMapTy VMap; // maps original values to cloned values
// NOTE(review): listing line 4099 is absent; judging by the comments around
// it, the step that clones BB's instructions into PredBlock and fills VMap
// presumably lives there.
4100
4101 Module *M = BB->getModule();
4102
4103 if (PredBlock->IsNewDbgInfoFormat) {
4104 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4105 for (DbgVariableRecord &DVR :
// NOTE(review): listing lines 4106 and 4108 are absent; the range being
// iterated and the remaining RemapDbgRecord arguments are not visible.
4107 RemapDbgRecord(M, &DVR, VMap,
4109 }
4110 }
4111
4112 // Now that the Cond was cloned into the predecessor basic block,
4113 // or/and the two conditions together.
4114 Value *BICond = VMap[BI->getCondition()];
4115 PBI->setCondition(
4116 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4117
4118 ++NumFoldBranchToCommonDest;
4119 return true;
4120}
4121
4122/// Return if an instruction's type or any of its operands' types are a vector
4123/// type.
4124static bool isVectorOp(Instruction &I) {
4125 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4126 return U->getType()->isVectorTy();
4127 });
4128}
4129
4130/// If this basic block is simple enough, and if a predecessor branches to us
4131/// and one of our successors, fold the block into the predecessor and use
4132/// logical operations to pick the right destination.
///
/// Returns false without touching the IR when BI is unconditional, when its
/// condition is not a single-use compare/binary-operator/select defined in
/// the same block, when the block branches to itself, when no predecessor
/// qualifies, or when the bonus-instruction budget is exceeded. Otherwise the
/// result of performBranchToCommonDestFolding is returned.
// NOTE(review): the embedded listing numbers skip 4133 here; the line carrying
// the function name and its leading parameters (BI and DTU are used below) is
// missing from this extract.
4134 MemorySSAUpdater *MSSAU,
4135 const TargetTransformInfo *TTI,
4136 unsigned BonusInstThreshold) {
4137 // If this block ends with an unconditional branch,
4138 // let speculativelyExecuteBB() deal with it.
4139 if (!BI->isConditional())
4140 return false;
4141
4142 BasicBlock *BB = BI->getParent();
// NOTE(review): listing lines 4143-4145 are absent; CostKind, used further
// down, is presumably defined there.
4146
4147 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4148
4149 if (!Cond ||
4150 (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
4151 !isa<SelectInst>(Cond)) ||
4152 Cond->getParent() != BB || !Cond->hasOneUse())
4153 return false;
4154
4155 // Finally, don't infinitely unroll conditional loops.
4156 if (is_contained(successors(BB), BB))
4157 return false;
4158
4159 // With which predecessors will we want to deal with?
// NOTE(review): listing line 4160 is absent; Preds is presumably declared
// there.
4161 for (BasicBlock *PredBlock : predecessors(BB)) {
4162 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4163
4164 // Check that we have two conditional branches. If there is a PHI node in
4165 // the common successor, verify that the same value flows in from both
4166 // blocks.
4167 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4168 continue;
4169
4170 // Determine if the two branches share a common destination.
4171 BasicBlock *CommonSucc;
// NOTE(review): listing line 4172 is absent; Opc is presumably declared there.
4173 bool InvertPredCond;
4174 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4175 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4176 else
4177 continue;
4178
4179 // Check the cost of inserting the necessary logic before performing the
4180 // transformation.
4181 if (TTI) {
4182 Type *Ty = BI->getCondition()->getType();
// NOTE(review): listing line 4183 is absent; Cost is presumably initialised
// there.
// Inverting the predecessor's condition costs an extra xor unless that
// condition is a single-use compare.
4184 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4185 !isa<CmpInst>(PBI->getCondition())))
4186 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4187
// NOTE(review): listing line 4188 is absent; the condition guarding the
// following continue is not visible in this extract.
4189 continue;
4190 }
4191
4192 // Ok, we do want to deal with this predecessor. Record it.
4193 Preds.emplace_back(PredBlock);
4194 }
4195
4196 // If there aren't any predecessors into which we can fold,
4197 // don't bother checking the cost.
4198 if (Preds.empty())
4199 return false;
4200
4201 // Only allow this transformation if computing the condition doesn't involve
4202 // too many instructions and these involved instructions can be executed
4203 // unconditionally. We denote all involved instructions except the condition
4204 // as "bonus instructions", and only allow this transformation when the
4205 // number of the bonus instructions we'll need to create when cloning into
4206 // each predecessor does not exceed a certain threshold.
4207 unsigned NumBonusInsts = 0;
4208 bool SawVectorOp = false;
4209 const unsigned PredCount = Preds.size();
4210 for (Instruction &I : *BB) {
4211 // Don't check the branch condition comparison itself.
4212 if (&I == Cond)
4213 continue;
4214 // Ignore dbg intrinsics, and the terminator.
4215 if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
4216 continue;
4217 // I must be safe to execute unconditionally.
// NOTE(review): listing line 4218 is absent; the safety check guarding the
// following return is not visible in this extract.
4219 return false;
4220 SawVectorOp |= isVectorOp(I);
4221
4222 // Account for the cost of duplicating this instruction into each
4223 // predecessor. Ignore free instructions.
4224 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
// NOTE(review): listing line 4225 is absent; the right-hand side of this
// comparison and the opening brace of the guarded block are not visible.
4226 NumBonusInsts += PredCount;
4227
4228 // Early exits once we reach the limit.
// This in-loop check uses the vector multiplier unconditionally; the exact
// limit, which depends on SawVectorOp, is re-checked after the loop.
4229 if (NumBonusInsts >
4230 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4231 return false;
4232 }
4233
// A use is acceptable if it is a PHI operand flowing in from BB, or a
// non-PHI user inside BB that comes after I.
4234 auto IsBCSSAUse = [BB, &I](Use &U) {
4235 auto *UI = cast<Instruction>(U.getUser());
4236 if (auto *PN = dyn_cast<PHINode>(UI))
4237 return PN->getIncomingBlock(U) == BB;
4238 return UI->getParent() == BB && I.comesBefore(UI);
4239 };
4240
4241 // Does this instruction require rewriting of uses?
4242 if (!all_of(I.uses(), IsBCSSAUse))
4243 return false;
4244 }
4245 if (NumBonusInsts >
4246 BonusInstThreshold *
4247 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4248 return false;
4249
4250 // Ok, we have the budget. Perform the transformation.
// NOTE(review): the return inside the loop exits on the first iteration, so
// only the first recorded predecessor is folded per call.
4251 for (BasicBlock *PredBlock : Preds) {
4252 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4253 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4254 }
4255 return false;
4256}
4257
4258// If there is only one store in BB1 and BB2, return it, otherwise return
4259// nullptr.
4261 StoreInst *S = nullptr;
4262 for (auto *BB : {BB1, BB2}) {
4263 if (!BB)
4264 continue;
4265 for (auto &I : *BB)
4266 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4267 if (S)
4268 // Multiple stores seen.
4269 return nullptr;
4270 else
4271 S = SI;
4272 }
4273 }
4274 return S;
4275}
4276
4278 Value *AlternativeV = nullptr) {
4279 // PHI is going to be a PHI node that allows the value V that is defined in
4280 // BB to be referenced in BB's only successor.
4281 //
4282 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4283 // doesn't matter to us what the other operand is (it'll never get used). We
4284 // could just create a new PHI with an undef incoming value, but that could
4285 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4286 // other PHI. So here we directly look for some PHI in BB's successor with V
4287 // as an incoming operand. If we find one, we use it, else we create a new
4288 // one.
4289 //
4290 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4291 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4292 // where OtherBB is the single other predecessor of BB's only successor.
4293 PHINode *PHI = nullptr;
4294 BasicBlock *Succ = BB->getSingleSuccessor();
4295
4296 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4297 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4298 PHI = cast<PHINode>(I);
4299 if (!AlternativeV)
4300 break;
4301
4302 assert(Succ->hasNPredecessors(2));
4303 auto PredI = pred_begin(Succ);
4304 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4305 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4306 break;
4307 PHI = nullptr;
4308 }
4309 if (PHI)
4310 return PHI;
4311
4312 // If V is not an instruction defined in BB, just return it.
4313 if (!AlternativeV &&
4314 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4315 return V;
4316
4317 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4318 PHI->insertBefore(Succ->begin());
4319 PHI->addIncoming(V, BB);
4320 for (BasicBlock *PredBB : predecessors(Succ))
4321 if (PredBB != BB)
4322 PHI->addIncoming(
4323 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4324 return PHI;
4325}
4326
// NOTE(review): the line that opens this definition (return type and function
// name) is not present in this listing, and neither are several statements in
// the body (each gap is flagged where it occurs). The parameter list matches
// the call mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
// InvertPCond, InvertQCond, DTU, DL, TTI) made further down in this file, so
// this is presumably that function - confirm against the complete file.
//
// Summary of the visible logic: take the single store found in the P region
// (PTB/PFB) and the single store found in the Q region (QTB/QFB), and replace
// both with one store to Address that is placed at the start of PostBB inside
// a conditional region guarded by (PPred | QPred).
//
// Parameters, as used below:
//   PTB, PFB    - "true"/"false" blocks of the first conditional region. PFB is
//                 dereferenced unconditionally; PTB is only ever compared or
//                 passed to helpers that accept null.
//   QTB, QFB    - same for the second region. QTB may be null (it is
//                 null-checked before being scanned); QFB is dereferenced
//                 unconditionally.
//   PostBB      - block that receives the merged store. It may be replaced by
//                 a freshly split block, see below.
//   Address     - pointer operand of the newly created store.
//   InvertPCond - when true, the P predicate is negated once more.
//   InvertQCond - when true, the Q predicate is negated once more.
//   DTU         - forwarded to the block-splitting helpers.
//   DL, TTI     - not referenced in the lines visible here (TTI is presumably
//                 used by the missing cost-model line - TODO confirm).
//
// Returns true once both original stores have been erased and the merged store
// has been created; every earlier exit returns false.
4328 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4329 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4330 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4331 // For every pointer, there must be exactly two stores, one coming from
4332 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4333 // store (to any address) in PTB,PFB or QTB,QFB.
4334 // FIXME: We could relax this restriction with a bit more work and performance
4335 // testing.
4336 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4337 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4338 if (!PStore || !QStore)
4339 return false;
4340
4341 // Now check the stores are compatible: both must be unordered and must
// store values of the same type, since a single PHI-merged value is stored.
4342 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4343 PStore->getValueOperand()->getType() !=
4344 QStore->getValueOperand()->getType())
4345 return false;
4346
4347 // Check that sinking the store won't cause program behavior changes. Sinking
4348 // the store out of the Q blocks won't change any behavior as we're sinking
4349 // from a block to its unconditional successor. But we're moving a store from
4350 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4351 // So we need to check that there are no aliasing loads or stores in
4352 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4353 // operations between PStore and the end of its parent block.
4354 //
4355 // The ideal way to do this is to query AliasAnalysis, but we don't
4356 // preserve AA currently so that is dangerous. Be super safe and just
4357 // check there are no other memory operations at all.
// 1) The middle block (QFB's single predecessor) must not touch memory.
4358 for (auto &I : *QFB->getSinglePredecessor())
4359 if (I.mayReadOrWriteMemory())
4360 return false;
// 2) QFB may touch memory only through QStore itself.
4361 for (auto &I : *QFB)
4362 if (&I != QStore && I.mayReadOrWriteMemory())
4363 return false;
// 3) Same for QTB, when the Q region is a diamond rather than a triangle.
4364 if (QTB)
4365 for (auto &I : *QTB)
4366 if (&I != QStore && I.mayReadOrWriteMemory())
4367 return false;
// 4) Nothing from PStore to the end of its block may touch memory.
4368 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4369 I != E; ++I)
4370 if (&*I != PStore && I->mayReadOrWriteMemory())
4371 return false;
4372
4373 // If we're not in aggressive mode, we only optimize if we have some
4374 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
// IsWorthwhile(BB, FreeStores) returns true for a null block, and otherwise
// true only if every non-terminator, non-store instruction in BB is a
// BinaryOperator or GetElementPtrInst and the accumulated Cost stays within
// Budget.
4375 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4376 if (!BB)
4377 return true;
4378 // Heuristic: if the block can be if-converted/phi-folded and the
4379 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4380 // thread this store.
// NOTE(review): the declaration of `Cost` and the initializer expression of
// `Budget` are missing from this listing.
4382 InstructionCost Budget =
4384 for (auto &I : BB->instructionsWithoutDebug(false)) {
4385 // Consider terminator instruction to be free.
4386 if (I.isTerminator())
4387 continue;
4388 // If this is one of the stores that we want to speculate out of this BB,
4389 // then don't count its cost, consider it to be free.
// NOTE(review): llvm::find returns an iterator, not a bool. If that iterator
// is a raw pointer into FreeStores it is non-null even when S is absent, which
// would make this condition true for every store; llvm::is_contained(
// FreeStores, S) looks like the intended test - confirm. (Given the
// "unique store" precondition established above, the practical impact is
// presumably nil.)
4390 if (auto *S = dyn_cast<StoreInst>(&I))
4391 if (llvm::find(FreeStores, S))
4392 continue;
4393 // Else, we have a white-list of instructions that we are okay speculating.
4394 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4395 return false; // Not in white-list - not worthwhile folding.
4396 // And finally, if this is a non-free instruction that we are okay
4397 // speculating, ensure that we consider the speculation budget.
// NOTE(review): the right-hand side of this `Cost +=` is missing from this
// listing.
4398 Cost +=
4400 if (Cost > Budget)
4401 return false; // Eagerly refuse to fold as soon as we're out of budget.
4402 }
4403 assert(Cost <= Budget &&
4404 "When we run out of budget we will eagerly return from within the "
4405 "per-instruction loop.");
4406 return true;
4407 };
4408
4409 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
// NOTE(review): the start of this `if` (its first condition) is missing from
// this listing; the visible part requires all four blocks to be worthwhile.
4411 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4412 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4413 return false;
4414
4415 // If PostBB has more than two predecessors, we need to split it so we can
4416 // sink the store.
4417 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4418 // We know that QFB's only successor is PostBB. And QFB has a single
4419 // predecessor. If QTB exists, then its only successor is also PostBB.
4420 // If QTB does not exist, then QFB's only predecessor has a conditional
4421 // branch to QFB and PostBB.
4422 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4423 BasicBlock *NewBB =
4424 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4425 if (!NewBB)
4426 return false;
// From here on the merged store goes into the newly split block.
4427 PostBB = NewBB;
4428 }
4429
4430 // OK, we're going to sink the stores to PostBB. The store has to be
4431 // conditional though, so first create the predicate.
// The conditions are read from the branches that end the single predecessors
// of PFB and QFB; cast<> is used, so those terminators are expected to be
// BranchInsts.
4432 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
4433 ->getCondition();
4434 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
4435 ->getCondition();
4436
// NOTE(review): the first lines of the next two statements are missing from
// this listing. They presumably define PPHI and QPHI (both are referenced
// below) from the stored values of PStore and QStore - TODO confirm.
4438 PStore->getParent());
4440 QStore->getParent(), PPHI);
4441
4442 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4443 IRBuilder<> QB(PostBB, PostBBFirst);
4444 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4445
// A store sitting in the "true" block executes when the condition holds;
// one sitting in the "false" block executes when it does not.
4446 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
4447 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
4448
4449 if (InvertPCond)
4450 PPred = QB.CreateNot(PPred);
4451 if (InvertQCond)
4452 QPred = QB.CreateNot(QPred);
// The merged store must run if either original store would have run.
4453 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4454
4455 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4456 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4457 /*Unreachable=*/false,
4458 /*BranchWeights=*/nullptr, DTU);
4459
// The new store is inserted before T (presumably the terminator of the new
// "then" block - confirm against SplitBlockAndInsertIfThen's contract).
4460 QB.SetInsertPoint(T);
4461 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4462 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4463 // Choose the minimum alignment. If we could prove both stores execute, we
4464 // could use biggest one. In this case, though, we only know that one of the
4465 // stores executes. And we don't know it's safe to take the alignment from a
4466 // store that doesn't execute.
4467 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4468
// The two original stores are now redundant.
4469 QStore->eraseFromParent();
4470 PStore->eraseFromParent();
4471
4472 return true;
4473}
4474
// NOTE(review): the first line of this definition (return type, name and the
// leading parameters) is missing from this listing. The body uses two branch
// parameters named PBI and QBI, and the call site in this file is
// mergeConditionalStores(PBI, BI, DTU, DL, TTI), so this is presumably that
// function - confirm against the complete file.
//
// Recognizes two back-to-back diamonds/triangles headed by PBI and QBI,
// collects the store addresses that appear in both regions, and calls
// mergeConditionalStoreToAddress once per common address. Returns true if any
// of those calls reported a change.
4476 DomTreeUpdater *DTU, const DataLayout &DL,
4477 const TargetTransformInfo &TTI) {
4478 // The intention here is to find diamonds or triangles (see below) where each
4479 // conditional block contains a store to the same address. Both of these
4480 // stores are conditional, so they can't be unconditionally sunk. But it may
4481 // be profitable to speculatively sink the stores into one merged store at the
4482 // end, and predicate the merged store on the union of the two conditions of
4483 // PBI and QBI.
4484 //
4485 // This can reduce the number of stores executed if both of the conditions are
4486 // true, and can allow the blocks to become small enough to be if-converted.
4487 // This optimization will also chain, so that ladders of test-and-set
4488 // sequences can be if-converted away.
4489 //
4490 // We only deal with simple diamonds or triangles:
4491 //
4492 //     PBI       or      PBI        or a combination of the two
4493 //    /   \               | \
4494 //   PTB  PFB             |  PFB
4495 //    \   /               | /
4496 //     QBI                QBI
4497 //    /  \                | \
4498 //   QTB  QFB             |  QFB
4499 //    \  /                | /
4500 //    PostBB            PostBB
4501 //
4502 // We model triangles as a type of diamond with a nullptr "true" block.
4503 // Triangles are canonicalized so that the fallthrough edge is represented by
4504 // a true condition, as in the diagram above.
4505 BasicBlock *PTB = PBI->getSuccessor(0);
4506 BasicBlock *PFB = PBI->getSuccessor(1);
4507 BasicBlock *QTB = QBI->getSuccessor(0);
4508 BasicBlock *QFB = QBI->getSuccessor(1);
4509 BasicBlock *PostBB = QFB->getSingleSuccessor();
4510
4511 // Make sure we have a good guess for PostBB. If QTB's only successor is
4512 // QFB, then QFB is a better PostBB.
4513 if (QTB->getSingleSuccessor() == QFB)
4514 PostBB = QFB;
4515
4516 // If we couldn't find a good PostBB, stop.
4517 if (!PostBB)
4518 return false;
4519
4520 bool InvertPCond = false, InvertQCond = false;
4521 // Canonicalize fallthroughs to the true branches.
// Each swap is recorded in an Invert flag so the callee can negate the
// corresponding branch condition.
4522 if (PFB == QBI->getParent()) {
4523 std::swap(PFB, PTB);
4524 InvertPCond = true;
4525 }
4526 if (QFB == PostBB) {
4527 std::swap(QFB, QTB);
4528 InvertQCond = true;
4529 }
4530
4531 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4532 // and QFB may not. Model fallthroughs as a nullptr block.
4533 if (PTB == QBI->getParent())
4534 PTB = nullptr;
4535 if (QTB == PostBB)
4536 QTB = nullptr;
4537
4538 // Legality bailouts. We must have at least the non-fallthrough blocks and
4539 // the post-dominating block, and the non-fallthroughs must only have one
4540 // predecessor.
// HasOnePredAndOneSucc(BB, P, S): BB's single predecessor is P and its single
// successor is S.
4541 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4542 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4543 };
4544 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4545 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4546 return false;
4547 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4548 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4549 return false;
// Bail out unless QBI's parent block has exactly two uses.
4550 if (!QBI->getParent()->hasNUses(2))
4551 return false;
4552
4553 // OK, this is a sequence of two diamonds or triangles.
4554 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
// Gather the pointer operand of every store in each region; null (fallthrough)
// blocks are skipped.
4555 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4556 for (auto *BB : {PTB, PFB}) {
4557 if (!BB)
4558 continue;
4559 for (auto &I : *BB)
4560 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4561 PStoreAddresses.insert(SI->getPointerOperand());
4562 }
4563 for (auto *BB : {QTB, QFB}) {
4564 if (!BB)
4565 continue;
4566 for (auto &I : *BB)
4567 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4568 QStoreAddresses.insert(SI->getPointerOperand());
4569 }
4570
4571 set_intersect(PStoreAddresses, QStoreAddresses);
4572 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4573 // clear what it contains.
4574 auto &CommonAddresses = PStoreAddresses;
4575
// Try to merge the stores for every address written in both regions.
4576 bool Changed = false;
4577 for (auto *Address : CommonAddresses)
4578 Changed |=
4579 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4580 InvertPCond, InvertQCond, DTU, DL, TTI);
4581 return Changed;
4582}
4583
4584/// If the previous block ended with a widenable branch, determine if reusing
4585/// the target block is profitable and legal. This will have the effect of
4586/// "widening" PBI, but doesn't require us to reason about hoisting safety.
///
/// On success one successor of BI is redirected to PBI's false successor, the
/// dominator tree updater (if any) is told about the edge change, and true is
/// returned. Otherwise nothing is modified and false is returned.
// NOTE(review): the first line of this definition (return type, name and the
// PBI/BI parameters) is missing from this listing. The call site in this file
// is tryWidenCondBranchToCondBranch(PBI, BI, DTU), so this is presumably that
// function - confirm against the complete file.
4588 DomTreeUpdater *DTU) {
4589 // TODO: This can be generalized in two important ways:
4590 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4591 // values from the PBI edge.
4592 // 2) We can sink side effecting instructions into BI's fallthrough
4593 // successor provided they don't contribute to computation of
4594 // BI's condition.
4595 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4596 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
// PBI must be a widenable branch whose true successor is BI's block, and that
// block must have PBI's block as its only predecessor.
4597 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4598 !BI->getParent()->getSinglePredecessor())
4599 return false;
4600 if (!IfFalseBB->phis().empty())
4601 return false; // TODO
4602 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4603 // may undo the transform done here.
4604 // TODO: There might be a more fine-grained solution to this.
// Only proceed when IfFalseBB has no successors.
4605 if (!llvm::succ_empty(IfFalseBB))
4606 return false;
4607 // Use lambda to lazily compute expensive condition after cheap ones.
4608 auto NoSideEffects = [](BasicBlock &BB) {
4609 return llvm::none_of(BB, [](const Instruction &I) {
4610 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4611 });
4612 };
// Case 1: BI's false successor ends in a deoptimize call - retarget that edge
// to IfFalseBB.
4613 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4614 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4615 NoSideEffects(*BI->getParent())) {
4616 auto *OldSuccessor = BI->getSuccessor(1);
4617 OldSuccessor->removePredecessor(BI->getParent());
4618 BI->setSuccessor(1, IfFalseBB);
4619 if (DTU)
4620 DTU->applyUpdates(
4621 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4622 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4623 return true;
4624 }
// Case 2: the mirror image of case 1 for BI's true successor.
4625 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4626 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4627 NoSideEffects(*BI->getParent())) {
4628 auto *OldSuccessor = BI->getSuccessor(0);
4629 OldSuccessor->removePredecessor(BI->getParent());
4630 BI->setSuccessor(0, IfFalseBB);
4631 if (DTU)
4632 DTU->applyUpdates(
4633 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4634 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4635 return true;
4636 }
4637 return false;
4638}
4639
4640/// If we have a conditional branch as a predecessor of another block,
4641/// this function tries to simplify it. We know
4642/// that PBI and BI are both conditional branches, and BI is in one of the
4643/// successor blocks of PBI - PBI branches to BI.
///
/// The visible body tries, in order: (1) turning BI into a branch on a
/// constant when it tests the same condition as PBI, (2) widening via
/// tryWidenCondBranchToCondBranch, (3) merging conditional stores when
/// MergeCondStores is set, and (4) folding the two branches into one branch on
/// a combined condition. Returns true if the IR was changed.
// NOTE(review): the first line of this definition (return type, name and the
// PBI/BI parameters) is missing from this listing. A comment elsewhere in this
// file refers to SimplifyCondBranchToCondBranch, which is presumably this
// function - confirm against the complete file.
4645 DomTreeUpdater *DTU,
4646 const DataLayout &DL,
4647 const TargetTransformInfo &TTI) {
4648 assert(PBI->isConditional() && BI->isConditional());
4649 BasicBlock *BB = BI->getParent();
4650
4651 // If this block ends with a branch instruction, and if there is a
4652 // predecessor that ends on a branch of the same condition, make
4653 // this conditional branch redundant.
4654 if (PBI->getCondition() == BI->getCondition() &&
4655 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4656 // Okay, the outcome of this conditional branch is statically
4657 // knowable. If this block had a single pred, handle specially, otherwise
4658 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4659 if (BB->getSinglePredecessor()) {
4660 // Turn this into a branch on constant.
4661 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4662 BI->setCondition(
4663 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4664 return true; // Nuke the branch on constant.
4665 }
4666 }
4667
4668 // If the previous block ended with a widenable branch, determine if reusing
4669 // the target block is profitable and legal. This will have the effect of
4670 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4671 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4672 return true;
4673
4674 // If both branches are conditional and both contain stores to the same
4675 // address, remove the stores from the conditionals and create a conditional
4676 // merged store at the end.
4677 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4678 return true;
4679
4680 // If this is a conditional branch in an empty block, and if any
4681 // predecessors are a conditional branch to one of our destinations,
4682 // fold the conditions into logical ops and one cond br.
4683
4684 // Ignore dbg intrinsics.
// i.e. BI must be the first non-debug instruction of BB.
4685 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4686 return false;
4687
// Find a destination shared by PBI and BI. PBIOp / BIOp are the successor
// indices (0 = true, 1 = false) through which each branch reaches it.
4688 int PBIOp, BIOp;
4689 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4690 PBIOp = 0;
4691 BIOp = 0;
4692 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4693 PBIOp = 0;
4694 BIOp = 1;
4695 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4696 PBIOp = 1;
4697 BIOp = 0;
4698 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4699 PBIOp = 1;
4700 BIOp = 1;
4701 } else {
4702 return false;
4703 }
4704
4705 // Check to make sure that the other destination of this branch
4706 // isn't BB itself. If so, this is an infinite loop that will
4707 // keep getting unwound.
4708 if (PBI->getSuccessor(PBIOp) == BB)
4709 return false;
4710
4711 // If predecessor's branch probability to BB is too low don't merge branches.
4712 SmallVector<uint32_t, 2> PredWeights;
4713 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4714 extractBranchWeights(*PBI, PredWeights) &&
4715 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4716
// NOTE(review): the line that starts this statement is missing from this
// listing. It presumably defines CommonDestProb from the weight of the common
// destination over the total weight - TODO confirm.
4718 PredWeights[PBIOp],
4719 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4720
// NOTE(review): the definition of `Likely` is missing from this listing.
4722 if (CommonDestProb >= Likely)
4723 return false;
4724 }
4725
4726 // Do not perform this transformation if it would require
4727 // insertion of a large number of select instructions. For targets
4728 // without predication/cmovs, this is a big pessimization.
4729
4730 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4731 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
// NumPhis is tested before it is incremented, so the bail-out triggers when a
// fourth PHI is encountered in CommonDest.
4732 unsigned NumPhis = 0;
4733 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4734 ++II, ++NumPhis) {
4735 if (NumPhis > 2) // Disable this xform.
4736 return false;
4737 }
4738
4739 // Finally, if everything is ok, fold the branches to logical ops.
4740 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4741
4742 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4743 << "AND: " << *BI->getParent());
4744
// NOTE(review): a line is missing from this listing here; it presumably
// declares the `Updates` container used below - TODO confirm.
4746
4747 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4748 // branch in it, where one edge (OtherDest) goes back to itself but the other
4749 // exits. We don't *know* that the program avoids the infinite loop
4750 // (even though that seems likely). If we do this xform naively, we'll end up
4751 // recursively unpeeling the loop. Since we know that (after the xform is
4752 // done) that the block *is* infinite if reached, we just make it an obviously
4753 // infinite loop with no cond branch.
4754 if (OtherDest == BB) {
4755 // Insert it at the end of the function, because it's either code,
4756 // or it won't matter if it's hot. :)
4757 BasicBlock *InfLoopBlock =
4758 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4759 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4760 if (DTU)
4761 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4762 OtherDest = InfLoopBlock;
4763 }
4764
4765 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4766
4767 // BI may have other predecessors. Because of this, we leave
4768 // it alone, but modify PBI.
4769
4770 // Make sure we get to CommonDest on True&True directions.
// A condition is negated when the common destination is reached through the
// branch's false edge (successor index 1).
4771 Value *PBICond = PBI->getCondition();
4772 IRBuilder<NoFolder> Builder(PBI);
4773 if (PBIOp)
4774 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4775
4776 Value *BICond = BI->getCondition();
4777 if (BIOp)
4778 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4779
4780 // Merge the conditions.
4781 Value *Cond =
4782 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4783
4784 // Modify PBI to branch on the new condition to the new dests.
4785 PBI->setCondition(Cond);
4786 PBI->setSuccessor(0, CommonDest);
4787 PBI->setSuccessor(1, OtherDest);
4788
4789 if (DTU) {
4790 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4791 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4792
4793 DTU->applyUpdates(Updates);
4794 }
4795
4796 // Update branch weight for PBI.
4797 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4798 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4799 bool HasWeights =
4800 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4801 SuccTrueWeight, SuccFalseWeight);
4802 if (HasWeights) {
4803 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4804 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4805 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4806 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4807 // The weight to CommonDest should be PredCommon * SuccTotal +
4808 // PredOther * SuccCommon.
4809 // The weight to OtherDest should be PredOther * SuccOther.
4810 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4811 PredOther * SuccCommon,
4812 PredOther * SuccOther};
4813 // Halve the weights if any of them cannot fit in an uint32_t
4814 fitWeights(NewWeights);
4815
4816 setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
4817 }
4818
4819 // OtherDest may have phi nodes. If so, add an entry from PBI's
4820 // block that are identical to the entries for BI's block.
4821 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4822
4823 // We know that the CommonDest already had an edge from PBI to
4824 // it. If it has PHIs though, the PHIs may have different
4825 // entries for BB and PBI's BB. If so, insert a select to make
4826 // them agree.
4827 for (PHINode &PN : CommonDest->phis()) {
4828 Value *BIV = PN.getIncomingValueForBlock(BB);
4829 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4830 Value *PBIV = PN.getIncomingValue(PBBIdx);
4831 if (BIV != PBIV) {
4832 // Insert a select in PBI to pick the right value.
4833 SelectInst *NV = cast<SelectInst>(
4834 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4835 PN.setIncomingValue(PBBIdx, NV);
4836 // Although the select has the same condition as PBI, the original branch
4837 // weights for PBI do not apply to the new select because the select's
4838 // 'logical' edges are incoming edges of the phi that is eliminated, not
4839 // the outgoing edges of PBI.
4840 if (HasWeights) {
// These locals shadow the outer variables of the same names and are computed
// from the same expressions.
4841 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4842 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4843 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4844 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4845 // The weight to PredCommonDest should be PredCommon * SuccTotal.
4846 // The weight to PredOtherDest should be PredOther * SuccCommon.
4847 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4848 PredOther * SuccCommon};
4849
4850 fitWeights(NewWeights);
4851
4852 setBranchWeights(NV, NewWeights[0], NewWeights[1],
4853 /*IsExpected=*/false);
4854 }
4855 }
4856 }
4857
4858 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4859 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4860
4861 // This basic block is probably dead. We know it has at least
4862 // one fewer predecessor.
4863 return true;
4864}
4865
4866// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4867// true or to FalseBB if Cond is false.
4868// Takes care of updating the successors and removing the old terminator.
4869// Also makes sure not to introduce new successors by assuming that edges to
4870// non-successor TrueBBs and FalseBBs aren't reachable.
//
// OldTerm      - terminator being replaced; its parent block is the block that
//                is rewritten.
// Cond         - condition used when a conditional branch is emitted.
// TrueBB/FalseBB - the two candidate destinations; they may be the same block.
// TrueWeight/FalseWeight - attached as branch weights to a newly created
//                conditional branch, but only when the two values differ.
//
// Always returns true.
4871bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4872 Value *Cond, BasicBlock *TrueBB,
4873 BasicBlock *FalseBB,
4874 uint32_t TrueWeight,
4875 uint32_t FalseWeight) {
4876 auto *BB = OldTerm->getParent();
4877 // Remove any superfluous successor edges from the CFG.
4878 // First, figure out which successors to preserve.
4879 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4880 // successor.
4881 BasicBlock *KeepEdge1 = TrueBB;
4882 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4883
4884 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4885
4886 // Then remove the rest.
// A KeepEdge pointer is cleared once its block has been seen among the
// successors; a pointer that is still non-null after the loop means that
// block was not a successor of OldTerm.
4887 for (BasicBlock *Succ : successors(OldTerm)) {
4888 // Make sure only to keep exactly one copy of each edge.
4889 if (Succ == KeepEdge1)
4890 KeepEdge1 = nullptr;
4891 else if (Succ == KeepEdge2)
4892 KeepEdge2 = nullptr;
4893 else {
4894 Succ->removePredecessor(BB,
4895 /*KeepOneInputPHIs=*/true);
4896
// Duplicate edges to TrueBB/FalseBB are dropped from the PHIs above, but those
// blocks remain successors, so they are not recorded as removed.
4897 if (Succ != TrueBB && Succ != FalseBB)
4898 RemovedSuccessors.insert(Succ);
4899 }
4900 }
4901
4902 IRBuilder<> Builder(OldTerm);
4903 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4904
4905 // Insert an appropriate new terminator.
4906 if (!KeepEdge1 && !KeepEdge2) {
4907 if (TrueBB == FalseBB) {
4908 // We were only looking for one successor, and it was present.
4909 // Create an unconditional branch to it.
4910 Builder.CreateBr(TrueBB);
4911 } else {
4912 // We found both of the successors we were looking for.
4913 // Create a conditional branch sharing the condition of the select.
4914 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4915 if (TrueWeight != FalseWeight)
4916 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
4917 }
4918 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4919 // Neither of the selected blocks were successors, so this
4920 // terminator must be unreachable.
4921 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4922 } else {
4923 // One of the selected values was a successor, but the other wasn't.
4924 // Insert an unconditional branch to the one that was found;
4925 // the edge to the one that wasn't must be unreachable.
4926 if (!KeepEdge1) {
4927 // Only TrueBB was found.
4928 Builder.CreateBr(TrueBB);
4929 } else {
4930 // Only FalseBB was found.
4931 Builder.CreateBr(FalseBB);
4932 }
4933 }
4934
// NOTE(review): a statement is missing from this listing here. Given the
// header comment ("removing the old terminator") it presumably erases OldTerm
// - TODO confirm.
4936
4937 if (DTU) {
// NOTE(review): the declaration of `Updates` is missing from this listing.
4939 Updates.reserve(RemovedSuccessors.size());
4940 for (auto *RemovedSuccessor : RemovedSuccessors)
4941 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4942 DTU->applyUpdates(Updates);
4943 }
4944
4945 return true;
4946}
4947
4948// Replaces
4949// (switch (select cond, X, Y)) on constant X, Y
4950// with a branch - conditional if X and Y lead to distinct BBs,
4951// unconditional otherwise.
//
// Returns false, leaving the IR untouched, when either select arm is not a
// ConstantInt; otherwise returns the result of simplifyTerminatorOnSelect.
4952bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4953 SelectInst *Select) {
4954 // Check for constant integer values in the select.
4955 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4956 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4957 if (!TrueVal || !FalseVal)
4958 return false;
4959
4960 // Find the relevant condition and destinations.
// Each destination is the case successor the switch would take for that
// constant.
4961 Value *Condition = Select->getCondition();
4962 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4963 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4964
4965 // Get weight for TrueBB and FalseBB.
// Both weights stay 0 when the switch carries no usable weight metadata.
4966 uint32_t TrueWeight = 0, FalseWeight = 0;
// NOTE(review): the declaration of `Weights` is missing from this listing.
4968 bool HasWeights = hasBranchWeightMD(*SI);
4969 if (HasWeights) {
4970 getBranchWeights(SI, Weights);
// Only trust the metadata if it has one entry per case plus one more
// (presumably the default destination - confirm).
4971 if (Weights.size() == 1 + SI->getNumCases()) {
4972 TrueWeight =
4973 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4974 FalseWeight =
4975 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4976 }
4977 }
4978
4979 // Perform the actual simplification.
4980 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4981 FalseWeight);
4982}
4983
4984// Replaces
4985// (indirectbr (select cond, blockaddress(@fn, BlockA),
4986// blockaddress(@fn, BlockB)))
4987// with
4988// (br cond, BlockA, BlockB).
4989bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4990 SelectInst *SI) {
4991 // Check that both operands of the select are block addresses.
4992 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4993 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4994 if (!TBA || !FBA)
4995 return false;
4996
4997 // Extract the actual blocks.
4998 BasicBlock *TrueBB = TBA->getBasicBlock();
4999 BasicBlock *FalseBB = FBA->getBasicBlock();
5000
5001 // Perform the actual simplification.
5002 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
5003 0);
5004}
5005
5006/// This is called when we find an icmp instruction
5007/// (a seteq/setne with a constant) as the only instruction in a
5008/// block that ends with an uncond branch. We are looking for a very specific
5009/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5010/// this case, we merge the first two "or's of icmp" into a switch, but then the
5011/// default value goes to an uncond block with a seteq in it, we get something
5012/// like:
5013///
5014/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5015/// DEFAULT:
5016/// %tmp = icmp eq i8 %A, 92
5017/// br label %end
5018/// end:
5019/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5020///
5021/// We prefer to split the edge to 'end' so that there is a true/false entry to
5022/// the PHI, merging the third icmp into the switch.
///
/// Returns false when the pattern does not match (nothing is modified).
/// The two constant-folding paths return the result of requestResimplify();
/// the path that adds a new switch case returns true.
5023bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5024 ICmpInst *ICI, IRBuilder<> &Builder) {
5025 BasicBlock *BB = ICI->getParent();
5026
5027 // If the block has any PHIs in it or the icmp has multiple uses, it is too
5028 // complex.
5029 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5030 return false;
5031
// cast<> (not dyn_cast<>) is used for the second operand, so the caller is
// expected to have established that it is a ConstantInt.
5032 Value *V = ICI->getOperand(0);
5033 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5034
5035 // The pattern we're looking for is where our only predecessor is a switch on
5036 // 'V' and this block is the default case for the switch. In this case we can
5037 // fold the compared value into the switch to simplify things.
5038 BasicBlock *Pred = BB->getSinglePredecessor();
5039 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5040 return false;
5041
5042 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5043 if (SI->getCondition() != V)
5044 return false;
5045
5046 // If BB is reachable on a non-default case, then we simply know the value of
5047 // V in this block. Substitute it and constant fold the icmp instruction
5048 // away.
5049 if (SI->getDefaultDest() != BB) {
5050 ConstantInt *VVal = SI->findCaseDest(BB);
5051 assert(VVal && "Should have a unique destination value");
5052 ICI->setOperand(0, VVal);
5053
5054 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5055 ICI->replaceAllUsesWith(V);
5056 ICI->eraseFromParent();
5057 }
5058 // BB is now empty, so it is likely to simplify away.
5059 return requestResimplify();
5060 }
5061
5062 // Ok, the block is reachable from the default dest. If the constant we're
5063 // comparing exists in one of the other edges, then we can constant fold ICI
5064 // and zap it.
5065 if (SI->findCaseValue(Cst) != SI->case_default()) {
5066 Value *V;
// NOTE(review): the assignments to V on both arms of this if/else are missing
// from this listing. On the default path the value is known to differ from
// Cst, so they presumably assign false for ICMP_EQ and true otherwise - TODO
// confirm.
5067 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5069 else
5071
5072 ICI->replaceAllUsesWith(V);
5073 ICI->eraseFromParent();
5074 // BB is now empty, so it is likely to simplify away.
5075 return requestResimplify();
5076 }
5077
5078 // The use of the icmp has to be in the 'end' block, by the only PHI node in
5079 // the block.
5080 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5081 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5082 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5083 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
5084 return false;
5085
5086 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5087 // true in the PHI.
5088 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5089 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5090
5091 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5092 std::swap(DefaultCst, NewCst);
5093
5094 // Replace ICI (which is used by the PHI for the default value) with true or
5095 // false depending on if it is EQ or NE.
5096 ICI->replaceAllUsesWith(DefaultCst);
5097 ICI->eraseFromParent();
5098
// NOTE(review): a line is missing from this listing here; it presumably
// declares the `Updates` container used below - TODO confirm.
5100
5101 // Okay, the switch goes to this block on a default value. Add an edge from
5102 // the switch to the merge point on the compared value.
5103 BasicBlock *NewBB =
5104 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5105 {
// NOTE(review): the declarations of `SIW` and `NewW` are missing from this
// listing. The visible code halves the weight of successor 0 (rounding up),
// stores it back, and passes the same value as the weight of the new case.
5107 auto W0 = SIW.getSuccessorWeight(0);
5109 if (W0) {
5110 NewW = ((uint64_t(*W0) + 1) >> 1);
5111 SIW.setSuccessorWeight(0, *NewW);
5112 }
5113 SIW.addCase(Cst, NewBB, NewW);
5114 if (DTU)
5115 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5116 }
5117
5118 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5119 Builder.SetInsertPoint(NewBB);
5120 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5121 Builder.CreateBr(SuccBlock);
5122 PHIUse->addIncoming(NewCst, NewBB);
5123 if (DTU) {
5124 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5125 DTU->applyUpdates(Updates);
5126 }
5127 return true;
5128}
5129
5130/// The specified branch is a conditional branch.
5131/// Check to see if it is branching on an or/and chain of icmp instructions, and
5132/// fold it into a switch instruction if so.
///
/// \param BI      The conditional branch whose condition is examined.
/// \param Builder Builder used to emit the replacement branch and switch.
/// \param DL      Data layout handed to the compare gatherer and used to pick
///                the integer type when the compared value is a pointer.
/// \returns true if the branch was replaced by a switch; false if the
///          condition does not qualify (every `return false` below happens
///          before the IR is modified).
5133bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5134 IRBuilder<> &Builder,
5135 const DataLayout &DL) {
5136 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
5137 if (!Cond)
5138 return false;
5139
5140 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5141 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5142 // 'setne's and'ed together, collect them.
5143
5144 // Try to gather values from a chain of and/or to be turned into a switch
5145 ConstantComparesGatherer ConstantCompare(Cond, DL);
5146 // Unpack the result
5147 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5148 Value *CompVal = ConstantCompare.CompValue;
5149 unsigned UsedICmps = ConstantCompare.UsedICmps;
5150 Value *ExtraCase = ConstantCompare.Extra;
5151
5152 // If the gatherer found no common compared value, fail.
5153 if (!CompVal)
5154 return false;
5155
5156 // Avoid turning single icmps into a switch.
5157 if (UsedICmps <= 1)
5158 return false;
5159
// A logical-or root means the "edge" successor is taken when CompVal equals
// one of the gathered constants; otherwise the two successors swap roles
// (see the std::swap below).
5160 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
5161
5162 // There might be duplicate constants in the list, which the switch
5163 // instruction can't handle, remove them now.
5164 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5165 Values.erase(llvm::unique(Values), Values.end());
5166
5167 // If Extra was used, we require at least two switch values to do the
5168 // transformation. A switch with one value is just a conditional branch.
5169 if (ExtraCase && Values.size() < 2)
5170 return false;
5171
5172 // TODO: Preserve branch weight metadata, similarly to how
5173 // foldValueComparisonIntoPredecessors preserves it.
5174
5175 // Figure out which block is which destination.
5176 BasicBlock *DefaultBB = BI->getSuccessor(1);
5177 BasicBlock *EdgeBB = BI->getSuccessor(0);
5178 if (!TrueWhenEqual)
5179 std::swap(DefaultBB, EdgeBB);
5180
5181 BasicBlock *BB = BI->getParent();
5182
5183 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5184 << " cases into SWITCH. BB is:\n"
5185 << *BB);
5186
// NOTE(review): `Updates` is used below, but its declaration is not visible
// in this listing (a line appears to be missing here) -- confirm against the
// real file.
5188
5189 // If there are any extra values that couldn't be folded into the switch
5190 // then we evaluate them with an explicit branch first. Split the block
5191 // right before the condbr to handle it.
5192 if (ExtraCase) {
5193 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5194 /*MSSAU=*/nullptr, "switch.early.test");
5195
5196 // Remove the uncond branch added to the old block.
5197 Instruction *OldTI = BB->getTerminator();
5198 Builder.SetInsertPoint(OldTI);
5199
5200 // There can be an unintended UB if extra values are Poison. Before the
5201 // transformation, extra values may not be evaluated according to the
5202 // condition, and it will not raise UB. But after transformation, we are
5203 // evaluating extra values before checking the condition, and it will raise
5204 // UB. It can be solved by adding freeze instruction to extra values.
5205 AssumptionCache *AC = Options.AC;
5206
5207 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5208 ExtraCase = Builder.CreateFreeze(ExtraCase);
5209
// The extra condition short-circuits to EdgeBB: on true for an or-chain, on
// false for an and-chain. The other outcome falls into the split-off block
// that will hold the switch.
5210 if (TrueWhenEqual)
5211 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
5212 else
5213 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5214
5215 OldTI->eraseFromParent();
5216
5217 if (DTU)
5218 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5219
5220 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5221 // for the edge we just added.
5222 addPredecessorToBlock(EdgeBB, BB, NewBB);
5223
5224 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5225 << "\nEXTRABB = " << *BB);
// From here on, BB refers to the block that still contains BI.
5226 BB = NewBB;
5227 }
5228
5229 Builder.SetInsertPoint(BI);
5230 // Convert pointer to int before we switch.
5231 if (CompVal->getType()->isPointerTy()) {
5232 CompVal = Builder.CreatePtrToInt(
5233 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5234 }
5235
5236 // Create the new switch instruction now.
5237 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5238
5239 // Add all of the 'cases' to the switch instruction.
5240 for (unsigned i = 0, e = Values.size(); i != e; ++i)
5241 New->addCase(Values[i], EdgeBB);
5242
5243 // We added edges from PI to the EdgeBB. As such, if there were any
5244 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5245 // the number of edges added.
5246 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5247 PHINode *PN = cast<PHINode>(BBI);
5248 Value *InVal = PN->getIncomingValueForBlock(BB);
// One entry for BB already exists (it was just read), so only
// Values.size() - 1 duplicates are appended.
5249 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5250 PN->addIncoming(InVal, BB);
5251 }
5252
5253 // Erase the old branch instruction.
// NOTE(review): the statement that performs the erase is not visible in this
// listing (a line appears to be missing here) -- confirm against the real
// file.
5255 if (DTU)
5256 DTU->applyUpdates(Updates);
5257
5258 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5259 return true;
5260}
5261
5262bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5263 if (isa<PHINode>(RI->getValue()))
5264 return simplifyCommonResume(RI);
5265 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
5266 RI->getValue() == RI->getParent()->getFirstNonPHI())
5267 // The resume must unwind the exception that caused control to branch here.
5268 return simplifySingleResume(RI);
5269
5270 return false;
5271}
5272
5273// Check if cleanup block is empty
// Walks every instruction in the range R and returns true only if each one is
// an intrinsic call of kind dbg_declare, dbg_value, dbg_label or lifetime_end.
// Any non-intrinsic instruction, or any other intrinsic, makes it return
// false. An empty range yields true.
// NOTE(review): the signature line of this function is not visible in this
// listing -- confirm its name and the type of R against the real file.
5275 for (Instruction &I : R) {
5276 auto *II = dyn_cast<IntrinsicInst>(&I);
5277 if (!II)
5278 return false;
5279
5280 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5281 switch (IntrinsicID) {
// Only these intrinsics are tolerated.
5282 case Intrinsic::dbg_declare:
5283 case Intrinsic::dbg_value:
5284 case Intrinsic::dbg_label:
5285 case Intrinsic::lifetime_end:
5286 break;
5287 default:
5288 return false;
5289 }
5290 }
5291 return true;
5292}
5293
5294// Simplify resume that is shared by several landing pads (phi of landing pad).
// RI's operand is expected to be a PHI node (it is cast<PHINode> below).
// Incoming blocks that do nothing but a landing pad followed by a branch to
// the resume block are considered "trivial": their terminator is replaced by
// `unreachable` and removeUnwindEdge is called on each of their predecessors.
// Returns true if at least one such block was found.
5295bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5296 BasicBlock *BB = RI->getParent();
5297
5298 // Check that there are no other instructions except for debug and lifetime
5299 // intrinsics between the phi's and resume instruction.
// NOTE(review): the opening line of this `if (...)` condition is not visible
// in this listing -- confirm against the real file.
5301 make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
5302 return false;
5303
5304 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5305 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5306
5307 // Check incoming blocks to see if any of them are trivial.
5308 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5309 Idx++) {
5310 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5311 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5312
5313 // If the block has other successors, we can not delete it because
5314 // it has other dependents.
5315 if (IncomingBB->getUniqueSuccessor() != BB)
5316 continue;
5317
5318 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
5319 // Not the landing pad that caused the control to branch here.
// (This also skips the case where the dyn_cast above produced nullptr, so
// LandingPad is non-null when dereferenced below.)
5320 if (IncomingValue != LandingPad)
5321 continue;
5322
// NOTE(review): the opening line of this `if (...)` condition is not visible
// in this listing -- confirm against the real file.
5324 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5325 TrivialUnwindBlocks.insert(IncomingBB);
5326 }
5327
5328 // If no trivial unwind blocks, don't do any simplifications.
5329 if (TrivialUnwindBlocks.empty())
5330 return false;
5331
5332 // Turn all invokes that unwind here into calls.
5333 for (auto *TrivialBB : TrivialUnwindBlocks) {
5334 // Blocks that will be simplified should be removed from the phi node.
5335 // Note there could be multiple edges to the resume block, and we need
5336 // to remove them all.
5337 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5338 BB->removePredecessor(TrivialBB, true);
5339
5340 for (BasicBlock *Pred :
// NOTE(review): the range expression and opening brace of this loop are not
// visible in this listing -- confirm against the real file.
5342 removeUnwindEdge(Pred, DTU);
5343 ++NumInvokes;
5344 }
5345
5346 // In each SimplifyCFG run, only the current processed block can be erased.
5347 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5348 // of erasing TrivialBB, we only remove the branch to the common resume
5349 // block so that we can later erase the resume block since it has no
5350 // predecessors.
5351 TrivialBB->getTerminator()->eraseFromParent();
5352 new UnreachableInst(RI->getContext(), TrivialBB);
5353 if (DTU)
5354 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5355 }
5356
5357 // Delete the resume block if all its predecessors have been removed.
5358 if (pred_empty(BB))
5359 DeleteDeadBlock(BB, DTU);
5360
// Always true at this point: the empty case returned early above and the set
// is not modified afterwards.
5361 return !TrivialUnwindBlocks.empty();
5362}
5363
5364// Simplify resume that is only used by a single (non-phi) landing pad.
// Expects the first non-PHI instruction of RI's block to be a landing pad
// whose value RI resumes (enforced by the cast and the assert below). On
// success, removeUnwindEdge is called on the predecessors and the block is
// deleted. Returns true if the block was removed, false if it was left alone.
5365bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5366 BasicBlock *BB = RI->getParent();
5367 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
5368 assert(RI->getValue() == LPInst &&
5369 "Resume must unwind the exception that caused control to here");
5370
5371 // Check that there are no other instructions except for debug intrinsics.
// NOTE(review): the opening line of this `if (...)` condition is not visible
// in this listing -- confirm against the real file.
5373 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5374 return false;
5375
5376 // Turn all invokes that unwind here into calls and delete the basic block.
// NOTE(review): the loop header that introduces `Pred` is not visible in this
// listing -- confirm against the real file.
5378 removeUnwindEdge(Pred, DTU);
5379 ++NumInvokes;
5380 }
5381
5382 // The landingpad is now unreachable. Zap it.
5383 DeleteDeadBlock(BB, DTU);
5384 return true;
5385}
5386
// NOTE(review): the signature line of this function is not visible in this
// listing. The call site in this file is `removeEmptyCleanup(RI, DTU)` --
// confirm the exact parameter types against the real file.
// Returns true if the cleanup pad's block was removed, false if any of the
// preconditions checked below does not hold.
5388 // If this is a trivial cleanup pad that executes no instructions, it can be
5389 // eliminated. If the cleanup pad continues to the caller, any predecessor
5390 // that is an EH pad will be updated to continue to the caller and any
5391 // predecessor that terminates with an invoke instruction will have its invoke
5392 // instruction converted to a call instruction. If the cleanup pad being
5393 // simplified does not continue to the caller, each predecessor will be
5394 // updated to continue to the unwind destination of the cleanup pad being
5395 // simplified.
5396 BasicBlock *BB = RI->getParent();
5397 CleanupPadInst *CPInst = RI->getCleanupPad();
5398 if (CPInst->getParent() != BB)
5399 // This isn't an empty cleanup.
5400 return false;
5401
5402 // We cannot kill the pad if it has multiple uses. This typically arises
5403 // from unreachable basic blocks.
5404 if (!CPInst->hasOneUse())
5405 return false;
5406
5407 // Check that there are no other instructions except for benign intrinsics.
// NOTE(review): the opening line of this `if (...)` condition is not visible
// in this listing -- confirm against the real file.
5409 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5410 return false;
5411
5412 // If the cleanup return we are simplifying unwinds to the caller, this will
5413 // set UnwindDest to nullptr.
5414 BasicBlock *UnwindDest = RI->getUnwindDest();
5415 Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
5416
5417 // We're about to remove BB from the control flow. Before we do, sink any
5418 // PHINodes into the unwind destination. Doing this before changing the
5419 // control flow avoids some potentially slow checks, since we can currently
5420 // be certain that UnwindDest and BB have no common predecessors (since they
5421 // are both EH pads).
5422 if (UnwindDest) {
5423 // First, go through the PHI nodes in UnwindDest and update any nodes that
5424 // reference the block we are removing
5425 for (PHINode &DestPN : UnwindDest->phis()) {
5426 int Idx = DestPN.getBasicBlockIndex(BB);
5427 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5428 assert(Idx != -1);
5429 // This PHI node has an incoming value that corresponds to a control
5430 // path through the cleanup pad we are removing. If the incoming
5431 // value is in the cleanup pad, it must be a PHINode (because we
5432 // verified above that the block is otherwise empty). Otherwise, the
5433 // value is either a constant or a value that dominates the cleanup
5434 // pad being removed.
5435 //
5436 // Because BB and UnwindDest are both EH pads, all of their
5437 // predecessors must unwind to these blocks, and since no instruction
5438 // can have multiple unwind destinations, there will be no overlap in
5439 // incoming blocks between SrcPN and DestPN.
5440 Value *SrcVal = DestPN.getIncomingValue(Idx);
5441 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5442
// Translate through SrcPN only when it lives in BB; otherwise every
// predecessor of BB receives SrcVal unchanged.
5443 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5444 for (auto *Pred : predecessors(BB)) {
5445 Value *Incoming =
5446 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5447 DestPN.addIncoming(Incoming, Pred);
5448 }
5449 }
5450
5451 // Sink any remaining PHI nodes directly into UnwindDest.
5452 Instruction *InsertPt = DestEHPad;
5453 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5454 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5455 // If the PHI node has no uses or all of its uses are in this basic
5456 // block (meaning they are debug or lifetime intrinsics), just leave
5457 // it. It will be erased when we erase BB below.
5458 continue;
5459
5460 // Otherwise, sink this PHI node into UnwindDest.
5461 // Any predecessors to UnwindDest which are not already represented
5462 // must be back edges which inherit the value from the path through
5463 // BB. In this case, the PHI value must reference itself.
5464 for (auto *pred : predecessors(UnwindDest))
5465 if (pred != BB)
5466 PN.addIncoming(&PN, pred);
5467 PN.moveBefore(InsertPt);
5468 // Also, add a dummy incoming value for the original BB itself,
5469 // so that the PHI is well-formed until we drop said predecessor.
5470 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5471 }
5472 }
5473
5474 std::vector<DominatorTree::UpdateType> Updates;
5475
5476 // We use make_early_inc_range here because we will remove all predecessors.
// NOTE(review): the loop header that introduces `PredBB` is not visible in
// this listing -- confirm against the real file.
5478 if (UnwindDest == nullptr) {
// Pending updates are applied before removeUnwindEdge, which is itself handed
// DTU.
5479 if (DTU) {
5480 DTU->applyUpdates(Updates);
5481 Updates.clear();
5482 }
5483 removeUnwindEdge(PredBB, DTU);
5484 ++NumInvokes;
5485 } else {
// Retarget the predecessor's terminator from BB to UnwindDest.
5486 BB->removePredecessor(PredBB);
5487 Instruction *TI = PredBB->getTerminator();
5488 TI->replaceUsesOfWith(BB, UnwindDest);
5489 if (DTU) {
5490 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5491 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5492 }
5493 }
5494 }
5495
5496 if (DTU)
5497 DTU->applyUpdates(Updates);
5498
5499 DeleteDeadBlock(BB, DTU);
5500
5501 return true;
5502}
5503
5504// Try to merge two cleanuppads together.
// Applies when RI unwinds to a block whose single predecessor is RI's block
// and whose first instruction is a cleanuppad. That successor pad is replaced
// by RI's own pad and erased, and RI becomes an unconditional branch to the
// unwind destination. Returns true if the merge happened.
// NOTE(review): the signature line of this function is not visible in this
// listing. The call site in this file is `mergeCleanupPad(RI)` -- confirm the
// exact signature against the real file.
5506 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5507 // with.
5508 BasicBlock *UnwindDest = RI->getUnwindDest();
5509 if (!UnwindDest)
5510 return false;
5511
5512 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5513 // be safe to merge without code duplication.
5514 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5515 return false;
5516
5517 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5518 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5519 if (!SuccessorCleanupPad)
5520 return false;
5521
5522 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5523 // Replace any uses of the successor cleanuppad with the predecessor pad.
5524 // The only cleanuppad uses should be this cleanupret, its cleanupret and
5525 // funclet bundle operands.
5526 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5527 // Remove the old cleanuppad.
5528 SuccessorCleanupPad->eraseFromParent();
5529 // Now, we simply replace the cleanupret with a branch to the unwind
5530 // destination.
5531 BranchInst::Create(UnwindDest, RI->getParent());
5532 RI->eraseFromParent();
5533
5534 return true;
5535}
5536
5537bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5538 // It is possible to transiantly have an undef cleanuppad operand because we
5539 // have deleted some, but not all, dead blocks.
5540 // Eventually, this block will be deleted.
5541 if (isa<UndefValue>(RI->getOperand(0)))
5542 return false;
5543
5544 if (mergeCleanupPad(RI))
5545 return true;
5546
5547 if (removeEmptyCleanup(RI, DTU))
5548 return true;
5549
5550 return false;
5551}
5552
5553// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Simplifies around an `unreachable` terminator in two phases:
//   1. erase instructions that immediately precede it in the same block;
//   2. if the block then consists of the unreachable only, rewrite each
//      predecessor's terminator (branch, switch, invoke, catchswitch,
//      cleanupret) so that it no longer targets this block.
// Returns true if anything was changed, including deletion of the block.
5554bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5555 BasicBlock *BB = UI->getParent();
5556
5557 bool Changed = false;
5558
5559 // Ensure that any debug-info records that used to occur after the Unreachable
5560 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5561 // the block.
// NOTE(review): the statement that performs this move is not visible in this
// listing -- confirm against the real file.
5563
5564 // Debug-info records on the unreachable inst itself should be deleted, as
5565 // below we delete everything past the final executable instruction.
5566 UI->dropDbgRecords();
5567
5568 // If there are any instructions immediately before the unreachable that can
5569 // be removed, do so.
5570 while (UI->getIterator() != BB->begin()) {
// NOTE(review): the declaration of `BBI` is not visible in this listing --
// presumably an iterator positioned at UI; confirm against the real file.
5572 --BBI;
5573
// NOTE(review): the condition guarding this `break` is not visible in this
// listing -- confirm against the real file.
5575 break; // Can not drop any more instructions. We're done here.
5576 // Otherwise, this instruction can be freely erased,
5577 // even if it is not side-effect free.
5578
5579 // Note that deleting EH's here is in fact okay, although it involves a bit
5580 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5581 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5582 // and we can therefore guarantee this block will be erased.
5583
5584 // If we're deleting this, we're deleting any subsequent debug info, so
5585 // delete DbgRecords.
5586 BBI->dropDbgRecords();
5587
5588 // Delete this instruction (any uses are guaranteed to be dead)
5589 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5590 BBI->eraseFromParent();
5591 Changed = true;
5592 }
5593
5594 // If the unreachable instruction is the first in the block, take a gander
5595 // at all of the predecessors of this instruction, and simplify them.
5596 if (&BB->front() != UI)
5597 return Changed;
5598
5599 std::vector<DominatorTree::UpdateType> Updates;
5600
// NOTE(review): the declaration of `Preds` is not visible in this listing --
// presumably a snapshot of BB's predecessors; confirm against the real file.
5602 for (BasicBlock *Predecessor : Preds) {
5603 Instruction *TI = Predecessor->getTerminator();
5604 IRBuilder<> Builder(TI);
5605 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5606 // We could either have a proper unconditional branch,
5607 // or a degenerate conditional branch with matching destinations.
5608 if (all_of(BI->successors(),
5609 [BB](auto *Successor) { return Successor == BB; })) {
// Every successor is BB, so the predecessor itself ends in unreachable.
5610 new UnreachableInst(TI->getContext(), TI->getIterator());
5611 TI->eraseFromParent();
5612 Changed = true;
5613 } else {
5614 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5615 Value* Cond = BI->getCondition();
5616 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5617 "The destinations are guaranteed to be different here.");
// Record as an assumption that the condition takes the value which avoids BB,
// then branch unconditionally to the other successor.
5618 CallInst *Assumption;
5619 if (BI->getSuccessor(0) == BB) {
5620 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5621 Builder.CreateBr(BI->getSuccessor(1));
5622 } else {
5623 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5624 Assumption = Builder.CreateAssumption(Cond);
5625 Builder.CreateBr(BI->getSuccessor(0));
5626 }
5627 if (Options.AC)
5628 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5629
// NOTE(review): a statement is missing from this listing here (presumably the
// removal of the old conditional branch) -- confirm against the real file.
5631 Changed = true;
5632 }
5633 if (DTU)
5634 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5635 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
// NOTE(review): the declaration of `SU` is not visible in this listing -- it
// is used both with `->` (switch accessors) and `.removeCase`; confirm its
// type against the real file.
5637 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5638 if (i->getCaseSuccessor() != BB) {
5639 ++i;
5640 continue;
5641 }
5642 BB->removePredecessor(SU->getParent());
// removeCase's result is used as the next position, and the end iterator is
// refreshed after each removal.
5643 i = SU.removeCase(i);
5644 e = SU->case_end();
5645 Changed = true;
5646 }
5647 // Note that the default destination can't be removed!
5648 if (DTU && SI->getDefaultDest() != BB)
5649 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5650 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5651 if (II->getUnwindDest() == BB) {
5652 if (DTU) {
5653 DTU->applyUpdates(Updates);
5654 Updates.clear();
5655 }
// The result of removeUnwindEdge is cast to the call that replaced the
// invoke, and that call is marked as not throwing.
5656 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5657 if (!CI->doesNotThrow())
5658 CI->setDoesNotThrow();
5659 Changed = true;
5660 }
5661 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5662 if (CSI->getUnwindDest() == BB) {
5663 if (DTU) {
5664 DTU->applyUpdates(Updates);
5665 Updates.clear();
5666 }
5667 removeUnwindEdge(TI->getParent(), DTU);
5668 Changed = true;
5669 continue;
5670 }
5671
5672 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5673 E = CSI->handler_end();
5674 I != E; ++I) {
5675 if (*I == BB) {
5676 CSI->removeHandler(I);
// Step both iterators back after a removal; presumably removeHandler shifts
// the later handlers down by one slot, so the loop's ++I then lands on the
// next unvisited handler -- TODO confirm.
5677 --I;
5678 --E;
5679 Changed = true;
5680 }
5681 }
5682 if (DTU)
5683 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5684 if (CSI->getNumHandlers() == 0) {
5685 if (CSI->hasUnwindDest()) {
5686 // Redirect all predecessors of the block containing CatchSwitchInst
5687 // to instead branch to the CatchSwitchInst's unwind destination.
5688 if (DTU) {
5689 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5690 Updates.push_back({DominatorTree::Insert,
5691 PredecessorOfPredecessor,
5692 CSI->getUnwindDest()});
5693 Updates.push_back({DominatorTree::Delete,
5694 PredecessorOfPredecessor, Predecessor});
5695 }
5696 }
5697 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5698 } else {
5699 // Rewrite all preds to unwind to caller (or from invoke to call).
5700 if (DTU) {
5701 DTU->applyUpdates(Updates);
5702 Updates.clear();
5703 }
// Iterate over a copy of the predecessor list while removeUnwindEdge is
// called on each entry.
5704 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5705 for (BasicBlock *EHPred : EHPreds)
5706 removeUnwindEdge(EHPred, DTU);
5707 }
5708 // The catchswitch is no longer reachable.
5709 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5710 CSI->eraseFromParent();
5711 Changed = true;
5712 }
5713 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
// CRI is only read inside the assert; the void cast keeps it "used" when the
// assert is compiled out.
5714 (void)CRI;
5715 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5716 "Expected to always have an unwind to BB.");
5717 if (DTU)
5718 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5719 new UnreachableInst(TI->getContext(), TI->getIterator());
5720 TI->eraseFromParent();
5721 Changed = true;
5722 }
5723 }
5724
5725 if (DTU)
5726 DTU->applyUpdates(Updates);
5727
5728 // If this block is now dead, remove it.
5729 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5730 DeleteDeadBlock(BB, DTU);
5731 return true;
5732 }
5733
5734 return Changed;
5735}
5736
// Returns true when every element of Cases is exactly one less than the
// element before it, i.e. the values form a descending run with step 1. A
// single-element list trivially qualifies. Cases must not be empty.
// NOTE(review): the signature line and one statement before the loop are not
// visible in this listing (presumably a sort of Cases) -- confirm against the
// real file. The name comes from the call sites `casesAreContiguous(CasesA)`
// and `casesAreContiguous(CasesB)` in this file.
5738 assert(Cases.size() >= 1);
5739
5741 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
// Each neighbouring pair must differ by exactly one, larger value first.
5742 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5743 return false;
5744 }
5745 return true;
5746}
5747
5749 DomTreeUpdater *DTU,
5750 bool RemoveOrigDefaultBlock = true) {
// Replaces the default destination of `Switch` with a freshly created block
// that contains only an `unreachable` instruction.
//   DTU                    - optional dominator tree updater; when non-null it
//                            receives the edge insertion (and possibly the
//                            deletion) computed below.
//   RemoveOrigDefaultBlock - when true, the switch's block is first removed
//                            as a predecessor of the original default block.
// NOTE(review): the first line of the signature (return type, name and the
// `Switch` parameter) is not visible in this listing; the name comes from the
// call site `createUnreachableSwitchDefault(SI, DTU, ...)` in this file.
5751 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5752 auto *BB = Switch->getParent();
5753 auto *OrigDefaultBlock = Switch->getDefaultDest();
5754 if (RemoveOrigDefaultBlock)
5755 OrigDefaultBlock->removePredecessor(BB);
// OrigDefaultBlock is passed as the final argument of BasicBlock::Create, the
// insert-before position for the new block.
5756 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5757 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5758 OrigDefaultBlock);
5759 new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5760 Switch->setDefaultDest(&*NewDefaultBlock);
5761 if (DTU) {
// NOTE(review): the declaration of `Updates` is not visible in this listing
// -- confirm against the real file.
5763 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
// The old edge is deleted from the dominator tree only if no case still
// targets the original default block.
5764 if (RemoveOrigDefaultBlock &&
5765 !is_contained(successors(BB), OrigDefaultBlock))
5766 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5767 DTU->applyUpdates(Updates);
5768 }
5769}
5770
5771/// Turn a switch into an integer range comparison and branch.
5772/// Switches with more than 2 destinations are ignored.
5773/// Switches with 1 destination are also ignored.
///
/// The cases are split by destination into two sets. If one set forms a
/// contiguous range, the switch is replaced by `(Cond - Low) u< NumCases`
/// feeding a conditional branch. Existing branch weights are folded into the
/// two-way branch.
///
/// \param SI      The switch to rewrite; must have more than one case.
/// \param Builder Builder used to emit the add, compare and branch.
/// \returns true if the switch was replaced, false if it was left untouched.
5774bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5775 IRBuilder<> &Builder) {
5776 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5777
// The default counts as a real destination only if its block does not start
// (after PHIs and debug instructions) with `unreachable`.
5778 bool HasDefault =
5779 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5780
5781 auto *BB = SI->getParent();
5782
5783 // Partition the cases into two sets with different destinations.
5784 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5785 BasicBlock *DestB = nullptr;
// NOTE(review): the declarations of `CasesA` and `CasesB` are not visible in
// this listing -- confirm against the real file.
5788
5789 for (auto Case : SI->cases()) {
5790 BasicBlock *Dest = Case.getCaseSuccessor();
5791 if (!DestA)
5792 DestA = Dest;
5793 if (Dest == DestA) {
5794 CasesA.push_back(Case.getCaseValue());
5795 continue;
5796 }
5797 if (!DestB)
5798 DestB = Dest;
5799 if (Dest == DestB) {
5800 CasesB.push_back(Case.getCaseValue());
5801 continue;
5802 }
5803 return false; // More than two destinations.
5804 }
5805 if (!DestB)
5806 return false; // All destinations are the same and the default is unreachable
5807
5808 assert(DestA && DestB &&
5809 "Single-destination switch should have been folded.");
5810 assert(DestA != DestB);
5811 assert(DestB != SI->getDefaultDest());
5812 assert(!CasesB.empty() && "There must be non-default cases.");
5813 assert(!CasesA.empty() || HasDefault);
5814
5815 // Figure out if one of the sets of cases form a contiguous range.
5816 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5817 BasicBlock *ContiguousDest = nullptr;
5818 BasicBlock *OtherDest = nullptr;
5819 if (!CasesA.empty() && casesAreContiguous(CasesA)) {
5820 ContiguousCases = &CasesA;
5821 ContiguousDest = DestA;
5822 OtherDest = DestB;
5823 } else if (casesAreContiguous(CasesB)) {
5824 ContiguousCases = &CasesB;
5825 ContiguousDest = DestB;
5826 OtherDest = DestA;
5827 } else
5828 return false;
5829
5830 // Start building the compare and branch.
5831
// casesAreContiguous only accepts a descending run, so back() is the lowest
// value of the range; adding its negation rebases the range at zero.
5832 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
5833 Constant *NumCases =
5834 ConstantInt::get(Offset->getType(), ContiguousCases->size());
5835
5836 Value *Sub = SI->getCondition();
5837 if (!Offset->isNullValue())
5838 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5839
5840 Value *Cmp;
5841 // If NumCases overflowed, then all possible values jump to the successor.
5842 if (NumCases->isNullValue() && !ContiguousCases->empty())
5843 Cmp = ConstantInt::getTrue(SI->getContext());
5844 else
5845 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5846 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
5847
5848 // Update weight for the newly-created conditional branch.
5849 if (hasBranchWeightMD(*SI)) {
// NOTE(review): the declaration of `Weights` is not visible in this listing
// -- confirm against the real file.
5851 getBranchWeights(SI, Weights);
5852 if (Weights.size() == 1 + SI->getNumCases()) {
5853 uint64_t TrueWeight = 0;
5854 uint64_t FalseWeight = 0;
// Sum the per-successor weights into the two sides of the new branch.
5855 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5856 if (SI->getSuccessor(I) == ContiguousDest)
5857 TrueWeight += Weights[I];
5858 else
5859 FalseWeight += Weights[I];
5860 }
// Halve both sums together until each fits in 32 bits.
5861 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5862 TrueWeight /= 2;
5863 FalseWeight /= 2;
5864 }
5865 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
5866 }
5867 }
5868
5869 // Prune obsolete incoming values off the successors' PHI nodes.
// Each destination had one incoming edge per case (plus one if it is also the
// default). The new branch contributes a single edge, so all but one entry
// are removed.
5870 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
5871 unsigned PreviousEdges = ContiguousCases->size();
5872 if (ContiguousDest == SI->getDefaultDest())
5873 ++PreviousEdges;
5874 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5875 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5876 }
5877 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5878 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5879 if (OtherDest == SI->getDefaultDest())
5880 ++PreviousEdges;
5881 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5882 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5883 }
5884
5885 // Clean up the default block - it may have phis or other instructions before
5886 // the unreachable terminator.
5887 if (!HasDefault)
// NOTE(review): the statement controlled by the `if` above is not visible in
// this listing; as shown, the `if` would bind to the declaration below --
// confirm against the real file.
5889
// Remember the default destination before the switch is erased; it is needed
// for the dominator tree update below.
5890 auto *UnreachableDefault = SI->getDefaultDest();
5891
5892 // Drop the switch.
5893 SI->eraseFromParent();
5894
5895 if (!HasDefault && DTU)
5896 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5897
5898 return true;
5899}
5900
5901/// Compute masked bits for the condition of a switch
5902/// and use it to remove dead cases.
///
/// A case is dead when its value contradicts the known-zero/known-one bits of
/// the condition, or needs more significant bits than the condition can have.
/// If no case is dead and the cases cover every possible value, the default
/// is made unreachable; if exactly one value is missing, it is added as a
/// case that targets the old default.
///
/// Returns true if the switch was modified.
///
/// NOTE(review): the first line of the signature (return type, name and the
/// leading parameters, which must include `SI` and `DTU`) is not visible in
/// this listing -- confirm against the real file.
5904 AssumptionCache *AC,
5905 const DataLayout &DL) {
5906 Value *Cond = SI->getCondition();
5907 KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
5908
5909 // We can also eliminate cases by determining that their values are outside of
5910 // the limited range of the condition based on how many significant (non-sign)
5911 // bits are in the condition value.
5912 unsigned MaxSignificantBitsInCond =
5913 ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);
5914
5915 // Gather dead cases.
// NOTE(review): the declaration of `DeadCases` is not visible in this listing
// -- confirm against the real file.
// NumPerSuccessorCases counts, per successor, the cases that stay live; it is
// only maintained when a DTU is present.
5917 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5918 SmallVector<BasicBlock *, 8> UniqueSuccessors;
5919 for (const auto &Case : SI->cases()) {
5920 auto *Successor = Case.getCaseSuccessor();
5921 if (DTU) {
5922 if (!NumPerSuccessorCases.count(Successor))
5923 UniqueSuccessors.push_back(Successor);
5924 ++NumPerSuccessorCases[Successor];
5925 }
5926 const APInt &CaseVal = Case.getCaseValue()->getValue();
5927 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
5928 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5929 DeadCases.push_back(Case.getCaseValue());
5930 if (DTU)
5931 --NumPerSuccessorCases[Successor];
5932 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5933 << " is dead.\n");
5934 }
5935 }
5936
5937 // If we can prove that the cases must cover all possible values, the
5938 // default destination becomes dead and we can remove it. If we know some
5939 // of the bits in the value, we can use that to more precisely compute the
5940 // number of possible unique case values.
5941 bool HasDefault =
5942 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5943 const unsigned NumUnknownBits =
5944 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5945 assert(NumUnknownBits <= Known.getBitWidth());
5946 if (HasDefault && DeadCases.empty() &&
5947 NumUnknownBits < 64 /* avoid overflow */) {
5948 uint64_t AllNumCases = 1ULL << NumUnknownBits;
5949 if (SI->getNumCases() == AllNumCases) {
// NOTE(review): the statement executed in this branch is not visible in this
// listing -- confirm against the real file.
5951 return true;
5952 }
5953 // When only one case value is missing, replace default with that case.
5954 // Eliminating the default branch will provide more opportunities for
5955 // optimization, such as lookup tables.
5956 if (SI->getNumCases() == AllNumCases - 1) {
5957 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
5958 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
5959 if (CondTy->getIntegerBitWidth() > 64 ||
5960 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
5961 return false;
5962
// With more than one unknown bit, every bit position is set in an even number
// of the possible condition values, so XOR-ing all of them gives 0. XOR-ing
// just the present cases therefore leaves the single missing value.
5963 uint64_t MissingCaseVal = 0;
5964 for (const auto &Case : SI->cases())
5965 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
5966 auto *MissingCase =
5967 cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
// NOTE(review): the declaration of `SIW` is not visible in this listing --
// confirm against the real file.
// The new case targets the old default and takes over the weight of
// successor 0, which is then reset to 0 below.
5969 SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
5970 createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
5971 SIW.setSuccessorWeight(0, 0);
5972 return true;
5973 }
5974 }
5975
5976 if (DeadCases.empty())
5977 return false;
5978
// NOTE(review): the declaration of the `SIW` used in this loop is not visible
// in this listing -- confirm against the real file.
5980 for (ConstantInt *DeadCase : DeadCases) {
5981 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
5982 assert(CaseI != SI->case_default() &&
5983 "Case was not found. Probably mistake in DeadCases forming.");
5984 // Prune unused values from PHI nodes.
5985 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
5986 SIW.removeCase(CaseI);
5987 }
5988
5989 if (DTU) {
5990 std::vector<DominatorTree::UpdateType> Updates;
// An edge is deleted from the dominator tree only for successors that lost
// every one of their cases.
5991 for (auto *Successor : UniqueSuccessors)
5992 if (NumPerSuccessorCases[Successor] == 0)
5993 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
5994 DTU->applyUpdates(Updates);
5995 }
5996
5997 return true;
5998}
5999
6000/// If BB would be eligible for simplification by
6001/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6002/// by an unconditional branch), look at the phi node for BB in the successor
6003/// block and see if the incoming value is equal to CaseValue. If so, return
6004/// the phi node, and set PhiIndex to BB's index in the phi node.
///
/// Returns nullptr (leaving *PhiIndex untouched) when BB does not qualify or
/// when no phi in the successor has CaseValue as its incoming value for BB.
/// Only the first matching phi is reported.
///
/// NOTE(review): the first line of the signature (return type, name and the
/// `CaseValue` parameter) is not visible in this listing; the name comes from
/// the call site `findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx)`
/// in this file.
6006 BasicBlock *BB, int *PhiIndex) {
6007 if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
6008 return nullptr; // BB must be empty to be a candidate for simplification.
6009 if (!BB->getSinglePredecessor())
6010 return nullptr; // BB must be dominated by the switch.
6011
6012 BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
6013 if (!Branch || !Branch->isUnconditional())
6014 return nullptr; // Terminator must be unconditional branch.
6015
6016 BasicBlock *Succ = Branch->getSuccessor(0);
6017
6018 for (PHINode &PHI : Succ->phis()) {
6019 int Idx = PHI.getBasicBlockIndex(BB);
6020 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6021
6022 Value *InValue = PHI.getIncomingValue(Idx);
6023 if (InValue != CaseValue)
6024 continue;
6025
6026 *PhiIndex = Idx;
6027 return &PHI;
6028 }
6029
6030 return nullptr;
6031}
6032
6033/// Try to forward the condition of a switch instruction to a phi node
6034/// dominated by the switch, if that would mean that some of the destination
6035/// blocks of the switch can be folded away. Return true if a change is made.
///
/// Two rewrites are performed: phi operands in direct case successors that
/// equal the case constant are replaced by the switch condition, and phis
/// reached through an empty forwarding block (as found by
/// findPHIForConditionForwarding) are rewritten when that is judged helpful.
// NOTE(review): the signature line (listing line 6036) is absent from this
// extract; the body below uses a switch instruction named SI.
6037 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6038
 // Maps each candidate phi to the incoming-value indices that could be
 // replaced by the switch condition.
6039 ForwardingNodesMap ForwardingNodes;
6040 BasicBlock *SwitchBlock = SI->getParent();
6041 bool Changed = false;
6042 for (const auto &Case : SI->cases()) {
6043 ConstantInt *CaseValue = Case.getCaseValue();
6044 BasicBlock *CaseDest = Case.getCaseSuccessor();
6045
6046 // Replace phi operands in successor blocks that are using the constant case
6047 // value rather than the switch condition variable:
6048 // switchbb:
6049 // switch i32 %x, label %default [
6050 // i32 17, label %succ
6051 // ...
6052 // succ:
6053 // %r = phi i32 ... [ 17, %switchbb ] ...
6054 // -->
6055 // %r = phi i32 ... [ %x, %switchbb ] ...
6056
6057 for (PHINode &Phi : CaseDest->phis()) {
6058 // This only works if there is exactly 1 incoming edge from the switch to
6059 // a phi. If there is >1, that means multiple cases of the switch map to 1
6060 // value in the phi, and that phi value is not the switch condition. Thus,
6061 // this transform would not make sense (the phi would be invalid because
6062 // a phi can't have different incoming values from the same block).
6063 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6064 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6065 count(Phi.blocks(), SwitchBlock) == 1) {
6066 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6067 Changed = true;
6068 }
6069 }
6070
6071 // Collect phi nodes that are indirectly using this switch's case constants.
6072 int PhiIdx;
6073 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6074 ForwardingNodes[Phi].push_back(PhiIdx);
6075 }
6076
6077 for (auto &ForwardingNode : ForwardingNodes) {
6078 PHINode *Phi = ForwardingNode.first;
6079 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6080 // Check if it helps to fold PHI: skip it unless at least two entries were
 // collected for it, or the switch condition is already one of its incoming
 // values.
6081 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6082 continue;
6083
6084 for (int Index : Indexes)
6085 Phi->setIncomingValue(Index, SI->getCondition());
6086 Changed = true;
6087 }
6088
6089 return Changed;
6090}
6091
6092/// Return true if the backend will be able to handle
6093/// initializing an array of constants like C.
///
/// Thread-dependent and DLL-import-dependent constants are rejected, as is any
/// constant kind other than FP, integer, null pointer, global value, undef or
/// constant expression.
// NOTE(review): the signature line (listing line 6094) and the condition
// guarding the final `return false` (listing line 6113) are absent from this
// extract. The recursive call below shows the parameters are (C, TTI).
6095 if (C->isThreadDependent())
6096 return false;
6097 if (C->isDLLImportDependent())
6098 return false;
6099
6100 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6101 !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
6102 !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
6103 return false;
6104
6105 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
6106 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6107 // materializing the array of constants.
 // If stripping changes nothing (StrippedC == C) the expression is some other
 // kind of constant expression and is rejected; otherwise the stripped
 // constant is validated recursively.
6108 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6109 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6110 return false;
6111 }
6112
6114 return false;
6115
6116 return true;
6117}
6118
6119/// If V is a Constant, return it. Otherwise, try to look up
6120/// its constant value in ConstantPool, returning 0 if it's not there.
// NOTE(review): the lines carrying the function name and parameter list
// (listing lines 6122-6123) are absent from this extract; the body reads a
// value V and a map-like ConstantPool.
6121static Constant *
6124 if (Constant *C = dyn_cast<Constant>(V))
6125 return C;
6126 return ConstantPool.lookup(V);
6127}
6128
6129/// Try to fold instruction I into a constant. This works for
6130/// simple instructions such as binary operations where both operands are
6131/// constant or can be replaced by constants from the ConstantPool. Returns the
6132/// resulting constant on success, 0 otherwise.
// NOTE(review): the lines carrying the function name and parameters (listing
// lines 6134-6135) and the declaration of COps (listing line 6147) are absent
// from this extract.
6133static Constant *
6136 if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
 // A select folds only when its condition resolves to an all-ones or a null
 // constant; the chosen arm must then itself resolve through lookupConstant.
6137 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6138 if (!A)
6139 return nullptr;
6140 if (A->isAllOnesValue())
6141 return lookupConstant(Select->getTrueValue(), ConstantPool);
6142 if (A->isNullValue())
6143 return lookupConstant(Select->getFalseValue(), ConstantPool);
6144 return nullptr;
6145 }
6146
 // Every operand must resolve to a constant; give up on the first that does
 // not.
6148 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6149 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6150 COps.push_back(A);
6151 else
6152 return nullptr;
6153 }
6154
6155 return ConstantFoldInstOperands(I, COps, DL);
6156}
6157
6158/// Try to determine the resulting constant values in phi nodes
6159/// at the common destination basic block, *CommonDest, for one of the case
6160/// destinations CaseDest corresponding to value CaseVal (0 for the default
6161/// case), of a switch instruction SI.
///
/// On success the (phi, constant) pairs are appended to Res. *CommonDest is
/// set to the reached destination if it was null on entry. Returns false when
/// the walk fails, when the reached destination differs from *CommonDest, when
/// a phi's incoming value is not a supported constant, or when Res is empty at
/// the end.
// NOTE(review): the line carrying the function name and first parameters
// (listing line 6163) and the declaration of ConstantPool (listing line 6172)
// are absent from this extract.
6162static bool
6164 BasicBlock **CommonDest,
6165 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6166 const DataLayout &DL, const TargetTransformInfo &TTI) {
6167 // The block from which we enter the common destination.
6168 BasicBlock *Pred = SI->getParent();
6169
6170 // If CaseDest is empty except for some side-effect free instructions through
6171 // which we can constant-propagate the CaseVal, continue to its successor.
6173 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6174 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6175 if (I.isTerminator()) {
6176 // If the terminator is a simple branch, continue to the next block.
6177 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6178 return false;
6179 Pred = CaseDest;
6180 CaseDest = I.getSuccessor(0);
6181 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6182 // Instruction is side-effect free and constant.
6183
6184 // If the instruction has uses outside this block or a phi node slot for
6185 // the block, it is not safe to bypass the instruction since it would then
6186 // no longer dominate all its uses.
6187 for (auto &Use : I.uses()) {
6188 User *User = Use.getUser();
6189 if (Instruction *I = dyn_cast<Instruction>(User))
6190 if (I->getParent() == CaseDest)
6191 continue;
6192 if (PHINode *Phi = dyn_cast<PHINode>(User))
6193 if (Phi->getIncomingBlock(Use) == CaseDest)
6194 continue;
6195 return false;
6196 }
6197
6198 ConstantPool.insert(std::make_pair(&I, C));
6199 } else {
 // First instruction that cannot be folded: stop scanning. CaseDest and
 // Pred keep their current values, so the comparison against *CommonDest
 // below decides the outcome.
6200 break;
6201 }
6202 }
6203
6204 // If we did not have a CommonDest before, use the current one.
6205 if (!*CommonDest)
6206 *CommonDest = CaseDest;
6207 // If the destination isn't the common one, abort.
6208 if (CaseDest != *CommonDest)
6209 return false;
6210
6211 // Get the values for this case from phi nodes in the destination block.
6212 for (PHINode &PHI : (*CommonDest)->phis()) {
6213 int Idx = PHI.getBasicBlockIndex(Pred);
 // Phis without an incoming edge from Pred are not affected by this case.
6214 if (Idx == -1)
6215 continue;
6216
6217 Constant *ConstVal =
6218 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6219 if (!ConstVal)
6220 return false;
6221
6222 // Be conservative about which kinds of constants we support.
6223 if (!validLookupTableConstant(ConstVal, TTI))
6224 return false;
6225
6226 Res.push_back(std::make_pair(&PHI, ConstVal));
6227 }
6228
6229 return Res.size() > 0;
6230}
6231
6232// Helper function used to add CaseVal to the list of cases that generate
6233// Result. Returns the updated number of cases that generate this result.
6234static size_t mapCaseToResult(ConstantInt *CaseVal,
6235 SwitchCaseResultVectorTy &UniqueResults,
6236 Constant *Result) {
6237 for (auto &I : UniqueResults) {
6238 if (I.first == Result) {
6239 I.second.push_back(CaseVal);
6240 return I.second.size();
6241 }
6242 }
6243 UniqueResults.push_back(
6244 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6245 return 1;
6246}
6247
6248// Helper function that initializes a map containing
6249// results for the PHI node of the common destination block for a switch
6250// instruction. Returns false if multiple PHI nodes have been found or if
6251// there is not a common destination block for the switch.
//
// Also returns false when one result is produced by more than
// MaxSwitchCasesPerResult cases, when there are more than MaxUniqueResults
// distinct results, or when no default result is found and the default
// destination does not start with an unreachable instruction.
// NOTE(review): the line carrying the function name and first parameters
// (listing line 6252) and the declaration of DefaultResults (listing line
// 6291) are absent from this extract. The body uses SI and a PHI variable that
// it assigns, so PHI is presumably an in/out parameter.
6253 BasicBlock *&CommonDest,
6254 SwitchCaseResultVectorTy &UniqueResults,
6255 Constant *&DefaultResult,
6256 const DataLayout &DL,
6257 const TargetTransformInfo &TTI,
6258 uintptr_t MaxUniqueResults) {
6259 for (const auto &I : SI->cases()) {
6260 ConstantInt *CaseVal = I.getCaseValue();
6261
6262 // Resulting value at phi nodes for this case value.
6263 SwitchCaseResultsTy Results;
6264 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6265 DL, TTI))
6266 return false;
6267
6268 // Only one value per case is permitted.
6269 if (Results.size() > 1)
6270 return false;
6271
6272 // Add the case->result mapping to UniqueResults.
6273 const size_t NumCasesForResult =
6274 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6275
6276 // Early out if there are too many cases for this result.
6277 if (NumCasesForResult > MaxSwitchCasesPerResult)
6278 return false;
6279
6280 // Early out if there are too many unique results.
6281 if (UniqueResults.size() > MaxUniqueResults)
6282 return false;
6283
6284 // Check the PHI consistency.
6285 if (!PHI)
6286 PHI = Results[0].first;
6287 else if (PHI != Results[0].first)
6288 return false;
6289 }
6290 // Find the default result value.
6292 BasicBlock *DefaultDest = SI->getDefaultDest();
 // The return value is deliberately ignored here; only the contents of
 // DefaultResults are inspected below.
6293 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6294 DL, TTI);
6295 // If the default value is not found abort unless the default destination
6296 // is unreachable.
6297 DefaultResult =
6298 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6299 if ((!DefaultResult &&
6300 !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
6301 return false;
6302
6303 return true;
6304}
6305
6306// Helper function that checks if it is possible to transform a switch with only
6307// two cases (or two cases + default) that produces a result into a select.
6308// TODO: Handle switches with more than 2 cases that map to the same result.
//
// ResultVector pairs each distinct result with the case values producing it.
// DefaultResult may be null. Instructions are emitted through Builder at its
// current insertion point. Returns the value computing the result, or nullptr
// if none of the supported patterns matches.
6309static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6310 Constant *DefaultResult, Value *Condition,
6311 IRBuilder<> &Builder) {
6312 // If we are selecting between only two cases transform into a simple
6313 // select or a two-way select if default is possible.
6314 // Example:
6315 // switch (a) { %0 = icmp eq i32 %a, 10
6316 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6317 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6318 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6319 // }
6320 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6321 ResultVector[1].second.size() == 1) {
6322 ConstantInt *FirstCase = ResultVector[0].second[0];
6323 ConstantInt *SecondCase = ResultVector[1].second[0];
 // Without a default result, the second result is used directly as the
 // "else" arm of the outer select.
6324 Value *SelectValue = ResultVector[1].first;
6325 if (DefaultResult) {
6326 Value *ValueCompare =
6327 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6328 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6329 DefaultResult, "switch.select");
6330 }
6331 Value *ValueCompare =
6332 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6333 return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6334 SelectValue, "switch.select");
6335 }
6336
6337 // Handle the degenerate case where two cases have the same result value.
6338 if (ResultVector.size() == 1 && DefaultResult) {
6339 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6340 unsigned CaseCount = CaseValues.size();
6341 // n bits group cases map to the same result:
6342 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6343 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6344 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6345 if (isPowerOf2_32(CaseCount)) {
6346 ConstantInt *MinCaseVal = CaseValues[0];
6347 // Find minimal value (signed comparison).
6348 for (auto *Case : CaseValues)
6349 if (Case->getValue().slt(MinCaseVal->getValue()))
6350 MinCaseVal = Case;
6351
6352 // Mark the bits case number touched.
6353 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6354 for (auto *Case : CaseValues)
6355 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6356
6357 // Check if cases with the same result can cover all number
6358 // in touched bits.
6359 if (BitMask.popcount() == Log2_32(CaseCount)) {
 // Rebase the condition so the minimal case value maps to zero.
6360 if (!MinCaseVal->isNullValue())
6361 Condition = Builder.CreateSub(Condition, MinCaseVal);
6362 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6363 Value *Cmp = Builder.CreateICmpEQ(
6364 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6365 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6366 }
6367 }
6368
6369 // Handle the degenerate case where two cases have the same result: compare
 // against each case value and OR the two comparisons.
6370 if (CaseValues.size() == 2) {
6371 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6372 "switch.selectcmp.case1");
6373 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6374 "switch.selectcmp.case2");
6375 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6376 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6377 }
6378 }
6379
6380 return nullptr;
6381}
6382
6383// Helper function to cleanup a switch instruction that has been converted into
6384// a select, fixing up PHI nodes and basic blocks.
//
// The switch's block gets an unconditional branch to PHI's block, PHI gets a
// single incoming entry (SelectValue) from that block, every other successor
// loses the switch block as a predecessor, and the switch is erased. When DTU
// is non-null the matching dominator-tree updates are applied at the end.
// NOTE(review): the line carrying the function name and first parameters
// (listing line 6385) is absent from this extract; the body uses SI and PHI.
6386 Value *SelectValue,
6387 IRBuilder<> &Builder,
6388 DomTreeUpdater *DTU) {
6389 std::vector<DominatorTree::UpdateType> Updates;
6390
6391 BasicBlock *SelectBB = SI->getParent();
6392 BasicBlock *DestBB = PHI->getParent();
6393
 // An Insert update is only needed if the edge does not already exist.
6394 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6395 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6396 Builder.CreateBr(DestBB);
6397
6398 // Remove the switch.
6399
 // Drop every incoming entry from the switch block, then add one entry
 // carrying the select result.
6400 PHI->removeIncomingValueIf(
6401 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6402 PHI->addIncoming(SelectValue, SelectBB);
6403
 // A successor may appear several times in the switch; removePredecessor is
 // called once per occurrence, but only one Delete update is recorded.
6404 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6405 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6406 BasicBlock *Succ = SI->getSuccessor(i);
6407
6408 if (Succ == DestBB)
6409 continue;
6410 Succ->removePredecessor(SelectBB);
6411 if (DTU && RemovedSuccessors.insert(Succ).second)
6412 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6413 }
6414 SI->eraseFromParent();
6415 if (DTU)
6416 DTU->applyUpdates(Updates);
6417}
6418
6419/// If a switch is only used to initialize one or more phi nodes in a common
6420/// successor block with only two different constant values, try to replace the
6421/// switch with a select. Returns true if the fold was made.
6422static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6423 DomTreeUpdater *DTU, const DataLayout &DL,
6424 const TargetTransformInfo &TTI) {
6425 Value *const Cond = SI->getCondition();
6426 PHINode *PHI = nullptr;
6427 BasicBlock *CommonDest = nullptr;
6428 Constant *DefaultResult;
6429 SwitchCaseResultVectorTy UniqueResults;
6430 // Collect all the cases that will deliver the same value from the switch.
6431 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6432 DL, TTI, /*MaxUniqueResults*/ 2))
6433 return false;
6434
6435 assert(PHI != nullptr && "PHI for value select not found");
6436 Builder.SetInsertPoint(SI);
6437 Value *SelectValue =
6438 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6439 if (!SelectValue)
6440 return false;
6441
6442 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6443 return true;
6444}
6445
6446namespace {
6447
6448/// This class represents a lookup table that can be used to replace a switch.
///
/// The constructor picks one of four representations (see Kind) and
/// buildLookup emits the code that matches the chosen representation.
6449class SwitchLookupTable {
6450public:
6451 /// Create a lookup table to use as a switch replacement with the contents
6452 /// of Values, using DefaultValue to fill any holes in the table.
 /// Each entry of Values is placed at index (case value - Offset). FuncName
 /// is appended to the name of the global created for the array
 /// representation.
6453 SwitchLookupTable(
6454 Module &M, uint64_t TableSize, ConstantInt *Offset,
6455 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6456 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
6457
6458 /// Build instructions with Builder to retrieve the value at
6459 /// the position given by Index in the lookup table.
6460 Value *buildLookup(Value *Index, IRBuilder<> &Builder);
6461
6462 /// Return true if a table with TableSize elements of
6463 /// type ElementType would fit in a target-legal register.
6464 static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6465 Type *ElementType);
6466
6467private:
6468 // Depending on the contents of the table, it can be represented in
6469 // different ways.
6470 enum {
6471 // For tables where each element contains the same value, we just have to
6472 // store that single value and return it for each lookup.
6473 SingleValueKind,
6474
6475 // For tables where there is a linear relationship between table index
6476 // and values. We calculate the result with a simple multiplication
6477 // and addition instead of a table lookup.
6478 LinearMapKind,
6479
6480 // For small tables with integer elements, we can pack them into a bitmap
6481 // that fits into a target-legal register. Values are retrieved by
6482 // shift and mask operations.
6483 BitMapKind,
6484
6485 // The table is stored as an array of values. Values are retrieved by load
6486 // instructions from the table.
6487 ArrayKind
6488 } Kind;
6489
6490 // For SingleValueKind, this is the single value.
6491 Constant *SingleValue = nullptr;
6492
6493 // For BitMapKind, this is the bitmap.
6494 ConstantInt *BitMap = nullptr;
 // For BitMapKind, the integer type of one packed element.
6495 IntegerType *BitMapElementTy = nullptr;
6496
6497 // For LinearMapKind, these are the constants used to derive the value.
6498 ConstantInt *LinearOffset = nullptr;
6499 ConstantInt *LinearMultiplier = nullptr;
 // For LinearMapKind, true when the mapping may wrap in the signed sense;
 // buildLookup then omits the nsw flag on the multiply and add.
6500 bool LinearMapValWrapped = false;
6501
6502 // For ArrayKind, this is the array.
6503 GlobalVariable *Array = nullptr;
6504};
6505
6506} // end anonymous namespace
6507
// Builds the table contents from Values (holes filled with DefaultValue) and
// selects the cheapest representation in this order: single value, linear
// map, bitmap, and finally a private constant global array.
6508SwitchLookupTable::SwitchLookupTable(
6509 Module &M, uint64_t TableSize, ConstantInt *Offset,
6510 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6511 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6512 assert(Values.size() && "Can't build lookup table without values!");
6513 assert(TableSize >= Values.size() && "Can't fit values in table!");
6514
6515 // If all values in the table are equal, this is that value.
6516 SingleValue = Values.begin()->second;
6517
6518 Type *ValueType = Values.begin()->second->getType();
6519
6520 // Build up the table contents.
6521 SmallVector<Constant *, 64> TableContents(TableSize);
6522 for (size_t I = 0, E = Values.size(); I != E; ++I) {
6523 ConstantInt *CaseVal = Values[I].first;
6524 Constant *CaseRes = Values[I].second;
6525 assert(CaseRes->getType() == ValueType);
6526
6527 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6528 TableContents[Idx] = CaseRes;
6529
 // Poison results never disqualify the single-value candidate; a poison
 // candidate is replaced by the first non-poison result seen.
6530 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6531 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6532 }
6533
6534 // Fill in any holes in the table with the default result.
6535 if (Values.size() < TableSize) {
6536 assert(DefaultValue &&
6537 "Need a default value to fill the lookup table holes.");
6538 assert(DefaultValue->getType() == ValueType);
6539 for (uint64_t I = 0; I < TableSize; ++I) {
6540 if (!TableContents[I])
6541 TableContents[I] = DefaultValue;
6542 }
6543
6544 // If the default value is poison, all the holes are poison.
6545 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6546
6547 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6548 SingleValue = nullptr;
6549 }
6550
6551 // If each element in the table contains the same value, we only need to store
6552 // that single value.
6553 if (SingleValue) {
6554 Kind = SingleValueKind;
6555 return;
6556 }
6557
6558 // Check if we can derive the value with a linear transformation from the
6559 // table index.
6560 if (isa<IntegerType>(ValueType)) {
6561 bool LinearMappingPossible = true;
6562 APInt PrevVal;
6563 APInt DistToPrev;
6564 // When linear map is monotonic and signed overflow doesn't happen on
6565 // maximum index, we can attach nsw on Add and Mul.
6566 bool NonMonotonic = false;
6567 assert(TableSize >= 2 && "Should be a SingleValue table.");
6568 // Check if there is the same distance between two consecutive values.
6569 for (uint64_t I = 0; I < TableSize; ++I) {
6570 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6571
6572 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6573 // This is a poison, so it's (probably) a lookup table hole.
6574 // To prevent any regressions from before we switched to using poison as
6575 // the default value, holes will fall back to using the first value.
6576 // This can be removed once we add proper handling for poisons in lookup
6577 // tables.
6578 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6579 }
6580
6581 if (!ConstVal) {
6582 // This is an undef. We could deal with it, but undefs in lookup tables
6583 // are very seldom. It's probably not worth the additional complexity.
6584 LinearMappingPossible = false;
6585 break;
6586 }
6587 const APInt &Val = ConstVal->getValue();
6588 if (I != 0) {
6589 APInt Dist = Val - PrevVal;
6590 if (I == 1) {
6591 DistToPrev = Dist;
6592 } else if (Dist != DistToPrev) {
6593 LinearMappingPossible = false;
6594 break;
6595 }
 // A positive step must strictly increase the signed value and a
 // non-positive step must not increase it; otherwise the sequence wrapped.
6596 NonMonotonic |=
6597 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6598 }
6599 PrevVal = Val;
6600 }
6601 if (LinearMappingPossible) {
6602 LinearOffset = cast<ConstantInt>(TableContents[0]);
6603 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
 // Note: this local M shadows the Module parameter M for the rest of the
 // scope.
6604 APInt M = LinearMultiplier->getValue();
 // MayWrap stays true unless the largest index fits the multiplier's bit
 // width and the signed multiplication reports no overflow.
6605 bool MayWrap = true;
6606 if (isIntN(M.getBitWidth(), TableSize - 1))
6607 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6608 LinearMapValWrapped = NonMonotonic || MayWrap;
6609 Kind = LinearMapKind;
6610 ++NumLinearMaps;
6611 return;
6612 }
6613 }
6614
6615 // If the type is integer and the table fits in a register, build a bitmap.
6616 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6617 IntegerType *IT = cast<IntegerType>(ValueType);
6618 APInt TableInt(TableSize * IT->getBitWidth(), 0);
 // Walk from the last element down so that element 0 ends up in the lowest
 // bits of the bitmap.
6619 for (uint64_t I = TableSize; I > 0; --I) {
6620 TableInt <<= IT->getBitWidth();
6621 // Insert values into the bitmap. Undef values are set to zero.
6622 if (!isa<UndefValue>(TableContents[I - 1])) {
6623 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6624 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6625 }
6626 }
6627 BitMap = ConstantInt::get(M.getContext(), TableInt);
6628 BitMapElementTy = IT;
6629 Kind = BitMapKind;
6630 ++NumBitMaps;
6631 return;
6632 }
6633
6634 // Store the table in an array.
6635 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
6636 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
6637
6638 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6639 GlobalVariable::PrivateLinkage, Initializer,
6640 "switch.table." + FuncName);
6641 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6642 // Set the alignment to that of an array items. We will be only loading one
6643 // value out of it.
6644 Array->setAlignment(DL.getPrefTypeAlign(ValueType));
6645 Kind = ArrayKind;
6646}
6647
// Emits, through Builder, the code that yields the table element at Index for
// the representation chosen by the constructor, and returns the resulting
// value. For SingleValueKind nothing is emitted and the stored constant is
// returned directly.
6648Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder) {
6649 switch (Kind) {
6650 case SingleValueKind:
6651 return SingleValue;
6652 case LinearMapKind: {
6653 // Derive the result value from the input value:
 // Index * LinearMultiplier + LinearOffset. The multiply and the add are
 // skipped when the constant is one or zero respectively.
6654 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6655 false, "switch.idx.cast");
6656 if (!LinearMultiplier->isOne())
6657 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6658 /*HasNUW = */ false,
6659 /*HasNSW = */ !LinearMapValWrapped);
6660
6661 if (!LinearOffset->isZero())
6662 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6663 /*HasNUW = */ false,
6664 /*HasNSW = */ !LinearMapValWrapped);
6665 return Result;
6666 }
6667 case BitMapKind: {
6668 // Type of the bitmap (e.g. i59).
6669 IntegerType *MapTy = BitMap->getIntegerType();
6670
6671 // Cast Index to the same type as the bitmap.
6672 // Note: The Index is <= the number of elements in the table, so
6673 // truncating it to the width of the bitmask is safe.
6674 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6675
6676 // Multiply the shift amount by the element width. NUW/NSW can always be
6677 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
6678 // BitMap's bit width.
6679 ShiftAmt = Builder.CreateMul(
6680 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6681 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6682
6683 // Shift down.
6684 Value *DownShifted =
6685 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
6686 // Mask off.
6687 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
6688 }
6689 case ArrayKind: {
6690 // Make sure the table index will not overflow when treated as signed.
 // If the table has more elements than the index type can count with its
 // sign bit clear, widen the index by one bit with a zero extension.
6691 IntegerType *IT = cast<IntegerType>(Index->getType());
6692 uint64_t TableSize =
6693 Array->getInitializer()->getType()->getArrayNumElements();
6694 if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
6695 Index = Builder.CreateZExt(
6696 Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
6697 "switch.tableidx.zext");
6698
6699 Value *GEPIndices[] = {Builder.getInt32(0), Index};
6700 Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
6701 GEPIndices, "switch.gep");
6702 return Builder.CreateLoad(
6703 cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
6704 "switch.load");
6705 }
6706 }
6707 llvm_unreachable("Unknown lookup table kind!");
6708}
6709
6710bool SwitchLookupTable::wouldFitInRegister(const DataLayout &DL,
6711 uint64_t TableSize,
6712 Type *ElementType) {
6713 auto *IT = dyn_cast<IntegerType>(ElementType);
6714 if (!IT)
6715 return false;
6716 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6717 // are <= 15, we could try to narrow the type.
6718
6719 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6720 if (TableSize >= UINT_MAX / IT->getBitWidth())
6721 return false;
6722 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6723}
6724
// Returns true if Ty is acceptable as a lookup-table element type: either the
// target reports it as legal, or it is an integer type whose width is a power
// of two, at least 8 bits, and fits in a legal integer.
// NOTE(review): the first line of this signature (listing line 6725) is absent
// from this extract; the body uses Ty and TTI in addition to DL.
6726 const DataLayout &DL) {
6727 // Allow any legal type.
6728 if (TTI.isTypeLegal(Ty))
6729 return true;
6730
6731 auto *IT = dyn_cast<IntegerType>(Ty);
6732 if (!IT)
6733 return false;
6734
6735 // Also allow power of 2 integer types that have at least 8 bits and fit in
6736 // a register. These types are common in frontend languages and targets
6737 // usually support loads of these types.
6738 // TODO: We could relax this to any integer that fits in a register and rely
6739 // on ABI alignment and padding in the table to allow the load to be widened.
6740 // Or we could widen the constants and truncate the load.
6741 unsigned BitWidth = IT->getBitWidth();
6742 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6743 DL.fitsInLegalInteger(IT->getBitWidth());
6744}
6745
// Returns true when NumCases cases spread over a value range of CaseRange
// reach the minimum density of 40%. Ranges of UINT64_MAX / 100 or more are
// always reported as not dense so the scaled comparison cannot overflow on the
// range side.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t PercentScale = 100;
  constexpr uint64_t RequiredPercent = 40;

  const bool RangeTooLarge = CaseRange >= UINT64_MAX / PercentScale;
  if (RangeTooLarge)
    return false; // Avoid multiplication overflows below.

  const uint64_t ScaledCases = NumCases * PercentScale;
  const uint64_t ScaledThreshold = CaseRange * RequiredPercent;
  return ScaledCases >= ScaledThreshold;
}
6757
// Density check over a collection of case values: the range is taken as
// back() - front() + 1, so Values is presumably sorted ascending and
// non-empty (TODO confirm against callers). Returns false if computing the
// range overflows.
// NOTE(review): the signature line (listing line 6758) is absent from this
// extract.
6759 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6760 uint64_t Range = Diff + 1;
6761 if (Range < Diff)
6762 return false; // Overflow.
6763
6764 return isSwitchDense(Values.size(), Range);
6765}
6766
6767/// Determine whether a lookup table should be built for this switch, based on
6768/// the number of cases, size of the table, and the types of the results.
///
/// Returns true if every result table would fit in a register. Otherwise
/// returns false if any result type is not legal for a lookup table, and falls
/// back to the density check for the remaining cases.
6769// TODO: We could support larger than legal types by limiting based on the
6770// number of loads required and/or table size. If the constants are small we
6771// could use smaller table entries and extend after the load.
// NOTE(review): the line carrying the function name and first parameters
// (listing line 6773) is absent from this extract; the body uses SI and
// TableSize.
6772static bool
6774 const TargetTransformInfo &TTI, const DataLayout &DL,
6775 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6776 if (SI->getNumCases() > TableSize)
6777 return false; // TableSize overflowed.
6778
6779 bool AllTablesFitInRegister = true;
6780 bool HasIllegalType = false;
6781 for (const auto &I : ResultTypes) {
6782 Type *Ty = I.second;
6783
6784 // Saturate this flag to true.
6785 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6786
6787 // Saturate this flag to false.
6788 AllTablesFitInRegister =
6789 AllTablesFitInRegister &&
6790 SwitchLookupTable::wouldFitInRegister(DL, TableSize, Ty);
6791
6792 // If both flags saturate, we're done. NOTE: This *only* works with
6793 // saturating flags, and all flags have to saturate first due to the
6794 // non-deterministic behavior of iterating over a dense map.
6795 if (HasIllegalType && !AllTablesFitInRegister)
6796 break;
6797 }
6798
6799 // If each table would fit in a register, we should build it anyway.
6800 if (AllTablesFitInRegister)
6801 return true;
6802
6803 // Don't build a table that doesn't fit in-register if it has illegal types.
6804 if (HasIllegalType)
6805 return false;
6806
6807 return isSwitchDense(SI->getNumCases(), TableSize);
6808}
6809
// Returns true immediately when MinCaseVal is zero. Otherwise returns false if
// MinCaseVal is negative, if MaxCaseVal's limited value equals the maximum
// uint64_t (so adding one for the table size would overflow), or if there are
// no default results. In the remaining case, returns true only when a table of
// MaxCaseVal + 1 elements would fit in a register for every result type.
// NOTE(review): the line naming this function (listing line 6810) is absent
// from this extract.
6811 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6812 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6813 const DataLayout &DL, const TargetTransformInfo &TTI) {
6814 if (MinCaseVal.isNullValue())
6815 return true;
6816 if (MinCaseVal.isNegative() ||
6817 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6818 !HasDefaultResults)
6819 return false;
6820 return all_of(ResultTypes, [&](const auto &KV) {
6821 return SwitchLookupTable::wouldFitInRegister(
6822 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6823 KV.second /* ResultType */);
6824 });
6825}
6826
6827/// Try to reuse the switch table index compare. Following pattern:
6828/// \code
6829/// if (idx < tablesize)
6830/// r = table[idx]; // table does not contain default_value
6831/// else
6832/// r = default_value;
6833/// if (r != default_value)
6834/// ...
6835/// \endcode
6836/// Is optimized to:
6837/// \code
6838/// cond = idx < tablesize;
6839/// if (cond)
6840/// r = table[idx];
6841/// else
6842/// r = default_value;
6843/// if (cond)
6844/// ...
6845/// \endcode
6846/// Jump threading will then eliminate the second if(cond).
///
/// The function returns without changes unless PhiUser is an integer compare
/// in PhiBlock against a constant, the compare folds to a constant boolean for
/// DefaultValue and to the opposite boolean for every table value, and every
/// predecessor of PhiBlock is the range-check block or has it as its unique
/// predecessor.
// NOTE(review): the line naming this function (listing line 6847) and the two
// lines that begin the declarations of DefaultConst and CaseConst (listing
// lines 6870 and 6878) are absent from this extract.
6848 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6849 Constant *DefaultValue,
6850 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6851 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6852 if (!CmpInst)
6853 return;
6854
6855 // We require that the compare is in the same block as the phi so that jump
6856 // threading can do its work afterwards.
6857 if (CmpInst->getParent() != PhiBlock)
6858 return;
6859
6860 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6861 if (!CmpOp1)
6862 return;
6863
6864 Value *RangeCmp = RangeCheckBranch->getCondition();
6865 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6866 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6867
6868 // Check if the compare with the default value is constant true or false.
6869 const DataLayout &DL = PhiBlock->getDataLayout();
6871 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6872 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6873 return;
6874
6875 // Check if the compare with the case values is distinct from the default
6876 // compare result.
6877 for (auto ValuePair : Values) {
6879 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
6880 if (!CaseConst || CaseConst == DefaultConst ||
6881 (CaseConst != TrueConst && CaseConst != FalseConst))
6882 return;
6883 }
6884
6885 // Check if the branch instruction dominates the phi node. It's a simple
6886 // dominance check, but sufficient for our needs.
6887 // Although this check is invariant in the calling loops, it's better to do it
6888 // at this late stage. Practically we do it at most once for a switch.
6889 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6890 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6891 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6892 return;
6893 }
6894
6895 if (DefaultConst == FalseConst) {
6896 // The compare yields the same result. We can replace it.
6897 CmpInst->replaceAllUsesWith(RangeCmp);
6898 ++NumTableCmpReuses;
6899 } else {
6900 // The compare yields the same result, just inverted. We can replace it.
 // The inverted value is an xor with 1, inserted at the range-check branch.
6901 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6902 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6903 RangeCheckBranch->getIterator());
6904 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6905 ++NumTableCmpReuses;
6906 }
6907}
6908
6909/// If the switch is only used to initialize one or more phi nodes in a common
6910/// successor block with different constant values, replace the switch with
6911/// lookup tables.
6913 DomTreeUpdater *DTU, const DataLayout &DL,
6914 const TargetTransformInfo &TTI) {
6915 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6916
6917 BasicBlock *BB = SI->getParent();
6918 Function *Fn = BB->getParent();
6919 // Only build lookup table when we have a target that supports it or the
6920 // attribute is not set.
6922 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6923 return false;
6924
6925 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6926 // split off a dense part and build a lookup table for that.
6927
6928 // FIXME: This creates arrays of GEPs to constant strings, which means each
6929 // GEP needs a runtime relocation in PIC code. We should just build one big
6930 // string and lookup indices into that.
6931
6932 // Ignore switches with less than three cases. Lookup tables will not make
6933 // them faster, so we don't analyze them.
6934 if (SI->getNumCases() < 3)
6935 return false;
6936
6937 // Figure out the corresponding result for each case value and phi node in the
6938 // common destination, as well as the min and max case values.
6939 assert(!SI->cases().empty());
6940 SwitchInst::CaseIt CI = SI->case_begin();
6941 ConstantInt *MinCaseVal = CI->getCaseValue();
6942 ConstantInt *MaxCaseVal = CI->getCaseValue();
6943
6944 BasicBlock *CommonDest = nullptr;
6945
6946 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6948
6952
6953 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6954 ConstantInt *CaseVal = CI->getCaseValue();
6955 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6956 MinCaseVal = CaseVal;
6957 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6958 MaxCaseVal = CaseVal;
6959
6960 // Resulting value at phi nodes for this case value.
6962 ResultsTy Results;
6963 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6964 Results, DL, TTI))
6965 return false;
6966
6967 // Append the result from this case to the list for each phi.
6968 for (const auto &I : Results) {
6969 PHINode *PHI = I.first;
6970 Constant *Value = I.second;
6971 if (!ResultLists.count(PHI))
6972 PHIs.push_back(PHI);
6973 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
6974 }
6975 }
6976
6977 // Keep track of the result types.
6978 for (PHINode *PHI : PHIs) {
6979 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6980 }
6981
6982 uint64_t NumResults = ResultLists[PHIs[0]].size();
6983
6984 // If the table has holes, we need a constant result for the default case
6985 // or a bitmask that fits in a register.
6986 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6987 bool HasDefaultResults =
6988 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6989 DefaultResultsList, DL, TTI);
6990
6991 for (const auto &I : DefaultResultsList) {
6992 PHINode *PHI = I.first;
6993 Constant *Result = I.second;
6994 DefaultResults[PHI] = Result;
6995 }
6996
6997 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
6998 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6999 uint64_t TableSize;
7000 if (UseSwitchConditionAsTableIndex)
7001 TableSize = MaxCaseVal->getLimitedValue() + 1;
7002 else
7003 TableSize =
7004 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7005
7006 // If the default destination is unreachable, or if the lookup table covers
7007 // all values of the conditional variable, branch directly to the lookup table
7008 // BB. Otherwise, check that the condition is within the case range.
7009 bool DefaultIsReachable = !SI->defaultDestUndefined();
7010
7011 bool TableHasHoles = (NumResults < TableSize);
7012
7013 // If the table has holes but the default destination doesn't produce any
7014 // constant results, the lookup table entries corresponding to the holes will
7015 // contain poison.
7016 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7017
7018 // If the default destination doesn't produce a constant result but is still
7019 // reachable, and the lookup table has holes, we need to use a mask to
7020 // determine if the current index should load from the lookup table or jump
7021 // to the default case.
7022 // The mask is unnecessary if the table has holes but the default destination
7023 // is unreachable, as in that case the holes must also be unreachable.
7024 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7025 if (NeedMask) {
7026 // As an extra penalty for the validity test we require more cases.
7027 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7028 return false;
7029 if (!DL.fitsInLegalInteger(TableSize))
7030 return false;
7031 }
7032
7033 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7034 return false;
7035
7036 std::vector<DominatorTree::UpdateType> Updates;
7037
7038 // Compute the maximum table size representable by the integer type we are
7039 // switching upon.
7040 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7041 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7042 assert(MaxTableSize >= TableSize &&
7043 "It is impossible for a switch to have more entries than the max "
7044 "representable value of its input integer type's size.");
7045
7046 // Create the BB that does the lookups.
7047 Module &Mod = *CommonDest->getParent()->getParent();
7048 BasicBlock *LookupBB = BasicBlock::Create(
7049 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7050
7051 // Compute the table index value.
7052 Builder.SetInsertPoint(SI);
7053 Value *TableIndex;
7054 ConstantInt *TableIndexOffset;
7055 if (UseSwitchConditionAsTableIndex) {
7056 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7057 TableIndex = SI->getCondition();
7058 } else {
7059 TableIndexOffset = MinCaseVal;
7060 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7061 // we can try to attach nsw.
7062 bool MayWrap = true;
7063 if (!DefaultIsReachable) {
7064 APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7065 (void)Res;
7066 }
7067
7068 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7069 "switch.tableidx", /*HasNUW =*/false,
7070 /*HasNSW =*/!MayWrap);
7071 }
7072
7073 BranchInst *RangeCheckBranch = nullptr;
7074
7075 // Grow the table to cover all possible index values to avoid the range check.
7076 // It will use the default result to fill in the table hole later, so make
7077 // sure it exist.
7078 if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
7079 ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
7080 // Grow the table shouldn't have any size impact by checking
7081 // wouldFitInRegister.
7082 // TODO: Consider growing the table also when it doesn't fit in a register
7083 // if no optsize is specified.
7084 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7085 if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
7086 return SwitchLookupTable::wouldFitInRegister(
7087 DL, UpperBound, KV.second /* ResultType */);
7088 })) {
7089 // There may be some case index larger than the UpperBound (unreachable
7090 // case), so make sure the table size does not get smaller.
7091 TableSize = std::max(UpperBound, TableSize);
7092 // The default branch is unreachable after we enlarge the lookup table.
7093 // Adjust DefaultIsReachable to reuse code path.
7094 DefaultIsReachable = false;
7095 }
7096 }
7097
7098 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7099 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7100 Builder.CreateBr(LookupBB);
7101 if (DTU)
7102 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7103 // Note: We call removeProdecessor later since we need to be able to get the
7104 // PHI value for the default case in case we're using a bit mask.
7105 } else {
7106 Value *Cmp = Builder.CreateICmpULT(
7107 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7108 RangeCheckBranch =
7109 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7110 if (DTU)
7111 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7112 }
7113
7114 // Populate the BB that does the lookups.
7115 Builder.SetInsertPoint(LookupBB);
7116
7117 if (NeedMask) {
7118 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7119 // re-purposed to do the hole check, and we create a new LookupBB.
7120 BasicBlock *MaskBB = LookupBB;
7121 MaskBB->setName("switch.hole_check");
7122 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7123 CommonDest->getParent(), CommonDest);
7124
7125 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7126 // unnecessary illegal types.
7127 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7128 APInt MaskInt(TableSizePowOf2, 0);
7129 APInt One(TableSizePowOf2, 1);
7130 // Build bitmask; fill in a 1 bit for every case.
7131 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7132 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
7133 uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
7134 .getLimitedValue();
7135 MaskInt |= One << Idx;
7136 }
7137 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7138
7139 // Get the TableIndex'th bit of the bitmask.
7140 // If this bit is 0 (meaning hole) jump to the default destination,
7141 // else continue with table lookup.
7142 IntegerType *MapTy = TableMask->getIntegerType();
7143 Value *MaskIndex =
7144 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7145 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7146 Value *LoBit = Builder.CreateTrunc(
7147 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7148 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7149 if (DTU) {
7150 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7151 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7152 }
7153 Builder.SetInsertPoint(LookupBB);
7154 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7155 }
7156
7157 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7158 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7159 // do not delete PHINodes here.
7160 SI->getDefaultDest()->removePredecessor(BB,
7161 /*KeepOneInputPHIs=*/true);
7162 if (DTU)
7163 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7164 }
7165
7166 for (PHINode *PHI : PHIs) {
7167 const ResultListTy &ResultList = ResultLists[PHI];
7168
7169 Type *ResultType = ResultList.begin()->second->getType();
7170
7171 // Use any value to fill the lookup table holes.
7172 Constant *DV =
7173 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7174 StringRef FuncName = Fn->getName();
7175 SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
7176 DL, FuncName);
7177
7178 Value *Result = Table.buildLookup(TableIndex, Builder);
7179
7180 // Do a small peephole optimization: re-use the switch table compare if
7181 // possible.
7182 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7183 BasicBlock *PhiBlock = PHI->getParent();
7184 // Search for compare instructions which use the phi.
7185 for (auto *User : PHI->users()) {
7186 reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
7187 }
7188 }
7189
7190 PHI->addIncoming(Result, LookupBB);
7191 }
7192
7193 Builder.CreateBr(CommonDest);
7194 if (DTU)
7195 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7196
7197 // Remove the switch.
7198 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7199 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7200 BasicBlock *Succ = SI->getSuccessor(i);
7201
7202 if (Succ == SI->getDefaultDest())
7203 continue;
7204 Succ->removePredecessor(BB);
7205 if (DTU && RemovedSuccessors.insert(Succ).second)
7206 Updates.push_back({DominatorTree::Delete, BB, Succ});
7207 }
7208 SI->eraseFromParent();
7209
7210 if (DTU)
7211 DTU->applyUpdates(Updates);
7212
7213 ++NumLookupTables;
7214 if (NeedMask)
7215 ++NumLookupTablesHoles;
7216 return true;
7217}
7218
7219/// Try to transform a switch that has "holes" in it to a contiguous sequence
7220/// of cases.
7221///
7222/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7223/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7224///
7225/// This converts a sparse switch into a dense switch which allows better
7226/// lowering and could also allow transforming into a lookup table.
7227static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
7228 const DataLayout &DL,
7229 const TargetTransformInfo &TTI) {
7230 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7231 if (CondTy->getIntegerBitWidth() > 64 ||
7232 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7233 return false;
7234 // Only bother with this optimization if there are more than 3 switch cases;
7235 // SDAG will only bother creating jump tables for 4 or more cases.
7236 if (SI->getNumCases() < 4)
7237 return false;
7238
7239 // This transform is agnostic to the signedness of the input or case values. We
7240 // can treat the case values as signed or unsigned. We can optimize more common
7241 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7242 // as signed.
7244 for (const auto &C : SI->cases())
7245 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7246 llvm::sort(Values);
7247
7248 // If the switch is already dense, there's nothing useful to do here.
7249 if (isSwitchDense(Values))
7250 return false;
7251
7252 // First, transform the values such that they start at zero and ascend.
7253 int64_t Base = Values[0];
7254 for (auto &V : Values)
7255 V -= (uint64_t)(Base);
7256
7257 // Now we have signed numbers that have been shifted so that, given enough
7258 // precision, there are no negative values. Since the rest of the transform
7259 // is bitwise only, we switch now to an unsigned representation.
7260
7261 // This transform can be done speculatively because it is so cheap - it
7262 // results in a single rotate operation being inserted.
7263
7264 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7265 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7266 // less than 64.
7267 unsigned Shift = 64;
7268 for (auto &V : Values)
7269 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7270 assert(Shift < 64);
7271 if (Shift > 0)
7272 for (auto &V : Values)
7273 V = (int64_t)((uint64_t)V >> Shift);
7274
7275 if (!isSwitchDense(Values))
7276 // Transform didn't create a dense switch.
7277 return false;
7278
7279 // The obvious transform is to shift the switch condition right and emit a
7280 // check that the condition actually cleanly divided by GCD, i.e.
7281 // C & (1 << Shift - 1) == 0
7282 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7283 //
7284 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7285 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7286 // are nonzero then the switch condition will be very large and will hit the
7287 // default case.
7288
7289 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7290 Builder.SetInsertPoint(SI);
7291 Value *Sub =
7292 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
7293 Value *Rot = Builder.CreateIntrinsic(
7294 Ty, Intrinsic::fshl,
7295 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7296 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7297
7298 for (auto Case : SI->cases()) {
7299 auto *Orig = Case.getCaseValue();
7300 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7301 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7302 }
7303 return true;
7304}
7305
7306/// Tries to transform switch of powers of two to reduce switch range.
7307/// For example, switch like:
7308/// switch (C) { case 1: case 2: case 64: case 128: }
7309/// will be transformed to:
7310/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7311///
7312/// This transformation allows better lowering and could allow transforming into
7313/// a lookup table.
7315 const DataLayout &DL,
7316 const TargetTransformInfo &TTI) {
7317 Value *Condition = SI->getCondition();
7318 LLVMContext &Context = SI->getContext();
7319 auto *CondTy = cast<IntegerType>(Condition->getType());
7320
7321 if (CondTy->getIntegerBitWidth() > 64 ||
7322 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7323 return false;
7324
7325 const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7326 IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7327 {Condition, ConstantInt::getTrue(Context)}),
7329
7330 if (CttzIntrinsicCost > TTI::TCC_Basic)
7331 // Inserting intrinsic is too expensive.
7332 return false;
7333
7334 // Only bother with this optimization if there are more than 3 switch cases.
7335 // SDAG will only bother creating jump tables for 4 or more cases.
7336 if (SI->getNumCases() < 4)
7337 return false;
7338
7339 // We perform this optimization only for switches with
7340 // unreachable default case.
7341 // This assumtion will save us from checking if `Condition` is a power of two.
7342 if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7343 return false;
7344
7345 // Check that switch cases are powers of two.
7347 for (const auto &Case : SI->cases()) {
7348 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7349 if (llvm::has_single_bit(CaseValue))
7350 Values.push_back(CaseValue);
7351 else
7352 return false;
7353 }
7354
7355 // isSwichDense requires case values to be sorted.
7356 llvm::sort(Values);
7357 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7358 llvm::countr_zero(Values.front()) + 1))
7359 // Transform is unable to generate dense switch.
7360 return false;
7361
7362 Builder.SetInsertPoint(SI);
7363
7364 // Replace each case with its trailing zeros number.
7365 for (auto &Case : SI->cases()) {
7366 auto *OrigValue = Case.getCaseValue();
7367 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7368 OrigValue->getValue().countr_zero()));
7369 }
7370
7371 // Replace condition with its trailing zeros number.
7372 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7373 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7374
7375 SI->setCondition(ConditionTrailingZeros);
7376
7377 return true;
7378}
7379
7380/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7381/// the same destination.
7383 DomTreeUpdater *DTU) {
7384 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7385 if (!Cmp || !Cmp->hasOneUse())
7386 return false;
7387
7389 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7390 if (!HasWeights)
7391 Weights.resize(4); // Avoid checking HasWeights everywhere.
7392
7393 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7394 int64_t Res;
7395 BasicBlock *Succ, *OtherSucc;
7396 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7397 BasicBlock *Unreachable = nullptr;
7398
7399 if (SI->getNumCases() == 2) {
7400 // Find which of 1, 0 or -1 is missing (handled by default dest).
7401 SmallSet<int64_t, 3> Missing;
7402 Missing.insert(1);
7403 Missing.insert(0);
7404 Missing.insert(-1);
7405
7406 Succ = SI->getDefaultDest();
7407 SuccWeight = Weights[0];
7408 OtherSucc = nullptr;
7409 for (auto &Case : SI->cases()) {
7410 std::optional<int64_t> Val =
7411 Case.getCaseValue()->getValue().trySExtValue();
7412 if (!Val)
7413 return false;
7414 if (!Missing.erase(*Val))
7415 return false;
7416 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7417 return false;
7418 OtherSucc = Case.getCaseSuccessor();
7419 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7420 }
7421
7422 assert(Missing.size() == 1 && "Should have one case left");
7423 Res = *Missing.begin();
7424 } else if (SI->getNumCases() == 3 && SI->defaultDestUndefined()) {
7425 // Normalize so that Succ is taken once and OtherSucc twice.
7426 Unreachable = SI->getDefaultDest();
7427 Succ = OtherSucc = nullptr;
7428 for (auto &Case : SI->cases()) {
7429 BasicBlock *NewSucc = Case.getCaseSuccessor();
7430 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7431 if (!OtherSucc || OtherSucc == NewSucc) {
7432 OtherSucc = NewSucc;
7433 OtherSuccWeight += Weight;
7434 } else if (!Succ) {
7435 Succ = NewSucc;
7436 SuccWeight = Weight;
7437 } else if (Succ == NewSucc) {
7438 std::swap(Succ, OtherSucc);
7439 std::swap(SuccWeight, OtherSuccWeight);
7440 } else
7441 return false;
7442 }
7443 for (auto &Case : SI->cases()) {
7444 std::optional<int64_t> Val =
7445 Case.getCaseValue()->getValue().trySExtValue();
7446 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7447 return false;
7448 if (Case.getCaseSuccessor() == Succ) {
7449 Res = *Val;
7450 break;
7451 }
7452 }
7453 } else {
7454 return false;
7455 }
7456
7457 // Determine predicate for the missing case.
7459 switch (Res) {
7460 case 1:
7461 Pred = ICmpInst::ICMP_UGT;
7462 break;
7463 case 0:
7464 Pred = ICmpInst::ICMP_EQ;
7465 break;
7466 case -1:
7467 Pred = ICmpInst::ICMP_ULT;
7468 break;
7469 }
7470 if (Cmp->isSigned())
7471 Pred = ICmpInst::getSignedPredicate(Pred);
7472
7473 MDNode *NewWeights = nullptr;
7474 if (HasWeights)
7475 NewWeights = MDBuilder(SI->getContext())
7476 .createBranchWeights(SuccWeight, OtherSuccWeight);
7477
7478 BasicBlock *BB = SI->getParent();
7479 Builder.SetInsertPoint(SI->getIterator());
7480 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7481 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7482 SI->getMetadata(LLVMContext::MD_unpredictable));
7483 OtherSucc->removePredecessor(BB);
7484 if (Unreachable)
7485 Unreachable->removePredecessor(BB);
7486 SI->eraseFromParent();
7487 Cmp->eraseFromParent();
7488 if (DTU && Unreachable)
7489 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7490 return true;
7491}
7492
7493/// Checking whether two cases of SI are equal depends on the contents of the
7494/// BasicBlock and the incoming values of their successor PHINodes.
7495/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7496/// calling this function on each BasicBlock every time isEqual is called,
7497/// especially since the same BasicBlock may be passed as an argument multiple
7498/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7499/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7500/// of the incoming values.
7504};
7505
7506namespace llvm {
7507template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
7509 return static_cast<SwitchSuccWrapper *>(
7511 }
7513 return static_cast<SwitchSuccWrapper *>(
7515 }
7516 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7517 BasicBlock *Succ = SSW->Dest;
7518 BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
7519 assert(BI->isUnconditional() &&
7520 "Only supporting unconditional branches for now");
7521 assert(BI->getNumSuccessors() == 1 &&
7522 "Expected unconditional branches to have one successor");
7523 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7524
7525 // Since we assume the BB is just a single BranchInst with a single
7526 // successor, we hash as the BB and the incoming Values of its successor
7527 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7528 // including the incoming PHI values leads to better performance.
7529 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7530 // time and passing it in SwitchSuccWrapper, but this slowed down the
7531 // average compile time without having any impact on the worst case compile
7532 // time.
7533 BasicBlock *BB = BI->getSuccessor(0);
7534 SmallVector<Value *> PhiValsForBB;
7535 for (PHINode &Phi : BB->phis())
7536 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7537
7538 return hash_combine(
7539 BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end()));
7540 }
7541 static bool isEqual(const SwitchSuccWrapper *LHS,
7542 const SwitchSuccWrapper *RHS) {
7545 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7546 return LHS == RHS;
7547
7548 BasicBlock *A = LHS->Dest;
7549 BasicBlock *B = RHS->Dest;
7550
7551 // FIXME: we checked that the size of A and B are both 1 in
7552 // simplifyDuplicateSwitchArms to make the Case list smaller to
7553 // improve performance. If we decide to support BasicBlocks with more
7554 // than just a single instruction, we need to check that A.size() ==
7555 // B.size() here, and we need to check more than just the BranchInsts
7556 // for equality.
7557
7558 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7559 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7560 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7561 "Only supporting unconditional branches for now");
7562 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7563 return false;
7564
7565 // Need to check that PHIs in successor have matching values
7566 BasicBlock *Succ = ABI->getSuccessor(0);
7567 for (PHINode &Phi : Succ->phis()) {
7568 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7569 if (PredIVs[A] != PredIVs[B])
7570 return false;
7571 }
7572
7573 return true;
7574 }
7575};
7576} // namespace llvm
7577
7578bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7579 DomTreeUpdater *DTU) {
7580 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7581 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7582 // an entire PHI at once after the loop, opposed to calling
7583 // getIncomingValueForBlock inside this loop, since each call to
7584 // getIncomingValueForBlock is O(|Preds|).
7590 Cases.reserve(SI->getNumSuccessors());
7591
7592 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7593 BasicBlock *BB = SI->getSuccessor(I);
7594
7595 // FIXME: Support more than just a single BranchInst. One way we could do
7596 // this is by taking a hashing approach of all insts in BB.
7597 if (BB->size() != 1)
7598 continue;
7599
7600 // FIXME: This case needs some extra care because the terminators other than
7601 // SI need to be updated. For now, consider only backedges to the SI.
7602 if (BB->hasNPredecessorsOrMore(4) ||
7603 BB->getUniquePredecessor() != SI->getParent())
7604 continue;
7605
7606 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7607 // on other kinds of terminators. We decide to only support unconditional
7608 // branches for now for compile time reasons.
7609 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
7610 if (!BI || BI->isConditional())
7611 continue;
7612
7613 if (Seen.insert(BB).second) {
7614 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7615 for (BasicBlock *Succ : BI->successors())
7616 for (PHINode &Phi : Succ->phis())
7617 Phis.insert(&Phi);
7618 // Add the successor only if not previously visited.
7619 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7620 }
7621
7622 BBToSuccessorIndexes[BB].emplace_back(I);
7623 }
7624
7625 // Precompute a data structure to improve performance of isEqual for
7626 // SwitchSuccWrapper.
7627 PhiPredIVs.reserve(Phis.size());
7628 for (PHINode *Phi : Phis) {
7629 PhiPredIVs[Phi] =
7630 SmallDenseMap<BasicBlock *, Value *, 8>(Phi->getNumIncomingValues());
7631 for (auto &IV : Phi->incoming_values())
7632 PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()});
7633 }
7634
7635 // Build a set such that if the SwitchSuccWrapper exists in the set and
7636 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7637 // which is not in the set should be replaced with the one in the set. If the
7638 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7639 // other SwitchSuccWrappers can check against it in the same manner. We use
7640 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7641 // around information to isEquality, getHashValue, and when doing the
7642 // replacement with better performance.
7644 ReplaceWith.reserve(Cases.size());
7645
7647 Updates.reserve(ReplaceWith.size());
7648 bool MadeChange = false;
7649 for (auto &SSW : Cases) {
7650 // SSW is a candidate for simplification. If we find a duplicate BB,
7651 // replace it.
7652 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7653 if (!Inserted) {
7654 // We know that SI's parent BB no longer dominates the old case successor
7655 // since we are making it dead.
7656 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7657 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7658 for (unsigned Idx : Successors)
7659 SI->setSuccessor(Idx, (*It)->Dest);
7660 MadeChange = true;
7661 }
7662 }
7663
7664 if (DTU)
7665 DTU->applyUpdates(Updates);
7666
7667 return MadeChange;
7668}
7669
7670bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7671 BasicBlock *BB = SI->getParent();
7672
7673 if (isValueEqualityComparison(SI)) {
7674 // If we only have one predecessor, and if it is a branch on this value,
7675 // see if that predecessor totally determines the outcome of this switch.
7676 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7677 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7678 return requestResimplify();
7679
7680 Value *Cond = SI->getCondition();
7681 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7682 if (simplifySwitchOnSelect(SI, Select))
7683 return requestResimplify();
7684
7685 // If the block only contains the switch, see if we can fold the block
7686 // away into any preds.
7687 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7688 if (foldValueComparisonIntoPredecessors(SI, Builder))
7689 return requestResimplify();
7690 }
7691
7692 // Try to transform the switch into an icmp and a branch.
7693 // The conversion from switch to comparison may lose information on
7694 // impossible switch values, so disable it early in the pipeline.
7695 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7696 return requestResimplify();
7697
7698 // Remove unreachable cases.
7699 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7700 return requestResimplify();
7701
7702 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
7703 return requestResimplify();
7704
7705 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7706 return requestResimplify();
7707
7708 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
7709 return requestResimplify();
7710
7711 // The conversion from switch to lookup tables results in difficult-to-analyze
7712 // code and makes pruning branches much harder. This is a problem if the
7713 // switch expression itself can still be restricted as a result of inlining or
7714 // CVP. Therefore, only apply this transformation during late stages of the
7715 // optimisation pipeline.
7716 if (Options.ConvertSwitchToLookupTable &&
7717 switchToLookupTable(SI, Builder, DTU, DL, TTI))
7718 return requestResimplify();
7719
7720 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7721 return requestResimplify();
7722
7723 if (reduceSwitchRange(SI, Builder, DL, TTI))
7724 return requestResimplify();
7725
7726 if (HoistCommon &&
7727 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
7728 return requestResimplify();
7729
7730 if (simplifyDuplicateSwitchArms(SI, DTU))
7731 return requestResimplify();
7732
7733 return false;
7734}
7735
7736bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7737 BasicBlock *BB = IBI->getParent();
7738 bool Changed = false;
7739
7740 // Eliminate redundant destinations.
7743 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7744 BasicBlock *Dest = IBI->getDestination(i);
7745 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7746 if (!Dest->hasAddressTaken())
7747 RemovedSuccs.insert(Dest);
7748 Dest->removePredecessor(BB);
7749 IBI->removeDestination(i);
7750 --i;
7751 --e;
7752 Changed = true;
7753 }
7754 }
7755
7756 if (DTU) {
7757 std::vector<DominatorTree::UpdateType> Updates;
7758 Updates.reserve(RemovedSuccs.size());
7759 for (auto *RemovedSucc : RemovedSuccs)
7760 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7761 DTU->applyUpdates(Updates);
7762 }
7763
7764 if (IBI->getNumDestinations() == 0) {
7765 // If the indirectbr has no successors, change it to unreachable.
7766 new UnreachableInst(IBI->getContext(), IBI->getIterator());
7768 return true;
7769 }
7770
7771 if (IBI->getNumDestinations() == 1) {
7772 // If the indirectbr has one successor, change it to a direct branch.
7775 return true;
7776 }
7777
7778 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7779 if (simplifyIndirectBrOnSelect(IBI, SI))
7780 return requestResimplify();
7781 }
7782 return Changed;
7783}
7784
7785/// Given an block with only a single landing pad and a unconditional branch
7786/// try to find another basic block which this one can be merged with. This
7787/// handles cases where we have multiple invokes with unique landing pads, but
7788/// a shared handler.
7789///
7790/// We specifically choose to not worry about merging non-empty blocks
7791/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7792/// practice, the optimizer produces empty landing pad blocks quite frequently
7793/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7794/// sinking in this file)
7795///
7796/// This is primarily a code size optimization. We need to avoid performing
7797/// any transform which might inhibit optimization (such as our ability to
7798/// specialize a particular handler via tail commoning). We do this by not
7799/// merging any blocks which require us to introduce a phi. Since the same
7800/// values are flowing through both blocks, we don't lose any ability to
7801/// specialize. If anything, we make such specialization more likely.
7802///
7803/// TODO - This transformation could remove entries from a phi in the target
7804/// block when the inputs in the phi are the same for the two blocks being
7805/// merged. In some cases, this could result in removal of the PHI entirely.
7807 BasicBlock *BB, DomTreeUpdater *DTU) {
7808 auto Succ = BB->getUniqueSuccessor();
7809 assert(Succ);
7810 // If there's a phi in the successor block, we'd likely have to introduce
7811 // a phi into the merged landing pad block.
7812 if (isa<PHINode>(*Succ->begin()))
7813 return false;
7814
7815 for (BasicBlock *OtherPred : predecessors(Succ)) {
7816 if (BB == OtherPred)
7817 continue;
7818 BasicBlock::iterator I = OtherPred->begin();
7819 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7820 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7821 continue;
7822 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7823 ;
7824 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7825 if (!BI2 || !BI2->isIdenticalTo(BI))
7826 continue;
7827
7828 std::vector<DominatorTree::UpdateType> Updates;
7829
7830 // We've found an identical block. Update our predecessors to take that
7831 // path instead and make ourselves dead.
7833 for (BasicBlock *Pred : UniquePreds) {
7834 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7835 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7836 "unexpected successor");
7837 II->setUnwindDest(OtherPred);
7838 if (DTU) {
7839 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7840 Updates.push_back({DominatorTree::Delete, Pred, BB});
7841 }
7842 }
7843
7844 // The debug info in OtherPred doesn't cover the merged control flow that
7845 // used to go through BB. We need to delete it or update it.
7846 for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7847 if (isa<DbgInfoIntrinsic>(Inst))
7848 Inst.eraseFromParent();
7849
7851 for (BasicBlock *Succ : UniqueSuccs) {
7852 Succ->removePredecessor(BB);
7853 if (DTU)
7854 Updates.push_back({DominatorTree::Delete, BB, Succ});
7855 }
7856
7857 IRBuilder<> Builder(BI);
7858 Builder.CreateUnreachable();
7859 BI->eraseFromParent();
7860 if (DTU)
7861 DTU->applyUpdates(Updates);
7862 return true;
7863 }
7864 return false;
7865}
7866
7867bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7868 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7869 : simplifyCondBranch(Branch, Builder);
7870}
7871
7872bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7873 IRBuilder<> &Builder) {
7874 BasicBlock *BB = BI->getParent();
7875 BasicBlock *Succ = BI->getSuccessor(0);
7876
7877 // If the Terminator is the only non-phi instruction, simplify the block.
7878 // If LoopHeader is provided, check if the block or its successor is a loop
7879 // header. (This is for early invocations before loop simplify and
7880 // vectorization to keep canonical loop forms for nested loops. These blocks
7881 // can be eliminated when the pass is invoked later in the back-end.)
7882 // Note that if BB has only one predecessor then we do not introduce new
7883 // backedge, so we can eliminate BB.
7884 bool NeedCanonicalLoop =
7885 Options.NeedCanonicalLoop &&
7886 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7887 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
7889 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7890 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7891 return true;
7892
7893 // If the only instruction in the block is a seteq/setne comparison against a
7894 // constant, try to simplify the block.
7895 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7896 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7897 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7898 ;
7899 if (I->isTerminator() &&
7900 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7901 return true;
7902 }
7903
7904 // See if we can merge an empty landing pad block with another which is
7905 // equivalent.
7906 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7907 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7908 ;
7909 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
7910 return true;
7911 }
7912
7913 // If this basic block is ONLY a compare and a branch, and if a predecessor
7914 // branches to us and our successor, fold the comparison into the
7915 // predecessor and use logical operations to update the incoming value
7916 // for PHI nodes in common successor.
7917 if (Options.SpeculateBlocks &&
7918 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7919 Options.BonusInstThreshold))
7920 return requestResimplify();
7921 return false;
7922}
7923
7925 BasicBlock *PredPred = nullptr;
7926 for (auto *P : predecessors(BB)) {
7927 BasicBlock *PPred = P->getSinglePredecessor();
7928 if (!PPred || (PredPred && PredPred != PPred))
7929 return nullptr;
7930 PredPred = PPred;
7931 }
7932 return PredPred;
7933}
7934
7935/// Fold the following pattern:
7936/// bb0:
7937/// br i1 %cond1, label %bb1, label %bb2
7938/// bb1:
7939/// br i1 %cond2, label %bb3, label %bb4
7940/// bb2:
7941/// br i1 %cond2, label %bb4, label %bb3
7942/// bb3:
7943/// ...
7944/// bb4:
7945/// ...
7946/// into
7947/// bb0:
7948/// %cond = xor i1 %cond1, %cond2
7949/// br i1 %cond, label %bb4, label %bb3
7950/// bb3:
7951/// ...
7952/// bb4:
7953/// ...
7954/// NOTE: %cond2 always dominates the terminator of bb0.
7956 BasicBlock *BB = BI->getParent();
7957 BasicBlock *BB1 = BI->getSuccessor(0);
7958 BasicBlock *BB2 = BI->getSuccessor(1);
7959 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
7960 if (Succ == BB)
7961 return false;
7962 if (&Succ->front() != Succ->getTerminator())
7963 return false;
7964 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
7965 if (!SuccBI || !SuccBI->isConditional())
7966 return false;
7967 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
7968 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
7969 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
7970 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
7971 };
7972 BranchInst *BB1BI, *BB2BI;
7973 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
7974 return false;
7975
7976 if (BB1BI->getCondition() != BB2BI->getCondition() ||
7977 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
7978 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
7979 return false;
7980
7981 BasicBlock *BB3 = BB1BI->getSuccessor(0);
7982 BasicBlock *BB4 = BB1BI->getSuccessor(1);
7983 IRBuilder<> Builder(BI);
7984 BI->setCondition(
7985 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
7986 BB1->removePredecessor(BB);
7987 BI->setSuccessor(0, BB4);
7988 BB2->removePredecessor(BB);
7989 BI->setSuccessor(1, BB3);
7990 if (DTU) {
7992 Updates.push_back({DominatorTree::Delete, BB, BB1});
7993 Updates.push_back({DominatorTree::Insert, BB, BB4});
7994 Updates.push_back({DominatorTree::Delete, BB, BB2});
7995 Updates.push_back({DominatorTree::Insert, BB, BB3});
7996
7997 DTU->applyUpdates(Updates);
7998 }
7999 bool HasWeight = false;
8000 uint64_t BBTWeight, BBFWeight;
8001 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8002 HasWeight = true;
8003 else
8004 BBTWeight = BBFWeight = 1;
8005 uint64_t BB1TWeight, BB1FWeight;
8006 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8007 HasWeight = true;
8008 else
8009 BB1TWeight = BB1FWeight = 1;
8010 uint64_t BB2TWeight, BB2FWeight;
8011 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8012 HasWeight = true;
8013 else
8014 BB2TWeight = BB2FWeight = 1;
8015 if (HasWeight) {
8016 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8017 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8018 fitWeights(Weights);
8019 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
8020 }
8021 return true;
8022}
8023
8024bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8025 assert(
8026 !isa<ConstantInt>(BI->getCondition()) &&
8027 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8028 "Tautological conditional branch should have been eliminated already.");
8029
8030 BasicBlock *BB = BI->getParent();
8031 if (!Options.SimplifyCondBranch ||
8032 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8033 return false;
8034
8035 // Conditional branch
8036 if (isValueEqualityComparison(BI)) {
8037 // If we only have one predecessor, and if it is a branch on this value,
8038 // see if that predecessor totally determines the outcome of this
8039 // switch.
8040 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8041 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8042 return requestResimplify();
8043
8044 // This block must be empty, except for the setcond inst, if it exists.
8045 // Ignore dbg and pseudo intrinsics.
8046 auto I = BB->instructionsWithoutDebug(true).begin();
8047 if (&*I == BI) {
8048 if (foldValueComparisonIntoPredecessors(BI, Builder))
8049 return requestResimplify();
8050 } else if (&*I == cast<Instruction>(BI->getCondition())) {
8051 ++I;
8052 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8053 return requestResimplify();
8054 }
8055 }
8056
8057 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8058 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8059 return true;
8060
8061 // If this basic block has dominating predecessor blocks and the dominating
8062 // blocks' conditions imply BI's condition, we know the direction of BI.
8063 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8064 if (Imp) {
8065 // Turn this into a branch on constant.
8066 auto *OldCond = BI->getCondition();
8067 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8068 : ConstantInt::getFalse(BB->getContext());
8069 BI->setCondition(TorF);
8071 return requestResimplify();
8072 }
8073
8074 // If this basic block is ONLY a compare and a branch, and if a predecessor
8075 // branches to us and one of our successors, fold the comparison into the
8076 // predecessor and use logical operations to pick the right destination.
8077 if (Options.SpeculateBlocks &&
8078 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8079 Options.BonusInstThreshold))
8080 return requestResimplify();
8081
8082 // We have a conditional branch to two blocks that are only reachable
8083 // from BI. We know that the condbr dominates the two blocks, so see if
8084 // there is any identical code in the "then" and "else" blocks. If so, we
8085 // can hoist it up to the branching block.
8086 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8087 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8088 if (HoistCommon &&
8089 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8090 return requestResimplify();
8091
8093 Options.HoistLoadsStoresWithCondFaulting &&
8094 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8095 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8096 auto CanSpeculateConditionalLoadsStores = [&]() {
8097 for (auto *Succ : successors(BB)) {
8098 for (Instruction &I : *Succ) {
8099 if (I.isTerminator()) {
8100 if (I.getNumSuccessors() > 1)
8101 return false;
8102 continue;
8103 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8104 SpeculatedConditionalLoadsStores.size() ==
8106 return false;
8107 }
8108 SpeculatedConditionalLoadsStores.push_back(&I);
8109 }
8110 }
8111 return !SpeculatedConditionalLoadsStores.empty();
8112 };
8113
8114 if (CanSpeculateConditionalLoadsStores()) {
8115 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8116 std::nullopt);
8117 return requestResimplify();
8118 }
8119 }
8120 } else {
8121 // If Successor #1 has multiple preds, we may be able to conditionally
8122 // execute Successor #0 if it branches to Successor #1.
8123 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8124 if (Succ0TI->getNumSuccessors() == 1 &&
8125 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8126 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8127 return requestResimplify();
8128 }
8129 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8130 // If Successor #0 has multiple preds, we may be able to conditionally
8131 // execute Successor #1 if it branches to Successor #0.
8132 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8133 if (Succ1TI->getNumSuccessors() == 1 &&
8134 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8135 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8136 return requestResimplify();
8137 }
8138
8139 // If this is a branch on something for which we know the constant value in
8140 // predecessors (e.g. a phi node in the current block), thread control
8141 // through this block.
8143 return requestResimplify();
8144
8145 // Scan predecessor blocks for conditional branches.
8146 for (BasicBlock *Pred : predecessors(BB))
8147 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8148 if (PBI != BI && PBI->isConditional())
8149 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8150 return requestResimplify();
8151
8152 // Look for diamond patterns.
8153 if (MergeCondStores)
8155 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8156 if (PBI != BI && PBI->isConditional())
8157 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8158 return requestResimplify();
8159
8160 // Look for nested conditional branches.
8161 if (mergeNestedCondBranch(BI, DTU))
8162 return requestResimplify();
8163
8164 return false;
8165}
8166
8167/// Check if passing a value to an instruction will cause undefined behavior.
8168static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8169 Constant *C = dyn_cast<Constant>(V);
8170 if (!C)
8171 return false;
8172
8173 if (I->use_empty())
8174 return false;
8175
8176 if (C->isNullValue() || isa<UndefValue>(C)) {
8177 // Only look at the first use we can handle, avoid hurting compile time with
8178 // long uselists
8179 auto FindUse = llvm::find_if(I->users(), [](auto *U) {
8180 auto *Use = cast<Instruction>(U);
8181 // Change this list when we want to add new instructions.
8182 switch (Use->getOpcode()) {
8183 default:
8184 return false;
8185 case Instruction::GetElementPtr:
8186 case Instruction::Ret:
8187 case Instruction::BitCast:
8188 case Instruction::Load:
8189 case Instruction::Store:
8190 case Instruction::Call:
8191 case Instruction::CallBr:
8192 case Instruction::Invoke:
8193 case Instruction::UDiv:
8194 case Instruction::URem:
8195 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8196 // implemented to avoid code complexity as it is unclear how useful such
8197 // logic is.
8198 case Instruction::SDiv:
8199 case Instruction::SRem:
8200 return true;
8201 }
8202 });
8203 if (FindUse == I->user_end())
8204 return false;
8205 auto *Use = cast<Instruction>(*FindUse);
8206 // Bail out if Use is not in the same BB as I or Use == I or Use comes
8207 // before I in the block. The latter two can be the case if Use is a
8208 // PHI node.
8209 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
8210 return false;
8211
8212 // Now make sure that there are no instructions in between that can alter
8213 // control flow (eg. calls)
8214 auto InstrRange =
8215 make_range(std::next(I->getIterator()), Use->getIterator());
8216 if (any_of(InstrRange, [](Instruction &I) {
8218 }))
8219 return false;
8220
8221 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8222 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
8223 if (GEP->getPointerOperand() == I) {
8224 // The current base address is null, there are four cases to consider:
8225 // getelementptr (TY, null, 0) -> null
8226 // getelementptr (TY, null, not zero) -> may be modified
8227 // getelementptr inbounds (TY, null, 0) -> null
8228 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8229 // undefined?
8230 if (!GEP->hasAllZeroIndices() &&
8231 (!GEP->isInBounds() ||
8232 NullPointerIsDefined(GEP->getFunction(),
8233 GEP->getPointerAddressSpace())))
8234 PtrValueMayBeModified = true;
8235 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8236 }
8237
8238 // Look through return.
8239 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Use)) {
8240 bool HasNoUndefAttr =
8241 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8242 // Return undefined to a noundef return value is undefined.
8243 if (isa<UndefValue>(C) && HasNoUndefAttr)
8244 return true;
8245 // Return null to a nonnull+noundef return value is undefined.
8246 if (C->isNullValue() && HasNoUndefAttr &&
8247 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8248 return !PtrValueMayBeModified;
8249 }
8250 }
8251
8252 // Load from null is undefined.
8253 if (LoadInst *LI = dyn_cast<LoadInst>(Use))
8254 if (!LI->isVolatile())
8255 return !NullPointerIsDefined(LI->getFunction(),
8256 LI->getPointerAddressSpace());
8257
8258 // Store to null is undefined.
8259 if (StoreInst *SI = dyn_cast<StoreInst>(Use))
8260 if (!SI->isVolatile())
8261 return (!NullPointerIsDefined(SI->getFunction(),
8262 SI->getPointerAddressSpace())) &&
8263 SI->getPointerOperand() == I;
8264
8265 // llvm.assume(false/undef) always triggers immediate UB.
8266 if (auto *Assume = dyn_cast<AssumeInst>(Use)) {
8267 // Ignore assume operand bundles.
8268 if (I == Assume->getArgOperand(0))
8269 return true;
8270 }
8271
8272 if (auto *CB = dyn_cast<CallBase>(Use)) {
8273 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8274 return false;
8275 // A call to null is undefined.
8276 if (CB->getCalledOperand() == I)
8277 return true;
8278
8279 if (C->isNullValue()) {
8280 for (const llvm::Use &Arg : CB->args())
8281 if (Arg == I) {
8282 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
8283 if (CB->isPassingUndefUB(ArgIdx) &&
8284 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
8285 // Passing null to a nonnnull+noundef argument is undefined.
8286 return !PtrValueMayBeModified;
8287 }
8288 }
8289 } else if (isa<UndefValue>(C)) {
8290 // Passing undef to a noundef argument is undefined.
8291 for (const llvm::Use &Arg : CB->args())
8292 if (Arg == I) {
8293 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
8294 if (CB->isPassingUndefUB(ArgIdx)) {
8295 // Passing undef to a noundef argument is undefined.
8296 return true;
8297 }
8298 }
8299 }
8300 }
8301 // Div/Rem by zero is immediate UB
8302 if (match(Use, m_BinOp(m_Value(), m_Specific(I))) && Use->isIntDivRem())
8303 return true;
8304 }
8305 return false;
8306}
8307
8308/// If BB has an incoming value that will always trigger undefined behavior
8309/// (eg. null pointer dereference), remove the branch leading here.
8311 DomTreeUpdater *DTU,
8312 AssumptionCache *AC) {
8313 for (PHINode &PHI : BB->phis())
8314 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8315 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8316 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8317 Instruction *T = Predecessor->getTerminator();
8318 IRBuilder<> Builder(T);
8319 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8320 BB->removePredecessor(Predecessor);
8321 // Turn unconditional branches into unreachables and remove the dead
8322 // destination from conditional branches.
8323 if (BI->isUnconditional())
8324 Builder.CreateUnreachable();
8325 else {
8326 // Preserve guarding condition in assume, because it might not be
8327 // inferrable from any dominating condition.
8328 Value *Cond = BI->getCondition();
8329 CallInst *Assumption;
8330 if (BI->getSuccessor(0) == BB)
8331 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8332 else
8333 Assumption = Builder.CreateAssumption(Cond);
8334 if (AC)
8335 AC->registerAssumption(cast<AssumeInst>(Assumption));
8336 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8337 : BI->getSuccessor(0));
8338 }
8339 BI->eraseFromParent();
8340 if (DTU)
8341 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8342 return true;
8343 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8344 // Redirect all branches leading to UB into
8345 // a newly created unreachable block.
8346 BasicBlock *Unreachable = BasicBlock::Create(
8347 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8348 Builder.SetInsertPoint(Unreachable);
8349 // The new block contains only one instruction: Unreachable
8350 Builder.CreateUnreachable();
8351 for (const auto &Case : SI->cases())
8352 if (Case.getCaseSuccessor() == BB) {
8353 BB->removePredecessor(Predecessor);
8354 Case.setSuccessor(Unreachable);
8355 }
8356 if (SI->getDefaultDest() == BB) {
8357 BB->removePredecessor(Predecessor);
8358 SI->setDefaultDest(Unreachable);
8359 }
8360
8361 if (DTU)
8362 DTU->applyUpdates(
8363 { { DominatorTree::Insert, Predecessor, Unreachable },
8364 { DominatorTree::Delete, Predecessor, BB } });
8365 return true;
8366 }
8367 }
8368
8369 return false;
8370}
8371
8372bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8373 bool Changed = false;
8374
8375 assert(BB && BB->getParent() && "Block not embedded in function!");
8376 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8377
8378 // Remove basic blocks that have no predecessors (except the entry block)...
8379 // or that just have themself as a predecessor. These are unreachable.
8380 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8381 BB->getSinglePredecessor() == BB) {
8382 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8383 DeleteDeadBlock(BB, DTU);
8384 return true;
8385 }
8386
8387 // Check to see if we can constant propagate this terminator instruction
8388 // away...
8389 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8390 /*TLI=*/nullptr, DTU);
8391
8392 // Check for and eliminate duplicate PHI nodes in this block.
8393 Changed |= EliminateDuplicatePHINodes(BB);
8394
8395 // Check for and remove branches that will always cause undefined behavior.
8397 return requestResimplify();
8398
8399 // Merge basic blocks into their predecessor if there is only one distinct
8400 // pred, and if there is only one distinct successor of the predecessor, and
8401 // if there are no PHI nodes.
8402 if (MergeBlockIntoPredecessor(BB, DTU))
8403 return true;
8404
8405 if (SinkCommon && Options.SinkCommonInsts)
8406 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8407 mergeCompatibleInvokes(BB, DTU)) {
8408 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8409 // so we may now how duplicate PHI's.
8410 // Let's rerun EliminateDuplicatePHINodes() first,
8411 // before foldTwoEntryPHINode() potentially converts them into select's,
8412 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8413 return true;
8414 }
8415
8416 IRBuilder<> Builder(BB);
8417
8418 if (Options.SpeculateBlocks &&
8419 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8420 // If there is a trivial two-entry PHI node in this basic block, and we can
8421 // eliminate it, do so now.
8422 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8423 if (PN->getNumIncomingValues() == 2)
8424 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8425 Options.SpeculateUnpredictables))
8426 return true;
8427 }
8428
8430 Builder.SetInsertPoint(Terminator);
8431 switch (Terminator->getOpcode()) {
8432 case Instruction::Br:
8433 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8434 break;
8435 case Instruction::Resume:
8436 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8437 break;
8438 case Instruction::CleanupRet:
8439 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8440 break;
8441 case Instruction::Switch:
8442 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8443 break;
8444 case Instruction::Unreachable:
8445 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8446 break;
8447 case Instruction::IndirectBr:
8448 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8449 break;
8450 }
8451
8452 return Changed;
8453}
8454
8455bool SimplifyCFGOpt::run(BasicBlock *BB) {
8456 bool Changed = false;
8457
8458 // Repeated simplify BB as long as resimplification is requested.
8459 do {
8460 Resimplify = false;
8461
8462 // Perform one round of simplifcation. Resimplify flag will be set if
8463 // another iteration is requested.
8464 Changed |= simplifyOnce(BB);
8465 } while (Resimplify);
8466
8467 return Changed;
8468}
8469
8472 ArrayRef<WeakVH> LoopHeaders) {
8473 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8474 Options)
8475 .run(BB);
8476}
#define Fail
#define Success
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
hexagon gen pred
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< bool > HoistLoadsStoresWithCondFaulting("simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads/stores if the target supports " "conditional faulting"))
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return true if an instruction's type or any of its operands' types is a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch, try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights, bool IsExpected)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist or sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert)
If the target supports conditional faulting, we look for the following pattern:
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool isLifeTimeMarker(const Instruction *I)
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (e.g.
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition: APInt.h:1554
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1915
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:177
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:171
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:378
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:250
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:658
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:367
const Instruction & front() const
Definition: BasicBlock.h:471
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:481
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:497
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:331
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:467
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:489
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:717
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:386
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:67
size_t size() const
Definition: BasicBlock.h:469
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:677
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:485
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:631
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:516
The address of a basic block.
Definition: Constants.h:893
BasicBlock * getBasicBlock() const
Definition: Constants.h:924
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
Adds the range attribute to the list of attributes.
Definition: InstrTypes.h:1568
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2625
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isNegative() const
Definition: Constants.h:203
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:258
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:187
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:866
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:873
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:151
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare the set size of this range with MaxSize.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
iterator end()
Definition: DenseMap.h:84
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:202
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition: DenseMap.h:103
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
iterator begin()
Definition: Function.h:853
size_t size() const
Definition: Function.h:858
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
bool hasPostDomTree() const
Returns true if it holds a PostDomTreeT.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2285
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2050
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1305
Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1048
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:521
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1043
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:194
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2573
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1479
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:239
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1881
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:252
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1756
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1186
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2269
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1386
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2151
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1163
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1797
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2032
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1517
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1810
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1369
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2141
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2018
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1539
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1670
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1157
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1687
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2224
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1561
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2379
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1693
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1403
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2704
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
Return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:104
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:68
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:390
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:277
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
Definition: Instruction.h:906
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1750
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:949
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:42
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:176
static unsigned getPointerOperandIndex()
Definition: Instructions.h:257
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1069
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:704
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
Align getAlign() const
Definition: Instructions.h:333
bool isSimple() const
Definition: Instructions.h:370
Value * getValueOperand()
Definition: Instructions.h:378
bool isUnordered() const
Definition: Instructions.h:372
static unsigned getPointerOperandIndex()
Definition: Instructions.h:383
Value * getPointerOperand()
Definition: Instructions.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove the corresponding branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:234
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void set(Value *Val)
Definition: Value.h:886
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
op_range operands()
Definition: User.h:288
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:241
void setOperand(unsigned i, Value *Val)
Definition: User.h:233
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
static constexpr uint64_t MaximumAlignment
Definition: Value.h:811
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition: DenseSet.h:90
size_type size() const
Definition: DenseSet.h:81
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:507
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:864
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use \ID as an operand.
Definition: DebugInfo.cpp:1866
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:240
void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
Definition: DebugInfo.cpp:1880
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:47
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:854
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1732
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:546
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:136
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
Definition: ValueMapper.h:272
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B. Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:58
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2204
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2055
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1785
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2107
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1156
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1187
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1439
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:3236
auto succ_size(const MachineBasicBlock *BB)
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3439
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3706
@ And
Bitwise or logical AND of integers.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4209
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2014
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:234
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1624
bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:590
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2067
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1524
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:468
bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition: ValueMapper.h:281
void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
BasicBlock * Dest
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254