LLVM 20.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
78#include <algorithm>
79#include <cassert>
80#include <climits>
81#include <cstddef>
82#include <cstdint>
83#include <iterator>
84#include <map>
85#include <optional>
86#include <set>
87#include <tuple>
88#include <utility>
89#include <vector>
90
91using namespace llvm;
92using namespace PatternMatch;
93
94#define DEBUG_TYPE "simplifycfg"
95
97 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
98
99 cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
100 "into preserving DomTree,"));
101
102// Chosen as 2 so as to be cheap, but still to have enough power to fold
103// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
104// To catch this, we need to fold a compare and a select, hence '2' being the
105// minimum reasonable default.
107 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
108 cl::desc(
109 "Control the amount of phi node folding to perform (default = 2)"));
110
112 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
113 cl::desc("Control the maximal total instruction cost that we are willing "
114 "to speculatively execute to fold a 2-entry PHI node into a "
115 "select (default = 4)"));
116
117static cl::opt<bool>
118 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
119 cl::desc("Hoist common instructions up to the parent block"));
120
122 "simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden,
123 cl::init(true),
124 cl::desc("Hoist loads/stores if the target supports "
125 "conditional faulting"));
126
128 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
129 cl::desc("Control the maximal conditonal load/store that we are willing "
130 "to speculatively execute to eliminate conditional branch "
131 "(default = 6)"));
132
134 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
135 cl::init(20),
136 cl::desc("Allow reordering across at most this many "
137 "instructions when hoisting"));
138
139static cl::opt<bool>
140 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
141 cl::desc("Sink common instructions down to the end block"));
142
144 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
145 cl::desc("Hoist conditional stores if an unconditional store precedes"));
146
148 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
149 cl::desc("Hoist conditional stores even if an unconditional store does not "
150 "precede - hoist multiple conditional stores into a single "
151 "predicated store"));
152
154 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
155 cl::desc("When merging conditional stores, do so even if the resultant "
156 "basic blocks are unlikely to be if-converted as a result"));
157
159 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
160 cl::desc("Allow exactly one expensive instruction to be speculatively "
161 "executed"));
162
164 "max-speculation-depth", cl::Hidden, cl::init(10),
165 cl::desc("Limit maximum recursion depth when calculating costs of "
166 "speculatively executed instructions"));
167
168static cl::opt<int>
169 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
170 cl::init(10),
171 cl::desc("Max size of a block which is still considered "
172 "small enough to thread through"));
173
174// Two is chosen to allow one negation and a logical combine.
176 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
177 cl::init(2),
178 cl::desc("Maximum cost of combining conditions when "
179 "folding branches"));
180
182 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
183 cl::init(2),
184 cl::desc("Multiplier to apply to threshold when determining whether or not "
185 "to fold branch to common destination when vector operations are "
186 "present"));
187
189 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
190 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
191
193 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
194 cl::desc("Limit cases to analyze when converting a switch to select"));
195
196STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
197STATISTIC(NumLinearMaps,
198 "Number of switch instructions turned into linear mapping");
199STATISTIC(NumLookupTables,
200 "Number of switch instructions turned into lookup tables");
202 NumLookupTablesHoles,
203 "Number of switch instructions turned into lookup tables (holes checked)");
204STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
205STATISTIC(NumFoldValueComparisonIntoPredecessors,
206 "Number of value comparisons folded into predecessor basic blocks");
207STATISTIC(NumFoldBranchToCommonDest,
208 "Number of branches folded into predecessor basic block");
210 NumHoistCommonCode,
211 "Number of common instruction 'blocks' hoisted up to the begin block");
212STATISTIC(NumHoistCommonInstrs,
213 "Number of common instructions hoisted up to the begin block");
214STATISTIC(NumSinkCommonCode,
215 "Number of common instruction 'blocks' sunk down to the end block");
216STATISTIC(NumSinkCommonInstrs,
217 "Number of common instructions sunk down to the end block");
218STATISTIC(NumSpeculations, "Number of speculative executed instructions");
219STATISTIC(NumInvokes,
220 "Number of invokes with empty resume blocks simplified into calls");
221STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
222STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
223
224namespace {
225
226// The first field contains the value that the switch produces when a certain
227// case group is selected, and the second field is a vector containing the
228// cases composing the case group.
229using SwitchCaseResultVectorTy =
231
232// The first field contains the phi node that generates a result of the switch
233// and the second field contains the value generated for a certain case in the
234// switch for that PHI.
235using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
236
237/// ValueEqualityComparisonCase - Represents a case of a switch.
238struct ValueEqualityComparisonCase {
240 BasicBlock *Dest;
241
242 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
243 : Value(Value), Dest(Dest) {}
244
245 bool operator<(ValueEqualityComparisonCase RHS) const {
246 // Comparing pointers is ok as we only rely on the order for uniquing.
247 return Value < RHS.Value;
248 }
249
250 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
251};
252
253class SimplifyCFGOpt {
255 DomTreeUpdater *DTU;
256 const DataLayout &DL;
257 ArrayRef<WeakVH> LoopHeaders;
258 const SimplifyCFGOptions &Options;
259 bool Resimplify;
260
261 Value *isValueEqualityComparison(Instruction *TI);
262 BasicBlock *getValueEqualityComparisonCases(
263 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
264 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
265 BasicBlock *Pred,
266 IRBuilder<> &Builder);
267 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
268 Instruction *PTI,
269 IRBuilder<> &Builder);
270 bool foldValueComparisonIntoPredecessors(Instruction *TI,
271 IRBuilder<> &Builder);
272
273 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
274 bool simplifySingleResume(ResumeInst *RI);
275 bool simplifyCommonResume(ResumeInst *RI);
276 bool simplifyCleanupReturn(CleanupReturnInst *RI);
277 bool simplifyUnreachable(UnreachableInst *UI);
278 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
279 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
280 bool simplifyIndirectBr(IndirectBrInst *IBI);
281 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
282 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
283 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
284
285 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
286 IRBuilder<> &Builder);
287
288 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
289 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
290 Instruction *TI, Instruction *I1,
291 SmallVectorImpl<Instruction *> &OtherSuccTIs);
292 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
293 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
294 BasicBlock *TrueBB, BasicBlock *FalseBB,
295 uint32_t TrueWeight, uint32_t FalseWeight);
296 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
297 const DataLayout &DL);
298 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
299 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
300 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
301
302public:
303 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
304 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
305 const SimplifyCFGOptions &Opts)
306 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
307 assert((!DTU || !DTU->hasPostDomTree()) &&
308 "SimplifyCFG is not yet capable of maintaining validity of a "
309 "PostDomTree, so don't ask for it.");
310 }
311
312 bool simplifyOnce(BasicBlock *BB);
313 bool run(BasicBlock *BB);
314
315 // Helper to set Resimplify and return change indication.
316 bool requestResimplify() {
317 Resimplify = true;
318 return true;
319 }
320};
321
322} // end anonymous namespace
323
324/// Return true if all the PHI nodes in the basic block \p BB
325/// receive compatible (identical) incoming values when coming from
326/// all of the predecessor blocks that are specified in \p IncomingBlocks.
327///
328/// Note that if the values aren't exactly identical, but \p EquivalenceSet
329/// is provided, and *both* of the values are present in the set,
330/// then they are considered equal.
332 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
333 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
334 assert(IncomingBlocks.size() == 2 &&
335 "Only for a pair of incoming blocks at the time!");
336
337 // FIXME: it is okay if one of the incoming values is an `undef` value,
338 // iff the other incoming value is guaranteed to be a non-poison value.
339 // FIXME: it is okay if one of the incoming values is a `poison` value.
340 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
341 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
342 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
343 if (IV0 == IV1)
344 return true;
345 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
346 EquivalenceSet->contains(IV1))
347 return true;
348 return false;
349 });
350}
351
352/// Return true if it is safe to merge these two
353/// terminator instructions together.
354static bool
356 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
357 if (SI1 == SI2)
358 return false; // Can't merge with self!
359
360 // It is not safe to merge these two switch instructions if they have a common
361 // successor, and if that successor has a PHI node, and if *that* PHI node has
362 // conflicting incoming values from the two switch blocks.
363 BasicBlock *SI1BB = SI1->getParent();
364 BasicBlock *SI2BB = SI2->getParent();
365
366 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
367 bool Fail = false;
368 for (BasicBlock *Succ : successors(SI2BB)) {
369 if (!SI1Succs.count(Succ))
370 continue;
371 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
372 continue;
373 Fail = true;
374 if (FailBlocks)
375 FailBlocks->insert(Succ);
376 else
377 break;
378 }
379
380 return !Fail;
381}
382
383/// Update PHI nodes in Succ to indicate that there will now be entries in it
384/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
385/// will be the same as those coming in from ExistPred, an existing predecessor
386/// of Succ.
387static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
388 BasicBlock *ExistPred,
389 MemorySSAUpdater *MSSAU = nullptr) {
390 for (PHINode &PN : Succ->phis())
391 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
392 if (MSSAU)
393 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
394 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
395}
396
397/// Compute an abstract "cost" of speculating the given instruction,
398/// which is assumed to be safe to speculate. TCC_Free means cheap,
399/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
400/// expensive.
402 const TargetTransformInfo &TTI) {
404}
405
406/// If we have a merge point of an "if condition" as accepted above,
407/// return true if the specified value dominates the block. We don't handle
408/// the true generality of domination here, just a special case which works
409/// well enough for us.
410///
411/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
412/// see if V (which must be an instruction) and its recursive operands
413/// that do not dominate BB have a combined cost lower than Budget and
414/// are non-trapping. If both are true, the instruction is inserted into the
415/// set and true is returned.
416///
417/// The cost for most non-trapping instructions is defined as 1 except for
418/// Select whose cost is 2.
419///
420/// After this function returns, Cost is increased by the cost of
421/// V plus its non-dominating operands. If that cost is greater than
422/// Budget, false is returned and Cost is undefined.
423static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
424 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
427 AssumptionCache *AC, unsigned Depth = 0) {
428 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
429 // so limit the recursion depth.
430 // TODO: While this recursion limit does prevent pathological behavior, it
431 // would be better to track visited instructions to avoid cycles.
433 return false;
434
435 Instruction *I = dyn_cast<Instruction>(V);
436 if (!I) {
437 // Non-instructions dominate all instructions and can be executed
438 // unconditionally.
439 return true;
440 }
441 BasicBlock *PBB = I->getParent();
442
443 // We don't want to allow weird loops that might have the "if condition" in
444 // the bottom of this block.
445 if (PBB == BB)
446 return false;
447
448 // If this instruction is defined in a block that contains an unconditional
449 // branch to BB, then it must be in the 'conditional' part of the "if
450 // statement". If not, it definitely dominates the region.
451 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
452 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
453 return true;
454
455 // If we have seen this instruction before, don't count it again.
456 if (AggressiveInsts.count(I))
457 return true;
458
459 // Okay, it looks like the instruction IS in the "condition". Check to
460 // see if it's a cheap instruction to unconditionally compute, and if it
461 // only uses stuff defined outside of the condition. If so, hoist it out.
462 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
463 return false;
464
466
467 // Allow exactly one instruction to be speculated regardless of its cost
468 // (as long as it is safe to do so).
469 // This is intended to flatten the CFG even if the instruction is a division
470 // or other expensive operation. The speculation of an expensive instruction
471 // is expected to be undone in CodeGenPrepare if the speculation has not
472 // enabled further IR optimizations.
473 if (Cost > Budget &&
474 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
475 !Cost.isValid()))
476 return false;
477
478 // Okay, we can only really hoist these out if their operands do
479 // not take us over the cost threshold.
480 for (Use &Op : I->operands())
481 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
482 TTI, AC, Depth + 1))
483 return false;
484 // Okay, it's safe to do this! Remember this instruction.
485 AggressiveInsts.insert(I);
486 return true;
487}
488
489/// Extract ConstantInt from value, looking through IntToPtr
490/// and PointerNullValue. Return NULL if value is not a constant int.
492 // Normal constant int.
493 ConstantInt *CI = dyn_cast<ConstantInt>(V);
494 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
495 DL.isNonIntegralPointerType(V->getType()))
496 return CI;
497
498 // This is some kind of pointer constant. Turn it into a pointer-sized
499 // ConstantInt if possible.
500 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
501
502 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
503 if (isa<ConstantPointerNull>(V))
504 return ConstantInt::get(PtrTy, 0);
505
506 // IntToPtr const int.
507 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
508 if (CE->getOpcode() == Instruction::IntToPtr)
509 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
510 // The constant is very likely to have the right type already.
511 if (CI->getType() == PtrTy)
512 return CI;
513 else
514 return cast<ConstantInt>(
515 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
516 }
517 return nullptr;
518}
519
520namespace {
521
522/// Given a chain of or (||) or and (&&) comparisons of a value against
523/// constants, this will try to recover the information required for a switch
524/// structure.
525/// It will depth-first traverse the chain of comparisons, looking for patterns
526/// like %a == 12 or %a < 4, and combine them to produce a set of integers
527/// representing the different cases for the switch.
528/// Note that if the chain is composed of '||' it will build the set of elements
529/// that match the comparisons (i.e. any of these values validates the chain)
530/// while for a chain of '&&' it will build the set of elements that make the
531/// test fail.
532struct ConstantComparesGatherer {
533 const DataLayout &DL;
534
535 /// Value found for the switch comparison
536 Value *CompValue = nullptr;
537
538 /// Extra clause to be checked before the switch
539 Value *Extra = nullptr;
540
541 /// Set of integers to match in switch
543
544 /// Number of comparisons matched in the and/or chain
545 unsigned UsedICmps = 0;
546
547 /// Construct and compute the result for the comparison instruction Cond
548 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
549 gather(Cond);
550 }
551
552 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
553 ConstantComparesGatherer &
554 operator=(const ConstantComparesGatherer &) = delete;
555
556private:
557 /// Try to set the current value used for the comparison, it succeeds only if
558 /// it wasn't set before or if the new value is the same as the old one
559 bool setValueOnce(Value *NewVal) {
560 if (CompValue && CompValue != NewVal)
561 return false;
562 CompValue = NewVal;
563 return (CompValue != nullptr);
564 }
565
566 /// Try to match Instruction "I" as a comparison against a constant and
567 /// populates the array Vals with the set of values that match (or do not
568 /// match depending on isEQ).
569 /// Return false on failure. On success, the Value the comparison matched
570 /// against is placed in CompValue.
571 /// If CompValue is already set, the function is expected to fail if a match
572 /// is found but the value compared to is different.
573 bool matchInstruction(Instruction *I, bool isEQ) {
574 // If this is an icmp against a constant, handle this as one of the cases.
575 ICmpInst *ICI;
576 ConstantInt *C;
577 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
578 (C = getConstantInt(I->getOperand(1), DL)))) {
579 return false;
580 }
581
582 Value *RHSVal;
583 const APInt *RHSC;
584
585 // Pattern match a special case
586 // (x & ~2^z) == y --> x == y || x == y|2^z
587 // This undoes a transformation done by instcombine to fuse 2 compares.
588 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
589 // It's a little bit hard to see why the following transformations are
590 // correct. Here is a CVC3 program to verify them for 64-bit values:
591
592 /*
593 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
594 x : BITVECTOR(64);
595 y : BITVECTOR(64);
596 z : BITVECTOR(64);
597 mask : BITVECTOR(64) = BVSHL(ONE, z);
598 QUERY( (y & ~mask = y) =>
599 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
600 );
601 QUERY( (y | mask = y) =>
602 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
603 );
604 */
605
606 // Please note that each pattern must be a dual implication (<--> or
607 // iff). One directional implication can create spurious matches. If the
608 // implication is only one-way, an unsatisfiable condition on the left
609 // side can imply a satisfiable condition on the right side. Dual
610 // implication ensures that satisfiable conditions are transformed to
611 // other satisfiable conditions and unsatisfiable conditions are
612 // transformed to other unsatisfiable conditions.
613
614 // Here is a concrete example of an unsatisfiable condition on the left
615 // implying a satisfiable condition on the right:
616 //
617 // mask = (1 << z)
618 // (x & ~mask) == y --> (x == y || x == (y | mask))
619 //
620 // Substituting y = 3, z = 0 yields:
621 // (x & -2) == 3 --> (x == 3 || x == 2)
622
623 // Pattern match a special case:
624 /*
625 QUERY( (y & ~mask = y) =>
626 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
627 );
628 */
629 if (match(ICI->getOperand(0),
630 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
631 APInt Mask = ~*RHSC;
632 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
633 // If we already have a value for the switch, it has to match!
634 if (!setValueOnce(RHSVal))
635 return false;
636
637 Vals.push_back(C);
638 Vals.push_back(
639 ConstantInt::get(C->getContext(),
640 C->getValue() | Mask));
641 UsedICmps++;
642 return true;
643 }
644 }
645
646 // Pattern match a special case:
647 /*
648 QUERY( (y | mask = y) =>
649 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
650 );
651 */
652 if (match(ICI->getOperand(0),
653 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
654 APInt Mask = *RHSC;
655 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
656 // If we already have a value for the switch, it has to match!
657 if (!setValueOnce(RHSVal))
658 return false;
659
660 Vals.push_back(C);
661 Vals.push_back(ConstantInt::get(C->getContext(),
662 C->getValue() & ~Mask));
663 UsedICmps++;
664 return true;
665 }
666 }
667
668 // If we already have a value for the switch, it has to match!
669 if (!setValueOnce(ICI->getOperand(0)))
670 return false;
671
672 UsedICmps++;
673 Vals.push_back(C);
674 return ICI->getOperand(0);
675 }
676
677 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
678 ConstantRange Span =
680
681 // Shift the range if the compare is fed by an add. This is the range
682 // compare idiom as emitted by instcombine.
683 Value *CandidateVal = I->getOperand(0);
684 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
685 Span = Span.subtract(*RHSC);
686 CandidateVal = RHSVal;
687 }
688
689 // If this is an and/!= check, then we are looking to build the set of
690 // values that *don't* pass the and chain. I.e. to turn "x ugt 2" into
691 // x != 0 && x != 1.
692 if (!isEQ)
693 Span = Span.inverse();
694
695 // If there are a ton of values, we don't want to make a ginormous switch.
696 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
697 return false;
698 }
699
700 // If we already have a value for the switch, it has to match!
701 if (!setValueOnce(CandidateVal))
702 return false;
703
704 // Add all values from the range to the set
705 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
706 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
707
708 UsedICmps++;
709 return true;
710 }
711
712 /// Given a potentially 'or'd or 'and'd together collection of icmp
713 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
714 /// the value being compared, and stick the list constants into the Vals
715 /// vector.
716 /// One "Extra" case is allowed to differ from the other.
717 void gather(Value *V) {
718 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
719
720 // Keep a stack (SmallVector for efficiency) for depth-first traversal
723
724 // Initialize
725 Visited.insert(V);
726 DFT.push_back(V);
727
728 while (!DFT.empty()) {
729 V = DFT.pop_back_val();
730
731 if (Instruction *I = dyn_cast<Instruction>(V)) {
732 // If it is a || (or && depending on isEQ), process the operands.
733 Value *Op0, *Op1;
734 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
735 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
736 if (Visited.insert(Op1).second)
737 DFT.push_back(Op1);
738 if (Visited.insert(Op0).second)
739 DFT.push_back(Op0);
740
741 continue;
742 }
743
744 // Try to match the current instruction
745 if (matchInstruction(I, isEQ))
746 // Match succeeded, continue the loop
747 continue;
748 }
749
750 // One element of the sequence of || (or &&) could not be matched as a
751 // comparison against the same value as the others.
752 // We allow only one "Extra" case to be checked before the switch
753 if (!Extra) {
754 Extra = V;
755 continue;
756 }
757 // Failed to parse a proper sequence, abort now
758 CompValue = nullptr;
759 break;
760 }
761 }
762};
763
764} // end anonymous namespace
765
767 MemorySSAUpdater *MSSAU = nullptr) {
768 Instruction *Cond = nullptr;
769 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
770 Cond = dyn_cast<Instruction>(SI->getCondition());
771 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
772 if (BI->isConditional())
773 Cond = dyn_cast<Instruction>(BI->getCondition());
774 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
775 Cond = dyn_cast<Instruction>(IBI->getAddress());
776 }
777
778 TI->eraseFromParent();
779 if (Cond)
781}
782
783/// Return true if the specified terminator checks
784/// to see if a value is equal to constant integer value.
785Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
786 Value *CV = nullptr;
787 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
788 // Do not permit merging of large switch instructions into their
789 // predecessors unless there is only one predecessor.
790 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
791 CV = SI->getCondition();
792 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
793 if (BI->isConditional() && BI->getCondition()->hasOneUse())
794 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
795 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
796 CV = ICI->getOperand(0);
797 }
798
799 // Unwrap any lossless ptrtoint cast.
800 if (CV) {
801 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
802 Value *Ptr = PTII->getPointerOperand();
803 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
804 CV = Ptr;
805 }
806 }
807 return CV;
808}
809
810/// Given a value comparison instruction,
811/// decode all of the 'cases' that it represents and return the 'default' block.
812BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
813 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
814 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
815 Cases.reserve(SI->getNumCases());
816 for (auto Case : SI->cases())
817 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
818 Case.getCaseSuccessor()));
819 return SI->getDefaultDest();
820 }
821
822 BranchInst *BI = cast<BranchInst>(TI);
823 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
824 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
825 Cases.push_back(ValueEqualityComparisonCase(
826 getConstantInt(ICI->getOperand(1), DL), Succ));
827 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
828}
829
830/// Given a vector of bb/value pairs, remove any entries
831/// in the list that match the specified block.
832static void
834 std::vector<ValueEqualityComparisonCase> &Cases) {
835 llvm::erase(Cases, BB);
836}
837
838/// Return true if there are any keys in C1 that exist in C2 as well.
839static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
840 std::vector<ValueEqualityComparisonCase> &C2) {
841 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
842
843 // Make V1 be smaller than V2.
844 if (V1->size() > V2->size())
845 std::swap(V1, V2);
846
847 if (V1->empty())
848 return false;
849 if (V1->size() == 1) {
850 // Just scan V2.
851 ConstantInt *TheVal = (*V1)[0].Value;
852 for (const ValueEqualityComparisonCase &VECC : *V2)
853 if (TheVal == VECC.Value)
854 return true;
855 }
856
857 // Otherwise, just sort both lists and compare element by element.
858 array_pod_sort(V1->begin(), V1->end());
859 array_pod_sort(V2->begin(), V2->end());
860 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
861 while (i1 != e1 && i2 != e2) {
862 if ((*V1)[i1].Value == (*V2)[i2].Value)
863 return true;
864 if ((*V1)[i1].Value < (*V2)[i2].Value)
865 ++i1;
866 else
867 ++i2;
868 }
869 return false;
870}
871
872// Set branch weights on SwitchInst. This sets the metadata if there is at
873// least one non-zero weight.
875 bool IsExpected) {
876 // Check that there is at least one non-zero weight. Otherwise, pass
877 // nullptr to setMetadata which will erase the existing metadata.
878 MDNode *N = nullptr;
879 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
880 N = MDBuilder(SI->getParent()->getContext())
881 .createBranchWeights(Weights, IsExpected);
882 SI->setMetadata(LLVMContext::MD_prof, N);
883}
884
885// Similar to the above, but for branch and select instructions that take
886// exactly 2 weights.
887static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
888 uint32_t FalseWeight, bool IsExpected) {
889 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
890 // Check that there is at least one non-zero weight. Otherwise, pass
891 // nullptr to setMetadata which will erase the existing metadata.
892 MDNode *N = nullptr;
893 if (TrueWeight || FalseWeight)
894 N = MDBuilder(I->getParent()->getContext())
895 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
896 I->setMetadata(LLVMContext::MD_prof, N);
897}
898
899 /// If TI is known to be a terminator instruction and its block is known to
900 /// only have a single predecessor block, check to see if that predecessor is
901 /// also a value comparison with the same value, and if that comparison
902 /// determines the outcome of this comparison. If so, simplify TI. This does a
903 /// very limited form of jump threading.
///
/// \param TI      Terminator to simplify; asserted below to be a value
///                equality comparison.
/// \param Pred    The predecessor block whose terminator is inspected.
/// \param Builder Used to create the replacement unconditional branch.
/// \returns true after TI has been rewritten or had cases pruned; false on the
///          early-exit paths.
///
/// NOTE(review): listing numbers 953, 964, 1042 and 1044 are absent from this
/// extract; the statements on those lines are not visible (see notes below).
904 bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
905 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
906 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
907 if (!PredVal)
908 return false; // Not a value comparison in predecessor.
909
910 Value *ThisVal = isValueEqualityComparison(TI);
911 assert(ThisVal && "This isn't a value comparison!!");
912 if (ThisVal != PredVal)
913 return false; // Different predicates.
914
915 // TODO: Preserve branch weight metadata, similarly to how
916 // foldValueComparisonIntoPredecessors preserves it.
917
918 // Find out information about when control will move from Pred to TI's block.
919 std::vector<ValueEqualityComparisonCase> PredCases;
920 BasicBlock *PredDef =
921 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
922 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
923
924 // Find information about how control leaves this block.
925 std::vector<ValueEqualityComparisonCase> ThisCases;
926 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
927 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
928
929 // If TI's block is the default block from Pred's comparison, potentially
930 // simplify TI based on this knowledge.
931 if (PredDef == TI->getParent()) {
932 // If we are here, we know that the value is none of those cases listed in
933 // PredCases. If there are any cases in ThisCases that are in PredCases, we
934 // can simplify TI.
935 if (!valuesOverlap(PredCases, ThisCases))
936 return false;
937
938 if (isa<BranchInst>(TI)) {
939 // Okay, one of the successors of this condbr is dead. Convert it to a
940 // uncond br.
941 assert(ThisCases.size() == 1 && "Branch can only have one case!");
942 // Insert the new branch.
943 Instruction *NI = Builder.CreateBr(ThisDef);
// NI is only read inside LLVM_DEBUG; the cast silences unused warnings.
944 (void)NI;
945
946 // Remove PHI node entries for the dead edge.
// PredDef equals TI's own block on this path (checked above).
947 ThisCases[0].Dest->removePredecessor(PredDef);
948
949 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
950 << "Through successor TI: " << *TI << "Leaving: " << *NI
951 << "\n");
952
// NOTE(review): listing number 953 is missing here; presumably the statement
// that removes the old terminator TI - confirm against the full file.
954
955 if (DTU)
956 DTU->applyUpdates(
957 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
958
959 return true;
960 }
961
962 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
963 // Okay, TI has cases that are statically dead, prune them away.
// NOTE(review): listing number 964 is missing; `DeadCases` is used below but
// its declaration is not visible in this extract.
965 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
966 DeadCases.insert(PredCases[i].Value);
967
968 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
969 << "Through successor TI: " << *TI);
970
// Per-successor count of cases that survive pruning (only maintained when a
// DomTreeUpdater is present); a count of zero means the edge disappeared.
971 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
// Walk the cases from last to first, presumably so that removing a case does
// not disturb the cases that have not been visited yet.
972 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
973 --i;
974 auto *Successor = i->getCaseSuccessor();
975 if (DTU)
976 ++NumPerSuccessorCases[Successor];
977 if (DeadCases.count(i->getCaseValue())) {
978 Successor->removePredecessor(PredDef);
979 SI.removeCase(i);
980 if (DTU)
981 --NumPerSuccessorCases[Successor];
982 }
983 }
984
985 if (DTU) {
986 std::vector<DominatorTree::UpdateType> Updates;
987 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
988 if (I.second == 0)
989 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
990 DTU->applyUpdates(Updates);
991 }
992
993 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
994 return true;
995 }
996
997 // Otherwise, TI's block must correspond to some matched value. Find out
998 // which value (or set of values) this is.
999 ConstantInt *TIV = nullptr;
1000 BasicBlock *TIBB = TI->getParent();
1001 for (const auto &[Value, Dest] : PredCases)
1002 if (Dest == TIBB) {
1003 if (TIV)
1004 return false; // Cannot handle multiple values coming to this block.
1005 TIV = Value;
1006 }
1007 assert(TIV && "No edge from pred to succ?");
1008
1009 // Okay, we found the one constant that our value can be if we get into TI's
1010 // BB. Find out which successor will unconditionally be branched to.
1011 BasicBlock *TheRealDest = nullptr;
1012 for (const auto &[Value, Dest] : ThisCases)
1013 if (Value == TIV) {
1014 TheRealDest = Dest;
1015 break;
1016 }
1017
1018 // If not handled by any explicit cases, it is handled by the default case.
1019 if (!TheRealDest)
1020 TheRealDest = ThisDef;
1021
1022 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1023
1024 // Remove PHI node entries for dead edges.
// CheckEdge is cleared the first time TheRealDest is seen, so exactly one
// edge to TheRealDest is kept and any further duplicate edges to it are
// removed as well (without being recorded in RemovedSuccs).
1025 BasicBlock *CheckEdge = TheRealDest;
1026 for (BasicBlock *Succ : successors(TIBB))
1027 if (Succ != CheckEdge) {
1028 if (Succ != TheRealDest)
1029 RemovedSuccs.insert(Succ);
1030 Succ->removePredecessor(TIBB);
1031 } else
1032 CheckEdge = nullptr;
1033
1034 // Insert the new branch.
1035 Instruction *NI = Builder.CreateBr(TheRealDest);
1036 (void)NI;
1037
1038 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1039 << "Through successor TI: " << *TI << "Leaving: " << *NI
1040 << "\n");
1041
// NOTE(review): listing numbers 1042 and 1044 are missing; `Updates` is used
// below but its declaration is not visible in this extract.
1043 if (DTU) {
1045 Updates.reserve(RemovedSuccs.size());
1046 for (auto *RemovedSucc : RemovedSuccs)
1047 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1048 DTU->applyUpdates(Updates);
1049 }
1050 return true;
1051 }
1052
1053namespace {
1054
1055/// This class implements a stable ordering of constant
1056/// integers that does not depend on their address. This is important for
1057/// applications that sort ConstantInt's to ensure uniqueness.
1058struct ConstantIntOrdering {
1059 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1060 return LHS->getValue().ult(RHS->getValue());
1061 }
1062};
1063
1064} // end anonymous namespace
1065
// Three-way comparison of two ConstantInt pointers passed by address.
// NOTE(review): listing number 1066 (the line with the function name, return
// type and first parameter `P1`) is missing from this extract.
1067 ConstantInt *const *P2) {
1068 const ConstantInt *LHS = *P1;
1069 const ConstantInt *RHS = *P2;
// Identical pointers compare equal.
1070 if (LHS == RHS)
1071 return 0;
// Note the direction: 1 when LHS is unsigned-less-than RHS, -1 otherwise;
// presumably this yields a descending order when used as a sort predicate.
1072 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1073 }
1074
1075 /// Get Weights of a given terminator, the default weight is at the front
1076 /// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1077 /// metadata.
// NOTE(review): listing number 1078 (function name and the `TI` parameter) is
// missing from this extract. The function asserts that TI carries MD_prof
// metadata, so callers must check for it first.
1079 SmallVectorImpl<uint64_t> &Weights) {
1080 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1081 assert(MD && "Invalid branch-weight metadata");
1082 extractFromBranchWeightMD64(MD, Weights);
1083
1084 // If TI is a conditional eq, the default case is the false case,
1085 // and the corresponding branch-weight data is at index 2. We swap the
1086 // default weight to be the first entry.
// Weights holds exactly two entries for a branch (asserted below), so swapping
// front and back moves the false-edge weight to index 0.
1087 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1088 assert(Weights.size() == 2);
1089 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1090 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1091 std::swap(Weights.front(), Weights.back());
1092 }
1093 }
1094
1095 /// Keep halving the weights until all can fit in uint32_t.
// All weights are shifted right by the same amount, computed once from the
// largest weight, so their relative proportions are kept (up to truncation).
// NOTE(review): listing number 1096 (the signature) is missing from this
// extract. max_element is dereferenced unconditionally, so this assumes
// `Weights` is non-empty - confirm at the call sites.
1097 uint64_t Max = *llvm::max_element(Weights);
1098 if (Max > UINT_MAX) {
// Number of significant bits Max has beyond the low 32.
1099 unsigned Offset = 32 - llvm::countl_zero(Max);
1100 for (uint64_t &I : Weights)
1101 I >>= Offset;
1102 }
1103 }
1104
// Clones every non-terminator instruction of BB in front of PredBlock's
// terminator, records original -> clone in VMap, and redirects PHI uses that
// come in via PredBlock to the clones.
// NOTE(review): listing numbers 1105 (start of the signature), 1128 and 1140
// (argument continuation lines) are missing from this extract.
1106 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1107 Instruction *PTI = PredBlock->getTerminator();
1108
1109 // If we have bonus instructions, clone them into the predecessor block.
1110 // Note that there may be multiple predecessor blocks, so we cannot move
1111 // bonus instructions to a predecessor block.
1112 for (Instruction &BonusInst : *BB) {
1113 if (BonusInst.isTerminator())
1114 continue;
1115
1116 Instruction *NewBonusInst = BonusInst.clone();
1117
1118 if (!isa<DbgInfoIntrinsic>(BonusInst) &&
1119 PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
1120 // Unless the instruction has the same !dbg location as the original
1121 // branch, drop it. When we fold the bonus instructions we want to make
1122 // sure we reset their debug locations in order to avoid stepping on
1123 // dead code caused by folding dead branches.
1124 NewBonusInst->setDebugLoc(DebugLoc());
1125 }
1126
// NOTE(review): the remaining arguments of this call (listing number 1128)
// are not visible in this extract.
1127 RemapInstruction(NewBonusInst, VMap,
1129
1130 // If we speculated an instruction, we need to drop any metadata that may
1131 // result in undefined behavior, as the metadata might have been valid
1132 // only given the branch precondition.
1133 // Similarly strip attributes on call parameters that may cause UB in
1134 // location the call is moved to.
1135 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1136
1137 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1138 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
// NOTE(review): the remaining arguments of this call (listing number 1140)
// are not visible in this extract.
1139 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1141
// Debug intrinsics are cloned but get no renaming, VMap entry or use update.
1142 if (isa<DbgInfoIntrinsic>(BonusInst))
1143 continue;
1144
1145 NewBonusInst->takeName(&BonusInst);
1146 BonusInst.setName(NewBonusInst->getName() + ".old");
1147 VMap[&BonusInst] = NewBonusInst;
1148
1149 // Update (liveout) uses of bonus instructions,
1150 // now that the bonus instruction has been cloned into predecessor.
1151 // Note that we expect to be in a block-closed SSA form for this to work!
// early_inc_range is needed because U.set() below edits the use list that is
// being iterated.
1152 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1153 auto *UI = cast<Instruction>(U.getUser());
1154 auto *PN = dyn_cast<PHINode>(UI);
1155 if (!PN) {
1156 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1157 "If the user is not a PHI node, then it should be in the same "
1158 "block as, and come after, the original bonus instruction.");
1159 continue; // Keep using the original bonus instruction.
1160 }
1161 // Is this the block-closed SSA form PHI node?
1162 if (PN->getIncomingBlock(U) == BB)
1163 continue; // Great, keep using the original bonus instruction.
1164 // The only other alternative is an "use" when coming from
1165 // the predecessor block - here we should refer to the cloned bonus instr.
1166 assert(PN->getIncomingBlock(U) == PredBlock &&
1167 "Not in block-closed SSA form?");
1168 U.set(NewBonusInst);
1169 }
1170 }
1171 }
1172
// Folds the value comparison TI (in block BB) into the value comparison PTI
// of a predecessor block: PTI is replaced by a new switch on CV whose cases
// combine those of PTI and TI, with branch weights merged when either side
// has them. CV is taken by reference because a pointer-typed condition is
// replaced by its ptrtoint. The only return statement returns true.
//
// NOTE(review): listing numbers 1178, 1190, 1193 and 1356 are missing from
// this extract. `Updates`, `NewSuccessors` and `Weights` are used below but
// their declarations are not visible, and whatever statement sat on 1356
// (between attaching the weights and the infinite-loop check) is unknown.
1173 bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1174 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1175 BasicBlock *BB = TI->getParent();
1176 BasicBlock *Pred = PTI->getParent();
1177
1179
1180 // Figure out which 'cases' to copy from TI to PTI.
1181 std::vector<ValueEqualityComparisonCase> BBCases;
1182 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1183
1184 std::vector<ValueEqualityComparisonCase> PredCases;
1185 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1186
1187 // Based on whether the default edge from PTI goes to BB or not, fill in
1188 // PredCases and PredDefault with the new switch cases we would like to
1189 // build.
1191
1192 // Update the branch weight metadata along the way
1194 bool PredHasWeights = hasBranchWeightMD(*PTI);
1195 bool SuccHasWeights = hasBranchWeightMD(*TI);
1196
// Layout used throughout: Weights[0] is the default edge, Weights[i + 1]
// belongs to PredCases[i]; SuccWeights mirrors that for BBCases.
1197 if (PredHasWeights) {
1198 getBranchWeights(PTI, Weights);
1199 // branch-weight metadata is inconsistent here.
1200 if (Weights.size() != 1 + PredCases.size())
1201 PredHasWeights = SuccHasWeights = false;
1202 } else if (SuccHasWeights)
1203 // If there are no predecessor weights but there are successor weights,
1204 // populate Weights with 1, which will later be scaled to the sum of
1205 // successor's weights
1206 Weights.assign(1 + PredCases.size(), 1);
1207
1208 SmallVector<uint64_t, 8> SuccWeights;
1209 if (SuccHasWeights) {
1210 getBranchWeights(TI, SuccWeights);
1211 // branch-weight metadata is inconsistent here.
1212 if (SuccWeights.size() != 1 + BBCases.size())
1213 PredHasWeights = SuccHasWeights = false;
1214 } else if (PredHasWeights)
1215 SuccWeights.assign(1 + BBCases.size(), 1);
1216
1217 if (PredDefault == BB) {
1218 // If this is the default destination from PTI, only the edges in TI
1219 // that don't occur in PTI, or that branch to BB will be activated.
1220 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1221 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1222 if (PredCases[i].Dest != BB)
1223 PTIHandled.insert(PredCases[i].Value);
1224 else {
1225 // The default destination is BB, we don't need explicit targets.
// Swap-with-last removal; --i/--e below make the loop revisit slot i, which
// now holds the element that used to be last.
1226 std::swap(PredCases[i], PredCases.back());
1227
1228 if (PredHasWeights || SuccHasWeights) {
1229 // Increase weight for the default case.
1230 Weights[0] += Weights[i + 1];
1231 std::swap(Weights[i + 1], Weights.back());
1232 Weights.pop_back();
1233 }
1234
1235 PredCases.pop_back();
1236 --i;
1237 --e;
1238 }
1239
1240 // Reconstruct the new switch statement we will be building.
1241 if (PredDefault != BBDefault) {
1242 PredDefault->removePredecessor(Pred);
1243 if (DTU && PredDefault != BB)
1244 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1245 PredDefault = BBDefault;
1246 ++NewSuccessors[BBDefault];
1247 }
1248
// Entries of Weights before this index came from the predecessor; entries
// appended in the loop below come from BB's cases.
1249 unsigned CasesFromPred = Weights.size();
1250 uint64_t ValidTotalSuccWeight = 0;
1251 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1252 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1253 PredCases.push_back(BBCases[i]);
1254 ++NewSuccessors[BBCases[i].Dest];
1255 if (SuccHasWeights || PredHasWeights) {
1256 // The default weight is at index 0, so weight for the ith case
1257 // should be at index i+1. Scale the cases from successor by
1258 // PredDefaultWeight (Weights[0]).
1259 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1260 ValidTotalSuccWeight += SuccWeights[i + 1];
1261 }
1262 }
1263
1264 if (SuccHasWeights || PredHasWeights) {
1265 ValidTotalSuccWeight += SuccWeights[0];
1266 // Scale the cases from predecessor by ValidTotalSuccWeight.
1267 for (unsigned i = 1; i < CasesFromPred; ++i)
1268 Weights[i] *= ValidTotalSuccWeight;
1269 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1270 Weights[0] *= SuccWeights[0];
1271 }
1272 } else {
1273 // If this is not the default destination from PTI, only the edges
1274 // in TI that occur in PTI with a destination of BB will be
1275 // activated.
1276 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1277 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1278 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1279 if (PredCases[i].Dest == BB) {
1280 PTIHandled.insert(PredCases[i].Value);
1281
1282 if (PredHasWeights || SuccHasWeights) {
// Remember the weight of the edge being removed so it can be re-attached
// to whichever destination this constant ends up with below.
1283 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1284 std::swap(Weights[i + 1], Weights.back());
1285 Weights.pop_back();
1286 }
1287
1288 std::swap(PredCases[i], PredCases.back());
1289 PredCases.pop_back();
1290 --i;
1291 --e;
1292 }
1293
1294 // Okay, now we know which constants were sent to BB from the
1295 // predecessor. Figure out where they will all go now.
1296 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1297 if (PTIHandled.count(BBCases[i].Value)) {
1298 // If this is one we are capable of getting...
1299 if (PredHasWeights || SuccHasWeights)
1300 Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1301 PredCases.push_back(BBCases[i]);
1302 ++NewSuccessors[BBCases[i].Dest];
1303 PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
1304 }
1305
1306 // If there are any constants vectored to BB that TI doesn't handle,
1307 // they must go to the default destination of TI.
1308 for (ConstantInt *I : PTIHandled) {
1309 if (PredHasWeights || SuccHasWeights)
1310 Weights.push_back(WeightsForHandled[I]);
1311 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1312 ++NewSuccessors[BBDefault];
1313 }
1314 }
1315
1316 // Okay, at this point, we know which new successor Pred will get. Make
1317 // sure we update the number of entries in the PHI nodes for these
1318 // successors.
1319 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1320 if (DTU) {
1321 SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
1322 Updates.reserve(Updates.size() + NewSuccessors.size());
1323 }
1324 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1325 NewSuccessors) {
// One addPredecessorToBlock call per new edge to this successor.
1326 for (auto I : seq(NewSuccessor.second)) {
1327 (void)I;
1328 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1329 }
// Only report an Insert for blocks Pred did not already branch to.
1330 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1331 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1332 }
1333
1334 Builder.SetInsertPoint(PTI);
1335 // Convert pointer to int before we switch.
1336 if (CV->getType()->isPointerTy()) {
1337 CV =
1338 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1339 }
1340
1341 // Now that the successors are updated, create the new Switch instruction.
1342 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1343 NewSI->setDebugLoc(PTI->getDebugLoc());
1344 for (ValueEqualityComparisonCase &V : PredCases)
1345 NewSI->addCase(V.Value, V.Dest);
1346
1347 if (PredHasWeights || SuccHasWeights) {
1348 // Halve the weights if any of them cannot fit in an uint32_t
1349 fitWeights(Weights);
1350
1351 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1352
1353 setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
1354 }
1355
1357
1358 // Okay, last check. If BB is still a successor of NewSI, then we must
1359 // have an infinite loop case. If so, add an infinitely looping block
1360 // to handle the case to preserve the behavior of the code.
1361 BasicBlock *InfLoopBlock = nullptr;
1362 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1363 if (NewSI->getSuccessor(i) == BB) {
1364 if (!InfLoopBlock) {
1365 // Insert it at the end of the function, because it's either cold,
1366 // or it won't matter if it's hot. :)
1367 InfLoopBlock =
1368 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1369 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1370 if (DTU)
1371 Updates.push_back(
1372 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1373 }
1374 NewSI->setSuccessor(i, InfLoopBlock);
1375 }
1376
1377 if (DTU) {
1378 if (InfLoopBlock)
1379 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1380
1381 Updates.push_back({DominatorTree::Delete, Pred, BB});
1382
1383 DTU->applyUpdates(Updates);
1384 }
1385
1386 ++NumFoldValueComparisonIntoPredecessors;
1387 return true;
1388 }
1389
1390 /// The specified terminator is a value equality comparison instruction
1391 /// (either a switch or a branch on "X == c").
1392 /// See if any of the predecessors of the terminator block are value comparisons
1393 /// on the same value. If so, and if safe to do so, fold them together.
///
/// \param TI      The value comparison terminator (asserted below).
/// \param Builder Forwarded to performValueComparisonIntoPredecessorFolding.
/// \returns true if at least one predecessor was folded, subject to the
///          early `return false` flagged below.
///
/// NOTE(review): listing numbers 1402 and 1416 are missing from this extract;
/// `Preds` and `FailBlocks` are used below but their declarations are not
/// visible.
1394 bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1395 IRBuilder<> &Builder) {
1396 BasicBlock *BB = TI->getParent();
1397 Value *CV = isValueEqualityComparison(TI); // CondVal
1398 assert(CV && "Not a comparison?");
1399
1400 bool Changed = false;
1401
1403 while (!Preds.empty()) {
1404 BasicBlock *Pred = Preds.pop_back_val();
1405 Instruction *PTI = Pred->getTerminator();
1406
1407 // Don't try to fold into itself.
1408 if (Pred == BB)
1409 continue;
1410
1411 // See if the predecessor is a comparison with the same value.
1412 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1413 if (PCV != CV)
1414 continue;
1415
// When merging is not directly safe, try to split the predecessors of each
// block reported in FailBlocks first.
// NOTE(review): the `return false` below fires even if an earlier iteration
// already folded a predecessor (Changed == true) or an earlier split in this
// loop already modified the CFG - confirm callers tolerate an under-reported
// change.
1417 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1418 for (auto *Succ : FailBlocks) {
1419 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1420 return false;
1421 }
1422 }
1423
// The helper's boolean result is ignored; the fold is counted as a change.
1424 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1425 Changed = true;
1426 }
1427 return Changed;
1428 }
1429
1430 // If we would need to insert a select that uses the value of this invoke
1431 // (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1432 // need to do this), we can't hoist the invoke, as there is nowhere to put the
1433 // select in this case.
// NOTE(review): listing number 1434 (function name, return type and the
// leading parameters, presumably BB1 and BB2) is missing from this extract.
// Returns false as soon as a PHI in any successor of BB1 receives different
// values from BB1 and BB2 and one of those values is the respective
// instruction (I1 from BB1 or I2 from BB2); returns true otherwise.
1435 Instruction *I1, Instruction *I2) {
1436 for (BasicBlock *Succ : successors(BB1)) {
1437 for (const PHINode &PN : Succ->phis()) {
1438 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1439 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1440 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1441 return false;
1442 }
1443 }
1444 }
1445 return true;
1446 }
1447
1448 // Get interesting characteristics of instructions that
1449 // `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1450 // instructions can be reordered across.
// NOTE(review): listing numbers 1451-1455 and 1457 are missing from this
// extract, so the definitions of SkipReadMem, SkipSideEffect and
// SkipImplicitControlFlow and the signature of the function below are not
// visible. The function builds a bitmask from those flags for instruction I.
1456
1458 unsigned Flags = 0;
1459 if (I->mayReadFromMemory())
1460 Flags |= SkipReadMem;
1461 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1462 // inalloca) across stacksave/stackrestore boundaries.
1463 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1464 Flags |= SkipSideEffect;
// NOTE(review): listing number 1465 is missing; the condition that guards the
// next statement is not visible in this extract.
1466 Flags |= SkipImplicitControlFlow;
1467 return Flags;
1468 }
1469
1470 // Returns true if it is safe to reorder an instruction across preceding
1471 // instructions in a basic block.
// I is the instruction to be hoisted; Flags is a bitmask of the Skip* values
// describing the instructions it would be moved across.
1472 static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1473 // Don't reorder a store over a load.
1474 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1475 return false;
1476
1477 // If we have seen an instruction with side effects, it's unsafe to reorder an
1478 // instruction which reads memory or itself has side effects.
1479 if ((Flags & SkipSideEffect) &&
1480 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1481 return false;
1482
1483 // Reordering across an instruction which does not necessarily transfer
1484 // control to the next instruction is speculation.
// NOTE(review): listing number 1485 is missing; the condition guarding this
// `return false` is not visible in this extract.
1486 return false;
1487
1488 // Hoisting of llvm.deoptimize is only legal together with the next return
1489 // instruction, which this pass is not always able to do.
1490 if (auto *CB = dyn_cast<CallBase>(I))
1491 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1492 return false;
1493
1494 // It's also unsafe/illegal to hoist an instruction above its instruction
1495 // operands
// Only operands defined in I's own block are rejected here.
1496 BasicBlock *BB = I->getParent();
1497 for (Value *Op : I->operands()) {
1498 if (auto *J = dyn_cast<Instruction>(Op))
1499 if (J->getParent() == BB)
1500 return false;
1501 }
1502
1503 return true;
1504 }
1505
// Forward declaration; the definition is not part of this excerpt.
1506 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1507
1508 /// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1509 /// instructions \p I1 and \p I2 can and should be hoisted.
// NOTE(review): listing numbers 1510 (function name and the I1/I2 parameters)
// and 1524 (the condition guarding the second `return false`) are missing
// from this extract.
1511 const TargetTransformInfo &TTI) {
1512 // If we're going to hoist a call, make sure that the two instructions
1513 // we're commoning/hoisting are both marked with musttail, or neither of
1514 // them is marked as such. Otherwise, we might end up in a situation where
1515 // we hoist from a block where the terminator is a `ret` to a block where
1516 // the terminator is a `br`, and `musttail` calls expect to be followed by
1517 // a return.
1518 auto *C1 = dyn_cast<CallInst>(I1);
1519 auto *C2 = dyn_cast<CallInst>(I2);
1520 if (C1 && C2)
1521 if (C1->isMustTailCall() != C2->isMustTailCall())
1522 return false;
1523
1525 return false;
1526
1527 // If any of the two call sites has nomerge or convergent attribute, stop
1528 // hoisting.
1529 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1530 if (CB1->cannotMerge() || CB1->isConvergent())
1531 return false;
1532 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1533 if (CB2->cannotMerge() || CB2->isConvergent())
1534 return false;
1535
1536 return true;
1537 }
1538
1539 /// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1540 /// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1541 /// hoistCommonCodeFromSuccessors. e.g. The input:
1542 /// I1 DVRs: { x, z },
1543 /// OtherInsts: { I2 DVRs: { x, y, z } }
1544 /// would result in hoisting only DbgVariableRecord x.
// NOTE(review): listing numbers 1545 (function name), 1553 (declaration of
// `Itrs`) and 1563 (the header of the inner lambda that takes `I`) are missing
// from this extract.
1546 Instruction *TI, Instruction *I1,
1547 SmallVectorImpl<Instruction *> &OtherInsts) {
// Nothing to do when I1 carries no debug records.
1548 if (!I1->hasDbgRecords())
1549 return;
1550 using CurrentAndEndIt =
1551 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1552 // Vector of {Current, End} iterators.
1554 Itrs.reserve(OtherInsts.size() + 1);
1555 // Helper lambdas for lock-step checks:
1556 // Return true if this Current == End.
1557 auto atEnd = [](const CurrentAndEndIt &Pair) {
1558 return Pair.first == Pair.second;
1559 };
1560 // Return true if all Current are identical.
// Every entry after the first is compared against entry 0's current record.
1561 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1562 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1564 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1565 });
1566 };
1567
1568 // Collect the iterators.
1569 Itrs.push_back(
1570 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1571 for (Instruction *Other : OtherInsts) {
// Bail out entirely if any other instruction has no debug records.
1572 if (!Other->hasDbgRecords())
1573 return;
1574 Itrs.push_back(
1575 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1576 }
1577
1578 // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
1579 // the lock-step DbgRecord are identical, hoist all of them to TI.
1580 // This replicates the dbg.* intrinsic behaviour in
1581 // hoistCommonCodeFromSuccessors.
1582 while (none_of(Itrs, atEnd)) {
// Decide before advancing: the loop below moves every Current iterator.
1583 bool HoistDVRs = allIdentical(Itrs);
1584 for (CurrentAndEndIt &Pair : Itrs) {
1585 // Increment Current iterator now as we may be about to move the
1586 // DbgRecord.
1587 DbgRecord &DR = *Pair.first++;
1588 if (HoistDVRs) {
1589 DR.removeFromParent();
1590 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1591 }
1592 }
1593 }
1594 }
1595
// Returns true if I1 and I2 are identical, or equivalent after swapping
// operands: a compare against the swapped-predicate compare, or a commutative
// operation with its first two operands exchanged.
// NOTE(review): listing number 1596 (function name, return type and the `I1`
// parameter) is missing from this extract.
1597 const Instruction *I2) {
1598 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1599 return true;
1600
// Once both are compares the result is decided here; the commutative check
// below is not reached.
1601 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1602 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1603 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1604 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1605 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1606
1607 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
// Operands 0 and 1 must be exchanged; any operands from index 2 on must
// match position by position.
1608 return I1->getOperand(0) == I2->getOperand(1) &&
1609 I1->getOperand(1) == I2->getOperand(0) &&
1610 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1611 }
1612
1613 return false;
1614 }
1615
1616 /// If the target supports conditional faulting,
1617 /// we look for the following pattern:
1618 /// \code
1619 /// BB:
1620 /// ...
1621 /// %cond = icmp ult %x, %y
1622 /// br i1 %cond, label %TrueBB, label %FalseBB
1623 /// FalseBB:
1624 /// store i32 1, ptr %q, align 4
1625 /// ...
1626 /// TrueBB:
1627 /// %maskedloadstore = load i32, ptr %b, align 4
1628 /// store i32 %maskedloadstore, ptr %p, align 4
1629 /// ...
1630 /// \endcode
1631 ///
1632 /// and transform it into:
1633 ///
1634 /// \code
1635 /// BB:
1636 /// ...
1637 /// %cond = icmp ult %x, %y
1638 /// %maskedloadstore = cload i32, ptr %b, %cond
1639 /// cstore i32 %maskedloadstore, ptr %p, %cond
1640 /// cstore i32 1, ptr %q, ~%cond
1641 /// br i1 %cond, label %TrueBB, label %FalseBB
1642 /// FalseBB:
1643 /// ...
1644 /// TrueBB:
1645 /// ...
1646 /// \endcode
1647 ///
1648 /// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1649 /// e.g.
1650 ///
1651 /// \code
1652 /// %vcond = bitcast i1 %cond to <1 x i1>
1653 /// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1654 /// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1655 /// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1656 /// call void @llvm.masked.store.v1i32.p0
1657 /// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1658 /// %cond.not = xor i1 %cond, true
1659 /// %vcond.not = bitcast i1 %cond.not to <1 x i1>
1660 /// call void @llvm.masked.store.v1i32.p0
1661 /// (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
1662 /// \endcode
1663 ///
1664 /// So we need to turn hoisted load/store into cload/cstore.
1665 ///
1666 /// \param BI The branch instruction.
1667 /// \param SpeculatedConditionalLoadsStores The load/store instructions that
1668 /// will be speculated.
1669 /// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
// NOTE(review): listing numbers 1670 (function name and return type) and 1747
// (a line between the DIAssignID FIXME and the eraseMetadataIf call) are
// missing from this extract.
1671 BranchInst *BI,
1672 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1673 std::optional<bool> Invert) {
1674 auto &Context = BI->getParent()->getContext();
1675 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1676 auto *Cond = BI->getOperand(0);
1677 // Construct the condition if needed.
1678 BasicBlock *BB = BI->getParent();
1679 IRBuilder<> Builder(
1680 Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI);
// With Invert set, a single Mask (the condition or its negation) serves every
// instruction; without it, both polarities are built and one is picked per
// instruction below.
1681 Value *Mask = nullptr;
1682 Value *MaskFalse = nullptr;
1683 Value *MaskTrue = nullptr;
1684 if (Invert.has_value()) {
1685 Mask = Builder.CreateBitCast(
1686 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1687 VCondTy);
1688 } else {
1689 MaskFalse = Builder.CreateBitCast(
1690 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1691 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1692 }
// Strips any chain of bitcast instructions to reach the underlying value.
1693 auto PeekThroughBitcasts = [](Value *V) {
1694 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1695 V = BitCast->getOperand(0);
1696 return V;
1697 };
1698 for (auto *I : SpeculatedConditionalLoadsStores) {
// This Builder shadows the outer one: the replacement is created right at I
// when Invert is set, otherwise in front of the branch.
1699 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1700 if (!Invert.has_value())
1701 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1702 // We currently assume conditional faulting load/store is supported for
1703 // scalar types only when creating new instructions. This can be easily
1704 // extended for vector types in the future.
1705 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1706 auto *Op0 = I->getOperand(0);
1707 CallInst *MaskedLoadStore = nullptr;
1708 if (auto *LI = dyn_cast<LoadInst>(I)) {
1709 // Handle Load.
1710 auto *Ty = I->getType();
1711 PHINode *PN = nullptr;
1712 Value *PassThru = nullptr;
// With Invert set, the first PHI user of the load (if any) supplies the
// pass-through value: its incoming value for BB, bitcast to a 1-element
// vector.
1713 if (Invert.has_value())
1714 for (User *U : I->users())
1715 if ((PN = dyn_cast<PHINode>(U))) {
1716 PassThru = Builder.CreateBitCast(
1717 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1718 FixedVectorType::get(Ty, 1));
1719 break;
1720 }
1721 MaskedLoadStore = Builder.CreateMaskedLoad(
1722 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1723 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1724 if (PN)
1725 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1726 I->replaceAllUsesWith(NewLoadStore);
1727 } else {
1728 // Handle Store.
1729 auto *StoredVal = Builder.CreateBitCast(
1730 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1731 MaskedLoadStore = Builder.CreateMaskedStore(
1732 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1733 }
1734 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1735 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1736 //
1737 // !nonnull, !align : Pointer types are not supported, no need to keep.
1738 // !range: Load type is changed from scalar to vector, but the metadata on
1739 // vector specifies a per-element range, so the semantics stay the
1740 // same. Keep it.
1741 // !annotation: Does not impact semantics. Keep it.
1742 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1743 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1744 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1745 // FIXME: DIAssignID is not supported for masked store yet.
1746 // (Verifier::visitDIAssignIDMetadata)
1748 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1749 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1750 });
// Whatever metadata is still on I is carried over before I is deleted.
1751 MaskedLoadStore->copyMetadata(*I);
1752 I->eraseFromParent();
1753 }
1754 }
1755
// Accepts only simple (non-volatile, non-atomic) loads and stores; any other
// instruction is rejected.
// NOTE(review): listing numbers 1756 (function name and the `I` parameter)
// and 1771-1772 (the final return expression, which per the comment below
// involves an alignment limit) are missing from this extract.
1757 const TargetTransformInfo &TTI) {
1758 // Not handle volatile or atomic.
1759 if (auto *L = dyn_cast<LoadInst>(I)) {
1760 if (!L->isSimple())
1761 return false;
1762 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1763 if (!S->isSimple())
1764 return false;
1765 } else
1766 return false;
1767
1768 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1769 // That's why we have the alignment limitation.
1770 // FIXME: Update the prototype of the intrinsics?
1773 }
1774
1775namespace {
1776
1777// LockstepReverseIterator - Iterates through instructions
1778// in a set of blocks in reverse order from the first non-terminator.
1779// For example (assume all blocks have size n):
1780// LockstepReverseIterator I([B1, B2, B3]);
1781// *I-- = [B1[n], B2[n], B3[n]];
1782// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
1783// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
1784// ...
//
// Debug-info intrinsics (DbgInfoIntrinsic) are stepped over in every block.
// As soon as one block has no further instruction in the requested direction
// the iterator becomes invalid (isValid() returns false) and stays invalid
// until reset() is called.
1785class LockstepReverseIterator {
 // NOTE(review): listing lines 1786-1787 are absent here. The members `Blocks`
 // and `Insts` used below are presumably declared on them; restore from
 // upstream.
 // Set once any block ran out of instructions; cleared only by reset().
1788 bool Fail;
1789
1790public:
 // Positions the iterator on the last non-debug, non-terminator instruction
 // of every block in Blocks.
1791 LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
1792 reset();
1793 }
1794
 // Re-seats the iterator at the end of every block: for each block, start at
 // the instruction preceding the terminator and walk backwards over debug
 // intrinsics.
1795 void reset() {
1796 Fail = false;
1797 Insts.clear();
1798 for (auto *BB : Blocks) {
1799 Instruction *Inst = BB->getTerminator();
1800 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1801 Inst = Inst->getPrevNode();
1802 if (!Inst) {
1803 // Block wasn't big enough. Insts is left partially filled, so it must
 // not be read while the iterator is invalid.
1804 Fail = true;
1805 return;
1806 }
1807 Insts.push_back(Inst);
1808 }
1809 }
1810
 // True while every block still has a current instruction.
1811 bool isValid() const { return !Fail; }
1812
 // Moves every block's cursor one non-debug instruction towards the block
 // start. No-op once the iterator is invalid.
1813 void operator--() {
1814 if (Fail)
1815 return;
 // Inst is a reference into Insts, so the assignments below update the
 // stored cursors in place (possibly to null on failure).
1816 for (auto *&Inst : Insts) {
1817 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1818 Inst = Inst->getPrevNode();
1819 // Already at beginning of block.
1820 if (!Inst) {
1821 Fail = true;
1822 return;
1823 }
1824 }
1825 }
1826
 // Mirror image of operator--: moves every cursor one non-debug instruction
 // towards the block end. No-op once the iterator is invalid.
1827 void operator++() {
1828 if (Fail)
1829 return;
1830 for (auto *&Inst : Insts) {
1831 for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1832 Inst = Inst->getNextNode();
1833 // Already at end of block.
1834 if (!Inst) {
1835 Fail = true;
1836 return;
1837 }
1838 }
1839 }
1840
 // Current instruction of each block, in the same order as Blocks.
1841 ArrayRef<Instruction *> operator*() const { return Insts; }
1842};
1843
1844} // end anonymous namespace
1845
1846/// Hoist any common code in the successor blocks up into the block. This
1847/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1848/// given, only perform hoisting in case all successors blocks contain matching
1849/// instructions only. In that case, all instructions can be hoisted and the
1850/// original branch will be replaced and selects for PHIs are added.
///
/// \p TI is the terminator whose parent block (BB) has its successors scanned.
/// \returns true if the IR was changed, false otherwise.
1851bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1852 bool AllInstsEqOnly) {
1853 // This does very trivial matching, with limited scanning, to find identical
1854 // instructions in the two blocks. In particular, we don't want to get into
1855 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1856 // such, we currently just scan for obviously identical instructions in an
1857 // identical order, possibly separated by the same number of non-identical
1858 // instructions.
1859 BasicBlock *BB = TI->getParent();
1860 unsigned int SuccSize = succ_size(BB);
1861 if (SuccSize < 2)
1862 return false;
1863
1864 // If either of the blocks has its address taken, then we can't do this fold,
1865 // because the code we'd hoist would no longer run when we jump into the block
1866 // by its address. Every successor must also have a single predecessor.
1867 for (auto *Succ : successors(BB))
1868 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1869 return false;
1870
1871 // The second of pair is a SkipFlags bitmask.
1872 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1873 SmallVector<SuccIterPair, 8> SuccIterPairs;
1874 for (auto *Succ : successors(BB)) {
1875 BasicBlock::iterator SuccItr = Succ->begin();
 // Successors that start with a PHI node are not handled.
1876 if (isa<PHINode>(*SuccItr))
1877 return false;
1878 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1879 }
1880
1881 if (AllInstsEqOnly) {
1882 // Check if all instructions in the successor blocks match. This allows
1883 // hoisting all instructions and removing the blocks we are hoisting from,
1884 // so does not add any new instructions.
 // NOTE(review): listing line 1885 is absent here. `Succs`, used below, is
 // presumably declared on it as the list of BB's successors; restore from
 // upstream.
1886 // Check if sizes and terminators of all successors match.
1887 bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
1888 Instruction *Term0 = Succs[0]->getTerminator();
1889 Instruction *Term = Succ->getTerminator();
1890 return !Term->isSameOperationAs(Term0) ||
1891 !equal(Term->operands(), Term0->operands()) ||
1892 Succs[0]->size() != Succ->size();
1893 });
1894 if (!AllSame)
1895 return false;
 // AllSame is necessarily true from here on, so this check always passes.
1896 if (AllSame) {
 // Walk all successors backwards in lockstep and require each row of
 // instructions to be identical up to commutativity.
1897 LockstepReverseIterator LRI(Succs);
1898 while (LRI.isValid()) {
1899 Instruction *I0 = (*LRI)[0];
1900 if (any_of(*LRI, [I0](Instruction *I) {
1901 return !areIdenticalUpToCommutativity(I0, I);
1902 })) {
1903 return false;
1904 }
1905 --LRI;
1906 }
1907 }
1908 // Now we know that all instructions in all successors can be hoisted. Let
1909 // the loop below handle the hoisting.
1910 }
1911
1912 // Count how many instructions were not hoisted so far. There's a limit on how
1913 // many instructions we skip, serving as a compilation time control as well as
1914 // preventing excessive increase of life ranges.
1915 unsigned NumSkipped = 0;
1916 // If we find an unreachable instruction at the beginning of a basic block, we
1917 // can still hoist instructions from the rest of the basic blocks.
1918 if (SuccIterPairs.size() > 2) {
1919 erase_if(SuccIterPairs,
1920 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1921 if (SuccIterPairs.size() < 2)
1922 return false;
1923 }
1924
1925 bool Changed = false;
1926
 // Main lockstep scan. Each iteration looks at the current instruction of the
 // first successor (I1) and of every other successor, then either hoists the
 // row, skips it, or returns.
1927 for (;;) {
1928 auto *SuccIterPairBegin = SuccIterPairs.begin();
1929 auto &BB1ItrPair = *SuccIterPairBegin++;
1930 auto OtherSuccIterPairRange =
1931 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1932 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1933
1934 Instruction *I1 = &*BB1ItrPair.first;
1935
1936 // Skip debug info if it is not identical.
1937 bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
1938 Instruction *I2 = &*Iter;
1939 return I1->isIdenticalToWhenDefined(I2);
1940 });
1941 if (!AllDbgInstsAreIdentical) {
1942 while (isa<DbgInfoIntrinsic>(I1))
1943 I1 = &*++BB1ItrPair.first;
1944 for (auto &SuccIter : OtherSuccIterRange) {
1945 Instruction *I2 = &*SuccIter;
1946 while (isa<DbgInfoIntrinsic>(I2))
1947 I2 = &*++SuccIter;
1948 }
1949 }
1950
 // Determine whether the whole row is identical (including MMRA metadata)
 // and whether any block has already reached its terminator.
1951 bool AllInstsAreIdentical = true;
1952 bool HasTerminator = I1->isTerminator();
1953 for (auto &SuccIter : OtherSuccIterRange) {
1954 Instruction *I2 = &*SuccIter;
1955 HasTerminator |= I2->isTerminator();
1956 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1957 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1958 AllInstsAreIdentical = false;
1959 }
1960
 // NOTE(review): listing line 1961 is absent here. `OtherInsts`, filled
 // below, is presumably declared on it as a small vector of Instruction
 // pointers; restore from upstream.
1962 for (auto &SuccIter : OtherSuccIterRange)
1963 OtherInsts.push_back(&*SuccIter);
1964
1965 // If we are hoisting the terminator instruction, don't move one (making a
1966 // broken BB), instead clone it, and remove BI.
1967 if (HasTerminator) {
1968 // Even if BB, which contains only one unreachable instruction, is ignored
1969 // at the beginning of the loop, we can hoist the terminator instruction.
1970 // If any instructions remain in the block, we cannot hoist terminators.
1971 if (NumSkipped || !AllInstsAreIdentical) {
1972 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1973 return Changed;
1974 }
1975
1976 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1977 Changed;
1978 }
1979
1980 if (AllInstsAreIdentical) {
1981 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1982 AllInstsAreIdentical =
1983 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1984 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1985 Instruction *I2 = &*Pair.first;
1986 unsigned SkipFlagsBB2 = Pair.second;
1987 // Even if the instructions are identical, it may not
1988 // be safe to hoist them if we have skipped over
1989 // instructions with side effects or their operands
1990 // weren't hoisted.
 // NOTE(review): listing line 1992 (the right-hand operand of the
 // `&&` below) is absent here; restore from upstream.
1991 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1993 });
1994 }
1995
1996 if (AllInstsAreIdentical) {
1997 BB1ItrPair.first++;
1998 if (isa<DbgInfoIntrinsic>(I1)) {
1999 // The debug location is an integral part of a debug info intrinsic
2000 // and can't be separated from it or replaced. Instead of attempting
2001 // to merge locations, simply hoist both copies of the intrinsic.
2002 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2003 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2004 // and leave any that were not hoisted behind (by calling moveBefore
2005 // rather than moveBeforePreserving).
2006 I1->moveBefore(TI);
2007 for (auto &SuccIter : OtherSuccIterRange) {
2008 auto *I2 = &*SuccIter++;
2009 assert(isa<DbgInfoIntrinsic>(I2));
2010 I2->moveBefore(TI);
2011 }
2012 } else {
2013 // For a normal instruction, we just move one to right before the
2014 // branch, then replace all uses of the other with the first. Finally,
2015 // we remove the now redundant second instruction.
2016 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2017 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2018 // and leave any that were not hoisted behind (by calling moveBefore
2019 // rather than moveBeforePreserving).
2020 I1->moveBefore(TI);
2021 for (auto &SuccIter : OtherSuccIterRange) {
 // Advance the iterator before I2 is erased further down.
2022 Instruction *I2 = &*SuccIter++;
2023 assert(I2 != I1);
2024 if (!I2->use_empty())
2025 I2->replaceAllUsesWith(I1);
2026 I1->andIRFlags(I2);
2027 if (auto *CB = dyn_cast<CallBase>(I1)) {
2028 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2029 assert(Success && "We should not be trying to hoist callbases "
2030 "with non-intersectable attributes");
2031 // For NDEBUG Compile.
2032 (void)Success;
2033 }
2034
2035 combineMetadataForCSE(I1, I2, true);
2036 // I1 and I2 are being combined into a single instruction. Its debug
2037 // location is the merged locations of the original instructions.
2038 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2039 I2->eraseFromParent();
2040 }
2041 }
 // NumHoistCommonCode is bumped only on the first hoist of this call;
 // NumHoistCommonInstrs is bumped on every hoisted row.
2042 if (!Changed)
2043 NumHoistCommonCode += SuccIterPairs.size();
2044 Changed = true;
2045 NumHoistCommonInstrs += SuccIterPairs.size();
2046 } else {
2047 if (NumSkipped >= HoistCommonSkipLimit) {
2048 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2049 return Changed;
2050 }
2051 // We are about to skip over a pair of non-identical instructions. Record
2052 // if any have characteristics that would prevent reordering instructions
2053 // across them.
2054 for (auto &SuccIterPair : SuccIterPairs) {
2055 Instruction *I = &*SuccIterPair.first++;
2056 SuccIterPair.second |= skippedInstrFlags(I);
2057 }
2058 ++NumSkipped;
2059 }
2060 }
2061}
2062
// Hoists a terminator that is identical in all successors of TI's block into
// that block by cloning it before TI.
//
// TI is the terminator of the predecessor block, I1 the terminator of the
// first successor, and OtherSuccTIs the terminators of the remaining
// successors. When TI is a BranchInst, differing PHI inputs in the common
// successors are merged with selects on the branch condition; when it is not,
// any differing PHI input makes the function bail out.
// Returns true if the terminator was hoisted, false if nothing was changed.
2063bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2064 Instruction *TI, Instruction *I1,
2065 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
2066
 // BI is null when TI is not a branch instruction.
2067 auto *BI = dyn_cast<BranchInst>(TI);
2068
2069 bool Changed = false;
2070 BasicBlock *TIParent = TI->getParent();
2071 BasicBlock *BB1 = I1->getParent();
2072
2073 // Use only for an if statement.
2074 auto *I2 = *OtherSuccTIs.begin();
2075 auto *BB2 = I2->getParent();
2076 if (BI) {
2077 assert(OtherSuccTIs.size() == 1);
2078 assert(BI->getSuccessor(0) == I1->getParent());
2079 assert(BI->getSuccessor(1) == I2->getParent());
2080 }
2081
2082 // In the case of an if statement, we try to hoist an invoke.
2083 // FIXME: Can we define a safety predicate for CallBr?
2084 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2085 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2086 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2087 return false;
2088
2089 // TODO: callbr hoisting currently disabled pending further study.
2090 if (isa<CallBrInst>(I1))
2091 return false;
2092
 // Legality scan over the PHIs of the common successors; nothing is modified
 // until this loop has completed without bailing out.
2093 for (BasicBlock *Succ : successors(BB1)) {
2094 for (PHINode &PN : Succ->phis()) {
2095 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2096 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2097 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2098 if (BB1V == BB2V)
2099 continue;
2100
2101 // In the case of an if statement, check for
2102 // passingValueIsAlwaysUndefined here because we would rather eliminate
2103 // undefined control flow than converting it to a select.
 // NOTE(review): listing line 2105 (the last operand of the condition
 // below) is absent here; restore from upstream.
2104 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2106 return false;
2107 }
2108 }
2109 }
2110
2111 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2112 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2113 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2114 // Clone the terminator and hoist it into the pred, without any debug info.
2115 Instruction *NT = I1->clone();
2116 NT->insertInto(TIParent, TI->getIterator());
 // A value-producing terminator takes over the uses and the name of the
 // originals.
2117 if (!NT->getType()->isVoidTy()) {
2118 I1->replaceAllUsesWith(NT);
2119 for (Instruction *OtherSuccTI : OtherSuccTIs)
2120 OtherSuccTI->replaceAllUsesWith(NT);
2121 NT->takeName(I1);
2122 }
2123 Changed = true;
2124 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2125
2126 // Ensure terminator gets a debug location, even an unknown one, in case
2127 // it involves inlinable calls.
 // NOTE(review): listing line 2128 is absent here. `Locs`, filled below, is
 // presumably declared on it; restore from upstream.
2129 Locs.push_back(I1->getDebugLoc());
2130 for (auto *OtherSuccTI : OtherSuccTIs)
2131 Locs.push_back(OtherSuccTI->getDebugLoc());
2132 NT->setDebugLoc(DILocation::getMergedLocations(Locs));
2133
2134 // PHIs created below will adopt NT's merged DebugLoc.
2135 IRBuilder<NoFolder> Builder(NT);
2136
2137 // In the case of an if statement, hoisting one of the terminators from our
2138 // successor is a great thing. Unfortunately, the successors of the if/else
2139 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2140 // must agree for all PHI nodes, so we insert select instruction to compute
2141 // the final result.
2142 if (BI) {
 // One select per distinct (BB1V, BB2V) pair is created and then reused.
2143 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2144 for (BasicBlock *Succ : successors(BB1)) {
2145 for (PHINode &PN : Succ->phis()) {
2146 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2147 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2148 if (BB1V == BB2V)
2149 continue;
2150
2151 // These values do not agree. Insert a select instruction before NT
2152 // that determines the right value.
2153 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2154 if (!SI) {
2155 // Propagate fast-math-flags from phi node to its replacement select.
2156 SI = cast<SelectInst>(Builder.CreateSelectFMF(
2157 BI->getCondition(), BB1V, BB2V,
2158 isa<FPMathOperator>(PN) ? &PN : nullptr,
2159 BB1V->getName() + "." + BB2V->getName(), BI));
2160 }
2161
2162 // Make the PHI node use the select for all incoming values for BB1/BB2
2163 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2164 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2165 PN.setIncomingValue(i, SI);
2166 }
2167 }
2168 }
2169
 // NOTE(review): listing line 2170 is absent here. `Updates`, filled below,
 // is presumably declared on it as the list of dominator-tree updates;
 // restore from upstream.
2171
2172 // Update any PHI nodes in our new successors.
2173 for (BasicBlock *Succ : successors(BB1)) {
2174 addPredecessorToBlock(Succ, TIParent, BB1);
2175 if (DTU)
2176 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2177 }
2178
 // The old edges out of TIParent are queued for deletion.
2179 if (DTU)
2180 for (BasicBlock *Succ : successors(TI))
2181 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2182
 // NOTE(review): listing line 2183 is absent here. Nothing visible removes
 // the old terminator TI, so that presumably happens on the missing line;
 // restore from upstream.
2184 if (DTU)
2185 DTU->applyUpdates(Updates);
2186 return Changed;
2187}
2188
2189// Check lifetime markers.
2190static bool isLifeTimeMarker(const Instruction *I) {
2191 if (auto II = dyn_cast<IntrinsicInst>(I)) {
2192 switch (II->getIntrinsicID()) {
2193 default:
2194 break;
2195 case Intrinsic::lifetime_start:
2196 case Intrinsic::lifetime_end:
2197 return true;
2198 }
2199 }
2200 return false;
2201}
2202
2203// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2204// into variables.
//
// Cost heuristic: returns true when replacing operand OpIdx of I with a
// non-constant value is considered cheap. For integer div/rem only the
// divisor (operand 1) is treated as expensive to replace; for intrinsic calls
// every operand is.
// NOTE(review): listing line 2205 (the first line of the signature, carrying
// the return type, the name and the parameter `I`) is absent here; restore
// from upstream.
2206 int OpIdx) {
2207 // Divide/Remainder by constant is typically much cheaper than by variable.
2208 if (I->isIntDivRem())
2209 return OpIdx != 1;
2210 return !isa<IntrinsicInst>(I);
2211}
2212
2213// All instructions in Insts belong to different blocks that all unconditionally
2214// branch to a common successor. Analyze each instruction and return true if it
2215// would be possible to sink them into their successor, creating one common
2216// instruction instead. For every value that would be required to be provided by
2217// PHI node (because an operand varies in each input block), add to PHIOperands.
// NOTE(review): listing lines 2218-2219 (the start of the signature, carrying
// the return type, the name and the `Insts` parameter) are absent here;
// restore from upstream.
2220 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2221 // Prune out obviously bad instructions to move. Each instruction must have
2222 // the same number of uses, and we check later that the uses are consistent.
2223 std::optional<unsigned> NumUses;
2224 for (auto *I : Insts) {
2225 // These instructions may change or break semantics if moved.
2226 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2227 I->getType()->isTokenTy())
2228 return false;
2229
2230 // Do not try to sink an instruction in an infinite loop - it can cause
2231 // this algorithm to infinite loop.
2232 if (I->getParent()->getSingleSuccessor() == I->getParent())
2233 return false;
2234
2235 // Conservatively return false if I is an inline-asm instruction. Sinking
2236 // and merging inline-asm instructions can potentially create arguments
2237 // that cannot satisfy the inline-asm constraints.
2238 // If the instruction has nomerge or convergent attribute, return false.
2239 if (const auto *C = dyn_cast<CallBase>(I))
2240 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2241 return false;
2242
 // The first instruction fixes the expected use count; all others must
 // match it.
2243 if (!NumUses)
2244 NumUses = I->getNumUses();
2245 else if (NumUses != I->getNumUses())
2246 return false;
2247 }
2248
 // Every instruction is compared against the first one (I0).
2249 const Instruction *I0 = Insts.front();
2250 const auto I0MMRA = MMRAMetadata(*I0);
2251 for (auto *I : Insts) {
2252 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2253 return false;
2254
2255 // swifterror pointers can only be used by a load or store; sinking a load
2256 // or store would require introducing a select for the pointer operand,
2257 // which isn't allowed for swifterror pointers.
2258 if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
2259 return false;
2260 if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
2261 return false;
2262
2263 // Treat MMRAs conservatively. This pass can be quite aggressive and
2264 // could drop a lot of MMRAs otherwise.
2265 if (MMRAMetadata(*I) != I0MMRA)
2266 return false;
2267 }
2268
2269 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2270 // then the other phi operands must match the instructions from Insts. This
2271 // also has to hold true for any phi nodes that would be created as a result
2272 // of sinking. Both of these cases are represented by PhiOperands.
2273 for (const Use &U : I0->uses()) {
2274 auto It = PHIOperands.find(&U);
2275 if (It == PHIOperands.end())
2276 // There may be uses in other blocks when sinking into a loop header.
2277 return false;
2278 if (!equal(Insts, It->second))
2279 return false;
2280 }
2281
2282 // For calls to be sinkable, they must all be indirect, or have same callee.
2283 // I.e. if we have two direct calls to different callees, we don't want to
2284 // turn that into an indirect call. Likewise, if we have an indirect call,
2285 // and a direct call, we don't actually want to have a single indirect call.
2286 if (isa<CallBase>(I0)) {
2287 auto IsIndirectCall = [](const Instruction *I) {
2288 return cast<CallBase>(I)->isIndirectCall();
2289 };
2290 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2291 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2292 if (HaveIndirectCalls) {
2293 if (!AllCallsAreIndirect)
2294 return false;
2295 } else {
2296 // All callees must be identical.
2297 Value *Callee = nullptr;
2298 for (const Instruction *I : Insts) {
2299 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2300 if (!Callee)
2301 Callee = CurrCallee;
2302 else if (Callee != CurrCallee)
2303 return false;
2304 }
2305 }
2306 }
2307
 // Operand-by-operand check. An operand that differs between the instructions
 // needs a PHI; record its per-instruction values in PHIOperands, keyed by
 // I0's use.
2308 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2309 Value *Op = I0->getOperand(OI);
2310 if (Op->getType()->isTokenTy())
2311 // Don't touch any operand of token type.
2312 return false;
2313
2314 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2315 assert(I->getNumOperands() == I0->getNumOperands());
2316 return I->getOperand(OI) == I0->getOperand(OI);
2317 };
2318 if (!all_of(Insts, SameAsI0)) {
2319 // SROA can't speculate lifetime markers of selects/phis, and the
2320 // backend may handle such lifetimes incorrectly as well (#104776).
2321 // Don't sink lifetimes if it would introduce a phi on the pointer
2322 // argument.
2323 if (isLifeTimeMarker(I0) && OI == 1 &&
2324 any_of(Insts, [](const Instruction *I) {
2325 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2326 }))
2327 return false;
2328
 // NOTE(review): listing line 2330 (the second operand of the `||`
 // below) is absent here; restore from upstream.
2329 if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
2331 // We can't create a PHI from this GEP.
2332 return false;
2333 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2334 for (auto *I : Insts)
2335 Ops.push_back(I->getOperand(OI));
2336 }
2337 }
2338 return true;
2339}
2340
2341// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2342// instruction of every block in Blocks to their common successor, commoning
2343// into one instruction.
// NOTE(review): listing line 2344 (the function signature, declaring the
// `Blocks` parameter used below) is absent here; restore from upstream.
 // The sink target is taken from the first block's terminator.
2345 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2346
2347 // canSinkInstructions returning true guarantees that every block has at
2348 // least one non-terminator instruction.
 // NOTE(review): listing line 2349 is absent here. `Insts`, filled below, is
 // presumably declared on it; restore from upstream.
2350 for (auto *BB : Blocks) {
2351 Instruction *I = BB->getTerminator();
 // Walk backwards from the terminator over debug intrinsics, stopping at
 // the first instruction of the block at the latest.
2352 do {
2353 I = I->getPrevNode();
2354 } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
2355 if (!isa<DbgInfoIntrinsic>(I))
2356 Insts.push_back(I);
2357 }
2358
2359 // We don't need to do any more checking here; canSinkInstructions should
2360 // have done it all for us.
2361 SmallVector<Value*, 4> NewOperands;
2362 Instruction *I0 = Insts.front();
2363 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2364 // This check is different to that in canSinkInstructions. There, we
2365 // cared about the global view once simplifycfg (and instcombine) have
2366 // completed - it takes into account PHIs that become trivially
2367 // simplifiable. However here we need a more local view; if an operand
2368 // differs we create a PHI and rely on instcombine to clean up the very
2369 // small mess we may make.
2370 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2371 return I->getOperand(O) != I0->getOperand(O);
2372 });
2373 if (!NeedPHI) {
2374 NewOperands.push_back(I0->getOperand(O));
2375 continue;
2376 }
2377
2378 // Create a new PHI in the successor block and populate it.
2379 auto *Op = I0->getOperand(O);
2380 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2381 auto *PN =
2382 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2383 PN->insertBefore(BBEnd->begin());
2384 for (auto *I : Insts)
2385 PN->addIncoming(I->getOperand(O), I->getParent());
2386 NewOperands.push_back(PN);
2387 }
2388
2389 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2390 // and move it to the start of the successor block.
2391 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2392 I0->getOperandUse(O).set(NewOperands[O]);
2393
2394 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2395
2396 // Update metadata and IR flags, and merge debug locations.
2397 for (auto *I : Insts)
2398 if (I != I0) {
2399 // The debug location for the "common" instruction is the merged locations
2400 // of all the commoned instructions. We start with the original location
2401 // of the "common" instruction and iteratively merge each location in the
2402 // loop below.
2403 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2404 // However, as N-way merge for CallInst is rare, so we use simplified API
2405 // instead of using complex API for N-way merge.
2406 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2407 combineMetadataForCSE(I0, I, true);
2408 I0->andIRFlags(I);
2409 if (auto *CB = dyn_cast<CallBase>(I0)) {
2410 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2411 assert(Success && "We should not be trying to sink callbases "
2412 "with non-intersectable attributes");
2413 // For NDEBUG Compile.
2414 (void)Success;
2415 }
2416 }
2417
 // early_inc_range is used because each PHI user is erased inside the loop.
2418 for (User *U : make_early_inc_range(I0->users())) {
2419 // canSinkInstructions checked that all instructions are only used by
2420 // phi nodes in a way that allows replacing the phi node with the common
2421 // instruction.
2422 auto *PN = cast<PHINode>(U);
2423 PN->replaceAllUsesWith(I0);
2424 PN->eraseFromParent();
2425 }
2426
2427 // Finally nuke all instructions apart from the common instruction.
2428 for (auto *I : Insts) {
2429 if (I == I0)
2430 continue;
2431 // The remaining uses are debug users, replace those with the common inst.
2432 // In most (all?) cases this just introduces a use-before-def.
2433 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2434 I->replaceAllUsesWith(I0);
2435 I->eraseFromParent();
2436 }
2437}
2438
2439/// Check whether BB's predecessors end with unconditional branches. If it is
2440/// true, sink any common code from the predecessors to BB.
2442 DomTreeUpdater *DTU) {
2443 // We support two situations:
2444 // (1) all incoming arcs are unconditional
2445 // (2) there are non-unconditional incoming arcs
2446 //
2447 // (2) is very common in switch defaults and
2448 // else-if patterns;
2449 //
2450 // if (a) f(1);
2451 // else if (b) f(2);
2452 //
2453 // produces:
2454 //
2455 // [if]
2456 // / \
2457 // [f(1)] [if]
2458 // | | \
2459 // | | |
2460 // | [f(2)]|
2461 // \ | /
2462 // [ end ]
2463 //
2464 // [end] has two unconditional predecessor arcs and one conditional. The
2465 // conditional refers to the implicit empty 'else' arc. This conditional
2466 // arc can also be caused by an empty default block in a switch.
2467 //
2468 // In this case, we attempt to sink code from all *unconditional* arcs.
2469 // If we can sink instructions from these arcs (determined during the scan
2470 // phase below) we insert a common successor for all unconditional arcs and
2471 // connect that to [end], to enable sinking:
2472 //
2473 // [if]
2474 // / \
2475 // [x(1)] [if]
2476 // | | \
2477 // | | \
2478 // | [x(2)] |
2479 // \ / |
2480 // [sink.split] |
2481 // \ /
2482 // [ end ]
2483 //
2484 SmallVector<BasicBlock*,4> UnconditionalPreds;
2485 bool HaveNonUnconditionalPredecessors = false;
2486 for (auto *PredBB : predecessors(BB)) {
2487 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2488 if (PredBr && PredBr->isUnconditional())
2489 UnconditionalPreds.push_back(PredBB);
2490 else
2491 HaveNonUnconditionalPredecessors = true;
2492 }
2493 if (UnconditionalPreds.size() < 2)
2494 return false;
2495
2496 // We take a two-step approach to tail sinking. First we scan from the end of
2497 // each block upwards in lockstep. If the n'th instruction from the end of each
2498 // block can be sunk, those instructions are added to ValuesToSink and we
2499 // carry on. If we can sink an instruction but need to PHI-merge some operands
2500 // (because they're not identical in each instruction) we add these to
2501 // PHIOperands.
2502 // We prepopulate PHIOperands with the phis that already exist in BB.
2504 for (PHINode &PN : BB->phis()) {
2506 for (const Use &U : PN.incoming_values())
2507 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2508 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2509 for (BasicBlock *Pred : UnconditionalPreds)
2510 Ops.push_back(*IncomingVals[Pred]);
2511 }
2512
2513 int ScanIdx = 0;
2514 SmallPtrSet<Value*,4> InstructionsToSink;
2515 LockstepReverseIterator LRI(UnconditionalPreds);
2516 while (LRI.isValid() &&
2517 canSinkInstructions(*LRI, PHIOperands)) {
2518 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2519 << "\n");
2520 InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2521 ++ScanIdx;
2522 --LRI;
2523 }
2524
2525 // If no instructions can be sunk, early-return.
2526 if (ScanIdx == 0)
2527 return false;
2528
2529 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2530
2531 if (!followedByDeoptOrUnreachable) {
2532 // Check whether this is the pointer operand of a load/store.
2533 auto IsMemOperand = [](Use &U) {
2534 auto *I = cast<Instruction>(U.getUser());
2535 if (isa<LoadInst>(I))
2536 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2537 if (isa<StoreInst>(I))
2538 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2539 return false;
2540 };
2541
2542 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2543 // actually sink before encountering instruction that is unprofitable to
2544 // sink?
2545 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2546 unsigned NumPHIInsts = 0;
2547 for (Use &U : (*LRI)[0]->operands()) {
2548 auto It = PHIOperands.find(&U);
2549 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2550 return InstructionsToSink.contains(V);
2551 })) {
2552 ++NumPHIInsts;
2553 // Do not separate a load/store from the gep producing the address.
2554 // The gep can likely be folded into the load/store as an addressing
2555 // mode. Additionally, a load of a gep is easier to analyze than a
2556 // load of a phi.
2557 if (IsMemOperand(U) &&
2558 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2559 return false;
2560 // FIXME: this check is overly optimistic. We may end up not sinking
2561 // said instruction, due to the very same profitability check.
2562 // See @creating_too_many_phis in sink-common-code.ll.
2563 }
2564 }
2565 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2566 return NumPHIInsts <= 1;
2567 };
2568
2569 // We've determined that we are going to sink last ScanIdx instructions,
2570 // and recorded them in InstructionsToSink. Now, some instructions may be
2571 // unprofitable to sink. But that determination depends on the instructions
2572 // that we are going to sink.
2573
2574 // First, forward scan: find the first instruction unprofitable to sink,
2575 // recording all the ones that are profitable to sink.
2576 // FIXME: would it be better, after we detect that not all are profitable.
2577 // to either record the profitable ones, or erase the unprofitable ones?
2578 // Maybe we need to choose (at runtime) the one that will touch least
2579 // instrs?
2580 LRI.reset();
2581 int Idx = 0;
2582 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2583 while (Idx < ScanIdx) {
2584 if (!ProfitableToSinkInstruction(LRI)) {
2585 // Too many PHIs would be created.
2586 LLVM_DEBUG(
2587 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2588 break;
2589 }
2590 InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2591 --LRI;
2592 ++Idx;
2593 }
2594
2595 // If no instructions can be sunk, early-return.
2596 if (Idx == 0)
2597 return false;
2598
2599 // Did we determine that (only) some instructions are unprofitable to sink?
2600 if (Idx < ScanIdx) {
2601 // Okay, some instructions are unprofitable.
2602 ScanIdx = Idx;
2603 InstructionsToSink = InstructionsProfitableToSink;
2604
2605 // But, that may make other instructions unprofitable, too.
2606 // So, do a backward scan, do any earlier instructions become
2607 // unprofitable?
2608 assert(
2609 !ProfitableToSinkInstruction(LRI) &&
2610 "We already know that the last instruction is unprofitable to sink");
2611 ++LRI;
2612 --Idx;
2613 while (Idx >= 0) {
2614 // If we detect that an instruction becomes unprofitable to sink,
2615 // all earlier instructions won't be sunk either,
2616 // so preemptively keep InstructionsProfitableToSink in sync.
2617 // FIXME: is this the most performant approach?
2618 for (auto *I : *LRI)
2619 InstructionsProfitableToSink.erase(I);
2620 if (!ProfitableToSinkInstruction(LRI)) {
2621 // Everything starting with this instruction won't be sunk.
2622 ScanIdx = Idx;
2623 InstructionsToSink = InstructionsProfitableToSink;
2624 }
2625 ++LRI;
2626 --Idx;
2627 }
2628 }
2629
2630 // If no instructions can be sunk, early-return.
2631 if (ScanIdx == 0)
2632 return false;
2633 }
2634
2635 bool Changed = false;
2636
2637 if (HaveNonUnconditionalPredecessors) {
2638 if (!followedByDeoptOrUnreachable) {
2639 // It is always legal to sink common instructions from unconditional
2640 // predecessors. However, if not all predecessors are unconditional,
2641 // this transformation might be pessimizing. So as a rule of thumb,
2642 // don't do it unless we'd sink at least one non-speculatable instruction.
2643 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2644 LRI.reset();
2645 int Idx = 0;
2646 bool Profitable = false;
2647 while (Idx < ScanIdx) {
2648 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2649 Profitable = true;
2650 break;
2651 }
2652 --LRI;
2653 ++Idx;
2654 }
2655 if (!Profitable)
2656 return false;
2657 }
2658
2659 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2660 // We have a conditional edge and we're going to sink some instructions.
2661 // Insert a new block postdominating all blocks we're going to sink from.
2662 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2663 // Edges couldn't be split.
2664 return false;
2665 Changed = true;
2666 }
2667
2668 // Now that we've analyzed all potential sinking candidates, perform the
2669 // actual sink. We iteratively sink the last non-terminator of the source
2670 // blocks into their common successor unless doing so would require too
2671 // many PHI instructions to be generated (currently only one PHI is allowed
2672 // per sunk instruction).
2673 //
2674 // We can use InstructionsToSink to discount values needing PHI-merging that will
2675 // actually be sunk in a later iteration. This allows us to be more
2676 // aggressive in what we sink. This does allow a false positive where we
2677 // sink presuming a later value will also be sunk, but stop half way through
2678 // and never actually sink it which means we produce more PHIs than intended.
2679 // This is unlikely in practice though.
2680 int SinkIdx = 0;
2681 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2682 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2683 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2684 << "\n");
2685
2686 // Because we've sunk every instruction in turn, the current instruction to
2687 // sink is always at index 0.
2688 LRI.reset();
2689
2690 sinkLastInstruction(UnconditionalPreds);
2691 NumSinkCommonInstrs++;
2692 Changed = true;
2693 }
2694 if (SinkIdx != 0)
2695 ++NumSinkCommonCode;
2696 return Changed;
2697}
2698
2699namespace {
2700
// Buckets `invoke` instructions into groups. getCompatibleSet() picks the
// first existing group whose leading member passes shouldBelongToSameSet()
// together with the candidate, or opens a new, empty group.
2701struct CompatibleSets {
// Storage for a single group of invokes.
2702 using SetTy = SmallVector<InvokeInst *, 2>;
2703
// NOTE(review): listing line 2704 is absent from this extract. The out-of-line
// member functions iterate over and append to a container named `Sets`, so its
// declaration presumably belongs here -- confirm against upstream.
2705
// Decides whether the two given invokes may be placed in the same group.
2706 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2707
// Returns the group `II` fits into, creating an empty one when none matches.
2708 SetTy &getCompatibleSet(InvokeInst *II);
2709
// Appends `II` to the group selected by getCompatibleSet().
2710 void insert(InvokeInst *II);
2711};
2712
2713CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2714 // Perform a linear scan over all the existing sets, see if the new `invoke`
2715 // is compatible with any particular set. Since we know that all the `invokes`
2716 // within a set are compatible, only check the first `invoke` in each set.
2717 // WARNING: at worst, this has quadratic complexity.
2718 for (CompatibleSets::SetTy &Set : Sets) {
2719 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2720 return Set;
2721 }
2722
2723 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2724 return Sets.emplace_back();
2725}
2726
2727void CompatibleSets::insert(InvokeInst *II) {
2728 getCompatibleSet(II).emplace_back(II);
2729}
2730
// Returns true when the two given `invoke`s pass every check below: neither
// is marked cannot-merge or is inline asm, both are indirect or both call the
// same callee, their normal destinations agree, they are the same operation
// (ignoring arguments), and every differing data operand may be replaced by a
// variable. `Invokes` must contain exactly two elements.
2731bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2732 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2733
2734 // Can we theoretically merge these `invoke`s?
2735 auto IsIllegalToMerge = [](InvokeInst *II) {
2736 return II->cannotMerge() || II->isInlineAsm();
2737 };
2738 if (any_of(Invokes, IsIllegalToMerge))
2739 return false;
2740
2741 // Either both `invoke`s must be direct,
2742 // or both `invoke`s must be indirect.
2743 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2744 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2745 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2746 if (HaveIndirectCalls) {
2747 if (!AllCallsAreIndirect)
2748 return false;
2749 } else {
2750 // All callees must be identical.
2751 Value *Callee = nullptr;
2752 for (InvokeInst *II : Invokes) {
2753 Value *CurrCallee = II->getCalledOperand();
2754 assert(CurrCallee && "There is always a called operand.");
2755 if (!Callee)
2756 Callee = CurrCallee;
2757 else if (Callee != CurrCallee)
2758 return false;
2759 }
2760 }
2761
2762 // Either both `invoke`s must not have a normal destination,
2763 // or both `invoke`s must have a normal destination.
// An invoke counts as having a normal destination unless the first
// non-PHI, non-debug instruction of that block is `unreachable`.
2764 auto HasNormalDest = [](InvokeInst *II) {
2765 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2766 };
2767 if (any_of(Invokes, HasNormalDest)) {
2768 // Do not merge `invoke` that does not have a normal destination with one
2769 // that does have a normal destination, even though doing so would be legal.
2770 if (!all_of(Invokes, HasNormalDest))
2771 return false;
2772
2773 // All normal destinations must be identical.
2774 BasicBlock *NormalBB = nullptr;
2775 for (InvokeInst *II : Invokes) {
2776 BasicBlock *CurrNormalBB = II->getNormalDest();
2777 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2778 if (!NormalBB)
2779 NormalBB = CurrNormalBB;
2780 else if (NormalBB != CurrNormalBB)
2781 return false;
2782 }
2783
2784 // In the normal destination, the incoming values for these two `invoke`s
2785 // must be compatible.
2786 SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
// NOTE(review): listing line 2787 is absent from this extract; it presumably
// opens the `if (!<compatibility check>(` call whose arguments follow --
// confirm against upstream.
2788 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2789 &EquivalenceSet))
2790 return false;
2791 }
2792
2793#ifndef NDEBUG
2794 // All unwind destinations must be identical.
2795 // We know that because we have started from said unwind destination.
2796 BasicBlock *UnwindBB = nullptr;
2797 for (InvokeInst *II : Invokes) {
2798 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2799 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2800 if (!UnwindBB)
2801 UnwindBB = CurrUnwindBB;
2802 else
2803 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2804 }
2805#endif
2806
2807 // In the unwind destination, the incoming values for these two `invoke`s
2808 // must be compatible.
// NOTE(review): listing line 2809 is absent from this extract; it presumably
// opens the same kind of `if (!<compatibility check>(` call for the unwind
// destination -- confirm against upstream.
2810 Invokes.front()->getUnwindDest(),
2811 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2812 return false;
2813
2814 // Ignoring arguments, these `invoke`s must be identical,
2815 // including operand bundles.
2816 const InvokeInst *II0 = Invokes.front();
2817 for (auto *II : Invokes.drop_front())
2818 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2819 return false;
2820
2821 // Can we theoretically form the data operands for the merged `invoke`?
// Identical operands are always fine; differing ones are rejected when they
// are of token type or cannot be replaced with a variable.
2822 auto IsIllegalToMergeArguments = [](auto Ops) {
2823 Use &U0 = std::get<0>(Ops);
2824 Use &U1 = std::get<1>(Ops);
2825 if (U0 == U1)
2826 return false;
2827 return U0->getType()->isTokenTy() ||
2828 !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
2829 U0.getOperandNo());
2830 };
2831 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2832 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2833 IsIllegalToMergeArguments))
2834 return false;
2835
2836 return true;
2837}
2838
2839} // namespace
2840
2841// Merge all invokes in the provided set, all of which are compatible
2842// as per the `CompatibleSets::shouldBelongToSameSet()`.
//
// A clone of the first invoke is placed in a new `<name>.invoke` block, PHI
// nodes are created for operands that differ between the originals, and every
// original invoke is replaced by an unconditional branch to the new block.
// When `DTU` is non-null the matching dominator tree updates are applied.
//
// NOTE(review): listing line 2843 (the start of the signature, including the
// function name and the `Invokes` parameter) is absent from this extract --
// confirm against upstream.
2844 DomTreeUpdater *DTU) {
2845 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2846
// NOTE(review): listing line 2847 is absent from this extract; it presumably
// declares the `Updates` container used below -- confirm against upstream.
2848 if (DTU)
2849 Updates.reserve(2 + 3 * Invokes.size());
2850
// Same test as in shouldBelongToSameSet(): a normal destination starting with
// `unreachable` is treated as "no normal destination".
2851 bool HasNormalDest =
2852 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2853
2854 // Clone one of the invokes into a new basic block.
2855 // Since they are all compatible, it doesn't matter which invoke is cloned.
2856 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2857 InvokeInst *II0 = Invokes.front();
2858 BasicBlock *II0BB = II0->getParent();
2859 BasicBlock *InsertBeforeBlock =
2860 II0->getParent()->getIterator()->getNextNode();
2861 Function *Func = II0BB->getParent();
2862 LLVMContext &Ctx = II0->getContext();
2863
2864 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2865 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2866
2867 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2868 // NOTE: all invokes have the same attributes, so no handling needed.
2869 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2870
2871 if (!HasNormalDest) {
2872 // This set does not have a normal destination,
2873 // so just form a new block with unreachable terminator.
2874 BasicBlock *MergedNormalDest = BasicBlock::Create(
2875 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2876 new UnreachableInst(Ctx, MergedNormalDest);
2877 MergedInvoke->setNormalDest(MergedNormalDest);
2878 }
2879
2880 // The unwind destination, however, remains identical for all invokes here.
2881
2882 return MergedInvoke;
2883 }();
2884
2885 if (DTU) {
2886 // Predecessor blocks that contained these invokes will now branch to
2887 // the new block that contains the merged invoke, ...
2888 for (InvokeInst *II : Invokes)
2889 Updates.push_back(
2890 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2891
2892 // ... which has the new `unreachable` block as normal destination,
2893 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2894 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2895 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2896 SuccBBOfMergedInvoke});
2897
2898 // Since predecessor blocks now unconditionally branch to a new block,
2899 // they no longer branch to their original successors.
2900 for (InvokeInst *II : Invokes)
2901 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2902 Updates.push_back(
2903 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2904 }
2905
2906 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2907
2908 // Form the merged operands for the merged invoke.
2909 for (Use &U : MergedInvoke->operands()) {
2910 // Only PHI together the indirect callees and data operands.
2911 if (MergedInvoke->isCallee(&U)) {
2912 if (!IsIndirectCall)
2913 continue;
2914 } else if (!MergedInvoke->isDataOperand(&U))
2915 continue;
2916
2917 // Don't create trivial PHI's with all-identical incoming values.
2918 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2919 return II->getOperand(U.getOperandNo()) != U.get();
2920 });
2921 if (!NeedPHI)
2922 continue;
2923
2924 // Form a PHI out of all the data ops under this index.
// NOTE(review): listing line 2925 is absent from this extract; it presumably
// starts the PHI creation call that defines `PN` -- confirm against upstream.
2926 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2927 for (InvokeInst *II : Invokes)
2928 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2929
2930 U.set(PN);
2931 }
2932
2933 // We've ensured that each PHI node has compatible (identical) incoming values
2934 // when coming from each of the `invoke`s in the current merge set,
2935 // so update the PHI nodes accordingly.
2936 for (BasicBlock *Succ : successors(MergedInvoke))
2937 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2938 /*ExistPred=*/Invokes.front()->getParent());
2939
2940 // And finally, replace the original `invoke`s with an unconditional branch
2941 // to the block with the merged `invoke`. Also, give that merged `invoke`
2942 // the merged debugloc of all the original `invoke`s.
2943 DILocation *MergedDebugLoc = nullptr;
2944 for (InvokeInst *II : Invokes) {
2945 // Compute the debug location common to all the original `invoke`s.
2946 if (!MergedDebugLoc)
2947 MergedDebugLoc = II->getDebugLoc();
2948 else
2949 MergedDebugLoc =
2950 DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2951
2952 // And replace the old `invoke` with an unconditional branch
2953 // to the block with the merged `invoke`.
2954 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2955 OrigSuccBB->removePredecessor(II->getParent());
2956 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2957 // The unconditional branch is part of the replacement for the original
2958 // invoke, so should use its DebugLoc.
2959 BI->setDebugLoc(II->getDebugLoc());
2960 bool Success = MergedInvoke->tryIntersectAttributes(II);
2961 assert(Success && "Merged invokes with incompatible attributes");
2962 // `Success` is otherwise unused when asserts are compiled out (NDEBUG).
2963 (void)Success;
2964 II->replaceAllUsesWith(MergedInvoke);
2965 II->eraseFromParent();
2966 ++NumInvokesMerged;
2967 }
2968 MergedInvoke->setDebugLoc(MergedDebugLoc);
2969 ++NumInvokeSetsFormed;
2970
2971 if (DTU)
2972 DTU->applyUpdates(Updates);
2973}
2974
2975/// If this block is a `landingpad` exception handling block, categorize all
2976/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2977/// being "mergeable" together, and then merge invokes in each set together.
2978///
2979/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2980/// [...] [...]
2981/// | |
2982/// [invoke0] [invoke1]
2983/// / \ / \
2984/// [cont0] [landingpad] [cont1]
2985/// to:
2986/// [...] [...]
2987/// \ /
2988/// [invoke]
2989/// / \
2990/// [cont] [landingpad]
2991///
2992/// But of course we can only do that if the invokes share the `landingpad`,
2993/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2994/// and the invoked functions are "compatible".
///
/// \returns true if at least one set of invokes was merged.
// NOTE(review): listing lines 2995-2996 are absent from this extract; they
// presumably hold the function signature (the body uses `BB` and `DTU`) and
// the condition guarding the early `return false` below -- confirm against
// upstream.
2997 return false;
2998
2999 bool Changed = false;
3000
3001 // FIXME: generalize to all exception handling blocks?
3002 if (!BB->isLandingPad())
3003 return Changed;
3004
3005 CompatibleSets Grouper;
3006
3007 // Record all the predecessors of this `landingpad`. As per verifier,
3008 // the only allowed predecessor is the unwind edge of an `invoke`.
3009 // We want to group "compatible" `invokes` into the same set to be merged.
3010 for (BasicBlock *PredBB : predecessors(BB))
3011 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
3012
3013 // And now, merge `invoke`s that were grouped together.
3014 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
// A set with a single member has nothing to merge with.
3015 if (Invokes.size() < 2)
3016 continue;
3017 Changed = true;
3018 mergeCompatibleInvokesImpl(Invokes, DTU);
3019 }
3020
3021 return Changed;
3022}
3023
3024namespace {
3025/// Track ephemeral values, which should be ignored for cost-modelling
3026/// purposes. Requires walking instructions in reverse order.
3027class EphemeralValueTracker {
// NOTE(review): listing line 3028 is absent from this extract; it presumably
// declares the `EphValues` set used by every method below -- confirm against
// upstream.
3029
// An instruction is ephemeral if it is an assume, or if it has no side
// effects, is not a terminator, and every user has already been recorded as
// ephemeral. The user check is why callers must visit users before their
// operands, i.e. walk in reverse.
3030 bool isEphemeral(const Instruction *I) {
3031 if (isa<AssumeInst>(I))
3032 return true;
3033 return !I->mayHaveSideEffects() && !I->isTerminator() &&
3034 all_of(I->users(), [&](const User *U) {
3035 return EphValues.count(cast<Instruction>(U));
3036 });
3037 }
3038
3039public:
// Records `I` if it is ephemeral. Returns true when it was recorded.
3040 bool track(const Instruction *I) {
3041 if (isEphemeral(I)) {
3042 EphValues.insert(I);
3043 return true;
3044 }
3045 return false;
3046 }
3047
// Returns true if `I` was previously recorded by track().
3048 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3049};
3050} // namespace
3051
3052/// Determine if we can hoist a sole store instruction out of a
3053/// conditional block.
3054///
3055/// We are looking for code like the following:
3056/// BrBB:
3057/// store i32 %add, i32* %arrayidx2
3058/// ... // No other stores or function calls (we could be calling a memory
3059/// ... // function).
3060/// %cmp = icmp ult %x, %y
3061/// br i1 %cmp, label %EndBB, label %ThenBB
3062/// ThenBB:
3063/// store i32 %add5, i32* %arrayidx2
3064/// br label EndBB
3065/// EndBB:
3066/// ...
3067/// We are going to transform this into:
3068/// BrBB:
3069/// store i32 %add, i32* %arrayidx2
3070/// ... //
3071/// %cmp = icmp ult %x, %y
3072/// %add.add5 = select i1 %cmp, i32 %add, %add5
3073/// store i32 %add.add5, i32* %arrayidx2
3074/// ...
3075///
3076/// \return The value operand of the previous store to the same pointer, or a
3077/// matching previous load from it, if the store can be hoisted into the
/// predecessor block. nullptr otherwise.
// NOTE(review): listing line 3078 is absent from this extract; it presumably
// holds the start of the signature, including the `I` and `BrBB` parameters
// used in the body -- confirm against upstream.
3079 BasicBlock *StoreBB, BasicBlock *EndBB) {
3080 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3081 if (!StoreToHoist)
3082 return nullptr;
3083
3084 // Bail out on volatile or atomic stores.
3085 if (!StoreToHoist->isSimple())
3086 return nullptr;
3087
3088 Value *StorePtr = StoreToHoist->getPointerOperand();
3089 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3090
3091 // Look for a store to the same pointer in BrBB.
// The backwards scan gives up after this many instructions.
3092 unsigned MaxNumInstToLookAt = 9;
3093 // Skip pseudo probe intrinsic calls which are not really killing any memory
3094 // accesses.
3095 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3096 if (!MaxNumInstToLookAt)
3097 break;
3098 --MaxNumInstToLookAt;
3099
3100 // Could be calling an instruction that affects memory like free().
3101 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3102 return nullptr;
3103
3104 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3105 // Found the previous store to same location and type. Make sure it is
3106 // simple, to avoid introducing a spurious non-atomic write after an
3107 // atomic write.
3108 if (SI->getPointerOperand() == StorePtr &&
3109 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3110 SI->getAlign() >= StoreToHoist->getAlign())
3111 // Found the previous store, return its value operand.
3112 return SI->getValueOperand();
3113 return nullptr; // Unknown store.
3114 }
3115
3116 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3117 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3118 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
// A matching load is only accepted when the underlying object is writable,
// is not captured, and (if required) the pointer is dereferenceable.
3119 Value *Obj = getUnderlyingObject(StorePtr);
3120 bool ExplicitlyDereferenceableOnly;
3121 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3122 !PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3123 /*StoreCaptures=*/true) &&
3124 (!ExplicitlyDereferenceableOnly ||
3125 isDereferenceablePointer(StorePtr, StoreTy,
3126 LI->getDataLayout()))) {
3127 // Found a previous load, return it.
3128 return LI;
3129 }
3130 }
3131 // The load didn't work out, but we may still find a store.
3132 }
3133 }
3134
// Ran out of budget or instructions without finding a usable store or load.
3135 return nullptr;
3136}
3137
3138/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3139/// converted to selects.
///
/// \returns true if at least one PHI in EndBB needs rewriting and none of the
/// checks below rejected the transform.
// NOTE(review): this extract is missing listing lines 3140, 3143, 3145, 3147,
// 3148, 3161, 3165 and 3177. They presumably carry the start of the signature
// (with the `BB` and `ThenBB` parameters), the `Cost` parameter, the cost-kind
// selection, the tail of the getCmpSelInstrCost() call, the second half of the
// undefined-value check, and the initializer of `MaxCost` -- confirm against
// upstream before relying on this copy.
3141 BasicBlock *EndBB,
3142 unsigned &SpeculatedInstructions,
3144 const TargetTransformInfo &TTI) {
3146 BB->getParent()->hasMinSize()
3149
3150 bool HaveRewritablePHIs = false;
3151 for (PHINode &PN : EndBB->phis()) {
3152 Value *OrigV = PN.getIncomingValueForBlock(BB);
3153 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3154
3155 // FIXME: Try to remove some of the duplication with
3156 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3157 if (ThenV == OrigV)
3158 continue;
3159
// Each non-trivial PHI is charged the cost of one select.
3160 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
3162
3163 // Don't convert to selects if we could remove undefined behavior instead.
3164 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3166 return false;
3167
3168 HaveRewritablePHIs = true;
3169 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3170 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3171 if (!OrigCE && !ThenCE)
3172 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3173
3174 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3175 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3176 InstructionCost MaxCost =
3178 if (OrigCost + ThenCost > MaxCost)
3179 return false;
3180
3181 // Account for the cost of an unfolded ConstantExpr which could end up
3182 // getting expanded into Instructions.
3183 // FIXME: This doesn't account for how many operations are combined in the
3184 // constant expression.
3185 ++SpeculatedInstructions;
3186 if (SpeculatedInstructions > 1)
3187 return false;
3188 }
3189
3190 return HaveRewritablePHIs;
3191}
3192
// Decides whether speculating across the conditional branch `BI` is worth
// attempting, based on `unpredictable` metadata and branch weights.
// `Invert`, when set, selects which branch weight corresponds to the edge
// going straight to the end block; when unset and weights exist, the answer
// is false.
// NOTE(review): listing line 3193 is absent from this extract; it presumably
// holds the start of the signature, including the `BI` parameter -- confirm
// against upstream.
3194 std::optional<bool> Invert,
3195 const TargetTransformInfo &TTI) {
3196 // If the branch is non-unpredictable, and is predicted to *not* branch to
3197 // the `then` block, then avoid speculating it.
3198 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3199 return true;
3200
// Without usable branch weights there is no prediction to respect.
3201 uint64_t TWeight, FWeight;
3202 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3203 return true;
3204
3205 if (!Invert.has_value())
3206 return false;
3207
3208 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3209 BranchProbability BIEndProb =
3210 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
// NOTE(review): listing line 3211 is absent from this extract; it presumably
// defines the `Likely` threshold compared against below -- confirm against
// upstream.
3212 return BIEndProb < Likely;
3213}
3214
3215/// Speculate a conditional basic block flattening the CFG.
3216///
3217/// Note that this is a very risky transform currently. Speculating
3218/// instructions like this is most often not desirable. Instead, there is an MI
3219/// pass which can do it with full awareness of the resource constraints.
3220/// However, some cases are "obvious" and we should do directly. An example of
3221/// this is speculating a single, reasonably cheap instruction.
3222///
3223/// There is only one distinct advantage to flattening the CFG at the IR level:
3224/// it makes very common but simplistic optimizations such as are common in
3225/// instcombine and the DAG combiner more powerful by removing CFG edges and
3226/// modeling their effects with easier to reason about SSA value graphs.
3227///
3228///
3229/// An illustration of this transform is turning this IR:
3230/// \code
3231/// BB:
3232/// %cmp = icmp ult %x, %y
3233/// br i1 %cmp, label %EndBB, label %ThenBB
3234/// ThenBB:
3235/// %sub = sub %x, %y
3236/// br label %EndBB
3237/// EndBB:
3238/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3239/// ...
3240/// \endcode
3241///
3242/// Into this IR:
3243/// \code
3244/// BB:
3245/// %cmp = icmp ult %x, %y
3246/// %sub = sub %x, %y
3247/// %cond = select i1 %cmp, 0, %sub
3248/// ...
3249/// \endcode
3250///
3251/// \returns true if the conditional block is removed.
// NOTE(review): this extract is missing listing lines 3265, 3321, 3323, 3336,
// 3342-3343 and 3376; each gap is marked where it occurs below. Confirm the
// missing text against upstream before relying on this copy.
3252bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3253 BasicBlock *ThenBB) {
3254 if (!Options.SpeculateBlocks)
3255 return false;
3256
3257 // Be conservative for now. FP select instruction can often be expensive.
3258 Value *BrCond = BI->getCondition();
3259 if (isa<FCmpInst>(BrCond))
3260 return false;
3261
3262 BasicBlock *BB = BI->getParent();
3263 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3264 InstructionCost Budget =
// NOTE(review): listing line 3265 (the initializer of `Budget`) is missing.
3266
3267 // If ThenBB is actually on the false edge of the conditional branch, remember
3268 // to swap the select operands later.
3269 bool Invert = false;
3270 if (ThenBB != BI->getSuccessor(0)) {
3271 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3272 Invert = true;
3273 }
3274 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3275
3276 if (!isProfitableToSpeculate(BI, Invert, TTI))
3277 return false;
3278
3279 // Keep a count of how many times instructions are used within ThenBB when
3280 // they are candidates for sinking into ThenBB. Specifically:
3281 // - They are defined in BB, and
3282 // - They have no side effects, and
3283 // - All of their uses are in ThenBB.
3284 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3285
3286 SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
3287
3288 unsigned SpeculatedInstructions = 0;
3289 bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
3290 Options.HoistLoadsStoresWithCondFaulting;
3291 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3292 Value *SpeculatedStoreValue = nullptr;
3293 StoreInst *SpeculatedStore = nullptr;
3294 EphemeralValueTracker EphTracker;
// Walk ThenBB backwards, excluding its terminator; the reverse order is what
// EphemeralValueTracker requires.
3295 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3296 // Skip debug info.
3297 if (isa<DbgInfoIntrinsic>(I)) {
3298 SpeculatedDbgIntrinsics.push_back(&I);
3299 continue;
3300 }
3301
3302 // Skip pseudo probes. The consequence is we lose track of the branch
3303 // probability for ThenBB, which is fine since the optimization here takes
3304 // place regardless of the branch probability.
3305 if (isa<PseudoProbeInst>(I)) {
3306 // The probe should be deleted so that it will not be over-counted when
3307 // the samples collected on the non-conditional path are counted towards
3308 // the conditional path. We leave it for the counts inference algorithm to
3309 // figure out a proper count for an unknown probe.
3310 SpeculatedDbgIntrinsics.push_back(&I);
3311 continue;
3312 }
3313
3314 // Ignore ephemeral values, they will be dropped by the transform.
3315 if (EphTracker.track(&I))
3316 continue;
3317
3318 // Only speculatively execute a single instruction (not counting the
3319 // terminator) for now.
3320 bool IsSafeCheapLoadStore = HoistLoadsStores &&
// NOTE(review): listing line 3321 (a middle operand of this condition) is
// missing.
3322 SpeculatedConditionalLoadsStores.size() <
// NOTE(review): listing line 3323 (the upper bound and end of this statement)
// is missing.
3324 // Do not count the load/store towards the cost if the target supports
3325 // conditional faulting, because it is cheap to speculate it.
3326 if (IsSafeCheapLoadStore)
3327 SpeculatedConditionalLoadsStores.push_back(&I);
3328 else
3329 ++SpeculatedInstructions;
3330
3331 if (SpeculatedInstructions > 1)
3332 return false;
3333
3334 // Don't hoist the instruction if it's unsafe or expensive.
3335 if (!IsSafeCheapLoadStore &&
// NOTE(review): listing line 3336 (one operand of this condition) is missing.
3337 !(HoistCondStores && !SpeculatedStoreValue &&
3338 (SpeculatedStoreValue =
3339 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3340 return false;
3341 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
// NOTE(review): listing lines 3342-3343 (the rest of this condition) are
// missing.
3344 return false;
3345
3346 // Store the store speculation candidate.
3347 if (!SpeculatedStore && SpeculatedStoreValue)
3348 SpeculatedStore = cast<StoreInst>(&I);
3349
3350 // Do not hoist the instruction if any of its operands are defined but not
3351 // used in BB. The transformation will prevent the operand from
3352 // being sunk into the use block.
3353 for (Use &Op : I.operands()) {
3354 Instruction *OpI = dyn_cast<Instruction>(Op);
3355 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3356 continue; // Not a candidate for sinking.
3357
3358 ++SinkCandidateUseCounts[OpI];
3359 }
3360 }
3361
3362 // Consider any sink candidates which are only used in ThenBB as costs for
3363 // speculation. Note, while we iterate over a DenseMap here, we are summing
3364 // and so iteration order isn't significant.
3365 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3366 if (Inst->hasNUses(Count)) {
3367 ++SpeculatedInstructions;
3368 if (SpeculatedInstructions > 1)
3369 return false;
3370 }
3371
3372 // Check that we can insert the selects and that it's not too expensive to do
3373 // so.
3374 bool Convert =
3375 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
// NOTE(review): listing line 3376 is missing; it presumably declares the
// `Cost` accumulator passed below.
3377 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3378 SpeculatedInstructions, Cost, TTI);
3379 if (!Convert || Cost > Budget)
3380 return false;
3381
3382 // If we get here, we can hoist the instruction and if-convert.
3383 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3384
3385 // Insert a select of the value of the speculated store.
3386 if (SpeculatedStoreValue) {
3387 IRBuilder<NoFolder> Builder(BI);
3388 Value *OrigV = SpeculatedStore->getValueOperand();
3389 Value *TrueV = SpeculatedStore->getValueOperand();
3390 Value *FalseV = SpeculatedStoreValue;
3391 if (Invert)
3392 std::swap(TrueV, FalseV);
3393 Value *S = Builder.CreateSelect(
3394 BrCond, TrueV, FalseV, "spec.store.select", BI);
3395 SpeculatedStore->setOperand(0, S);
3396 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3397 SpeculatedStore->getDebugLoc());
3398 // The value stored is still conditional, but the store itself is now
3399 // unconditionally executed, so we must be sure that any linked dbg.assign
3400 // intrinsics are tracking the new stored value (the result of the
3401 // select). If we don't, and the store were to be removed by another pass
3402 // (e.g. DSE), then we'd eventually end up emitting a location describing
3403 // the conditional value, unconditionally.
3404 //
3405 // === Before this transformation ===
3406 // pred:
3407 // store %one, %x.dest, !DIAssignID !1
3408 // dbg.assign %one, "x", ..., !1, ...
3409 // br %cond if.then
3410 //
3411 // if.then:
3412 // store %two, %x.dest, !DIAssignID !2
3413 // dbg.assign %two, "x", ..., !2, ...
3414 //
3415 // === After this transformation ===
3416 // pred:
3417 // store %one, %x.dest, !DIAssignID !1
3418 // dbg.assign %one, "x", ..., !1
3419 // ...
3420 // %merge = select %cond, %two, %one
3421 // store %merge, %x.dest, !DIAssignID !2
3422 // dbg.assign %merge, "x", ..., !2
3423 auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3424 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3425 DbgAssign->replaceVariableLocationOp(OrigV, S);
3426 };
3427 for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
3428 for_each(at::getDVRAssignmentMarkers(SpeculatedStore), replaceVariable);
3429 }
3430
3431 // Metadata can be dependent on the condition we are hoisting above.
3432 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3433 // to avoid making it appear as if the condition is a constant, which would
3434 // be misleading while debugging.
3435 // Similarly strip attributes that maybe dependent on condition we are
3436 // hoisting above.
3437 for (auto &I : make_early_inc_range(*ThenBB)) {
3438 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3439 // Don't update the DILocation of dbg.assign intrinsics.
3440 if (!isa<DbgAssignIntrinsic>(&I))
3441 I.setDebugLoc(DebugLoc());
3442 }
3443 I.dropUBImplyingAttrsAndMetadata();
3444
3445 // Drop ephemeral values.
3446 if (EphTracker.contains(&I)) {
3447 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3448 I.eraseFromParent();
3449 }
3450 }
3451
3452 // Hoist the instructions.
3453 // In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached
3454 // to these instructions, in the same way that dbg.value intrinsics are
3455 // dropped at the end of this block.
3456 for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
3457 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3458 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3459 // equivalent).
3460 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3461 !DVR || !DVR->isDbgAssign())
3462 It.dropOneDbgRecord(&DR);
// Move everything except ThenBB's terminator to just before the branch in BB.
3463 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3464 std::prev(ThenBB->end()));
3465
3466 if (!SpeculatedConditionalLoadsStores.empty())
3467 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert);
3468
3469 // Insert selects and rewrite the PHI operands.
3470 IRBuilder<NoFolder> Builder(BI);
3471 for (PHINode &PN : EndBB->phis()) {
3472 unsigned OrigI = PN.getBasicBlockIndex(BB);
3473 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3474 Value *OrigV = PN.getIncomingValue(OrigI);
3475 Value *ThenV = PN.getIncomingValue(ThenI);
3476
3477 // Skip PHIs which are trivial.
3478 if (OrigV == ThenV)
3479 continue;
3480
3481 // Create a select whose true value is the speculatively executed value and
3482 // false value is the pre-existing value. Swap them if the branch
3483 // destinations were inverted.
3484 Value *TrueV = ThenV, *FalseV = OrigV;
3485 if (Invert)
3486 std::swap(TrueV, FalseV);
3487 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3488 PN.setIncomingValue(OrigI, V);
3489 PN.setIncomingValue(ThenI, V);
3490 }
3491
3492 // Remove speculated dbg intrinsics.
3493 // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
3494 // dbg value for the different flows and inserting it after the select.
3495 for (Instruction *I : SpeculatedDbgIntrinsics) {
3496 // We still want to know that an assignment took place so don't remove
3497 // dbg.assign intrinsics.
3498 if (!isa<DbgAssignIntrinsic>(I))
3499 I->eraseFromParent();
3500 }
3501
3502 ++NumSpeculations;
3503 return true;
3504}
3505
3506 /// Return true if we can thread a branch across this block.
///
/// The block is rejected when it contains a call that cannot be duplicated or
/// is convergent, when it has too many countable instructions (ephemeral
/// values and PHIs are not counted), or when any instruction has a user that
/// lives outside the block or is a PHI node.
3508 int Size = 0;
3509 EphemeralValueTracker EphTracker;
3510
3511 // Walk the block in reverse so that we can identify ephemeral values properly
3512 // (values only feeding assumes).
3513 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3514 // Can't fold blocks that contain noduplicate or convergent calls.
3515 if (CallInst *CI = dyn_cast<CallInst>(&I))
3516 if (CI->cannotDuplicate() || CI->isConvergent())
3517 return false;
3518
3519 // Ignore ephemeral values which are deleted during codegen.
3520 // We will delete Phis while threading, so Phis should not be accounted in
3521 // block's size.
3522 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
// Post-increment: the comparison sees the number of instructions counted
// before this one, and Size is bumped afterwards.
3523 if (Size++ > MaxSmallBlockSize)
3524 return false; // Don't clone large BB's.
3525 }
3526
3527 // We can only support instructions that do not define values that are
3528 // live outside of the current basic block.
3529 for (User *U : I.users()) {
3530 Instruction *UI = cast<Instruction>(U);
// A PHI user is rejected even when it sits in this same block.
3531 if (UI->getParent() != BB || isa<PHINode>(UI))
3532 return false;
3533 }
3534
3535 // Looks ok, continue checking.
3536 }
3537
3538 return true;
3539 }
3540
3542 BasicBlock *To) {
// Try to determine the constant that V must hold when control flows along
// the edge From -> To. Returns nullptr when nothing can be concluded.
3543 // Don't look past the block defining the value, we might get the value from
3544 // a previous loop iteration.
3545 auto *I = dyn_cast<Instruction>(V);
3546 if (I && I->getParent() == To)
3547 return nullptr;
3548
3549 // We know the value if the From block branches on it.
3550 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
// The two successors must differ: if both edges lead to the same block, the
// edge taken tells us nothing about the condition.
3551 if (BI && BI->isConditional() && BI->getCondition() == V &&
3552 BI->getSuccessor(0) != BI->getSuccessor(1))
// Successor 0 is taken when the condition is true.
// NOTE(review): the false arm of this conditional expression is not present
// in this listing -- presumably it yields the false constant; confirm.
3553 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3555
3556 return nullptr;
3557 }
3558
3559 /// If we have a conditional branch on something for which we know the constant
3560 /// value in predecessors (e.g. a phi node in the current block), thread edges
3561 /// from the predecessor to their ultimate destination.
///
/// The result is tri-state:
///  - std::nullopt: one group of predecessors was threaded; the caller should
///    invoke this function again to look for further opportunities.
///  - true: the single-entry PHI degenerate case was handled.
///  - false: nothing was threaded.
3562 static std::optional<bool>
3564 const DataLayout &DL,
3565 AssumptionCache *AC) {
3567 BasicBlock *BB = BI->getParent();
3568 Value *Cond = BI->getCondition();
3569 PHINode *PN = dyn_cast<PHINode>(Cond);
// Collect, per known constant value of the condition, the predecessors on
// whose edge the condition has that value.
// NOTE(review): the declaration of KnownValues is not present in this
// listing; from its use it maps a ConstantInt to a set of predecessor blocks.
3570 if (PN && PN->getParent() == BB) {
3571 // Degenerate case of a single entry PHI.
3572 if (PN->getNumIncomingValues() == 1) {
3574 return true;
3575 }
3576
// The condition is a PHI in this block: every constant incoming value is
// known on the corresponding incoming edge.
3577 for (Use &U : PN->incoming_values())
3578 if (auto *CB = dyn_cast<ConstantInt>(U))
3579 KnownValues[CB].insert(PN->getIncomingBlock(U));
3580 } else {
// Otherwise ask, for each predecessor, whether its own branch pins the
// value of the condition on the edge into BB.
3581 for (BasicBlock *Pred : predecessors(BB)) {
3582 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3583 KnownValues[CB].insert(Pred);
3584 }
3585 }
3586
3587 if (KnownValues.empty())
3588 return false;
3589
3590 // Now we know that this block has multiple preds and two succs.
3591 // Check that the block is small enough and values defined in the block are
3592 // not used outside of it.
3594 return false;
3595
3596 for (const auto &Pair : KnownValues) {
3597 // Okay, we now know that all edges from PredBB should be revectored to
3598 // branch to RealDest.
3599 ConstantInt *CB = Pair.first;
3600 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
// A non-zero (true) condition selects successor 0, a zero one successor 1.
3601 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3602
3603 if (RealDest == BB)
3604 continue; // Skip self loops.
3605
3606 // Skip if the predecessor's terminator is an indirect branch.
3607 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3608 return isa<IndirectBrInst>(PredBB->getTerminator());
3609 }))
3610 continue;
3611
3612 LLVM_DEBUG({
3613 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3614 << " has value " << *Pair.first << " in predecessors:\n";
3615 for (const BasicBlock *PredBB : Pair.second)
3616 dbgs() << " " << PredBB->getName() << "\n";
3617 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3618 });
3619
3620 // Split the predecessors we are threading into a new edge block. We'll
3621 // clone the instructions into this block, and then redirect it to RealDest.
3622 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3623
3624 // TODO: These just exist to reduce test diff, we can drop them if we like.
3625 EdgeBB->setName(RealDest->getName() + ".critedge");
3626 EdgeBB->moveBefore(RealDest);
3627
3628 // Update PHI nodes.
3629 addPredecessorToBlock(RealDest, EdgeBB, BB);
3630
3631 // BB may have instructions that are being threaded over. Clone these
3632 // instructions into EdgeBB. We know that there will be no uses of the
3633 // cloned instructions outside of EdgeBB.
3634 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3635 DenseMap<Value *, Value *> TranslateMap; // Track translated values.
// On this edge the branch condition is the known constant.
3636 TranslateMap[Cond] = CB;
3637
3638 // RemoveDIs: track instructions that we optimise away while folding, so
3639 // that we can copy DbgVariableRecords from them later.
3640 BasicBlock::iterator SrcDbgCursor = BB->begin();
3641 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
// PHIs are not cloned; they are replaced by the value flowing in from EdgeBB.
3642 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3643 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3644 continue;
3645 }
3646 // Clone the instruction.
3647 Instruction *N = BBI->clone();
3648 // Insert the new instruction into its new home.
3649 N->insertInto(EdgeBB, InsertPt);
3650
3651 if (BBI->hasName())
3652 N->setName(BBI->getName() + ".c");
3653
3654 // Update operands due to translation.
3655 for (Use &Op : N->operands()) {
3656 DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
3657 if (PI != TranslateMap.end())
3658 Op = PI->second;
3659 }
3660
3661 // Check for trivial simplification.
3662 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3663 if (!BBI->use_empty())
3664 TranslateMap[&*BBI] = V;
// The clone is only dropped when doing so cannot lose a side effect; a
// simplified-but-side-effecting clone stays in EdgeBB.
3665 if (!N->mayHaveSideEffects()) {
3666 N->eraseFromParent(); // Instruction folded away, don't need actual
3667 // inst
3668 N = nullptr;
3669 }
3670 } else {
3671 if (!BBI->use_empty())
3672 TranslateMap[&*BBI] = N;
3673 }
3674 if (N) {
3675 // Copy all debug-info attached to instructions from the last we
3676 // successfully clone, up to this instruction (they might have been
3677 // folded away).
3678 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3679 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3680 SrcDbgCursor = std::next(BBI);
3681 // Clone debug-info on this instruction too.
3682 N->cloneDebugInfoFrom(&*BBI);
3683
3684 // Register the new instruction with the assumption cache if necessary.
3685 if (auto *Assume = dyn_cast<AssumeInst>(N))
3686 if (AC)
3687 AC->registerAssumption(Assume);
3688 }
3689 }
3690
// Carry over debug records from any trailing instructions that were folded
// away, and from the branch itself, onto the instruction at InsertPt.
3691 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3692 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3693 InsertPt->cloneDebugInfoFrom(BI);
3694
// Redirect the edge block straight to the destination it would reach anyway.
3695 BB->removePredecessor(EdgeBB);
3696 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3697 EdgeBI->setSuccessor(0, RealDest);
3698 EdgeBI->setDebugLoc(BI->getDebugLoc());
3699
3700 if (DTU) {
3702 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3703 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3704 DTU->applyUpdates(Updates);
3705 }
3706
3707 // For simplicity, we created a separate basic block for the edge. Merge
3708 // it back into the predecessor if possible. This not only avoids
3709 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3710 // bypass the check for trivial cycles above.
3711 MergeBlockIntoPredecessor(EdgeBB, DTU);
3712
3713 // Signal repeat, simplifying any other constants.
3714 return std::nullopt;
3715 }
3716
3717 return false;
3718 }
3719
3721 DomTreeUpdater *DTU,
3722 const DataLayout &DL,
3723 AssumptionCache *AC) {
// Driver around foldCondBranchOnValueKnownInPredecessorImpl: keep calling it
// for as long as it reports std::nullopt ("changed, try again"). Returns true
// if any invocation changed the IR.
3724 std::optional<bool> Result;
3725 bool EverChanged = false;
3726 do {
3727 // Note that None means "we changed things, but recurse further."
3728 Result = foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
// The nullopt test comes first so *Result is only read when a value exists.
3729 EverChanged |= Result == std::nullopt || *Result;
3730 } while (Result == std::nullopt);
3731 return EverChanged;
3732 }
3733
3734 /// Given a BB that starts with the specified two-entry PHI node,
3735 /// see if we can eliminate it.
///
/// On success every PHI in the block is replaced by a select on the
/// dominating branch condition, the conditional blocks are hoisted into the
/// dominating block, and the dominating branch becomes an unconditional
/// branch to BB. Returns true if the IR was changed; this includes the case
/// where some PHIs were simplified away but the full transform was abandoned.
3738 const DataLayout &DL,
3739 bool SpeculateUnpredictables) {
3740 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3741 // statement", which has a very simple dominance structure. Basically, we
3742 // are trying to find the condition that is being branched on, which
3743 // subsequently causes this merge to happen. We really want control
3744 // dependence information for this check, but simplifycfg can't keep it up
3745 // to date, and this catches most of the cases we care about anyway.
3746 BasicBlock *BB = PN->getParent();
3747
3748 BasicBlock *IfTrue, *IfFalse;
3749 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3750 if (!DomBI)
3751 return false;
3752 Value *IfCond = DomBI->getCondition();
3753 // Don't bother if the branch will be constant folded trivially.
3754 if (isa<ConstantInt>(IfCond))
3755 return false;
3756
3757 BasicBlock *DomBlock = DomBI->getParent();
// IfBlocks receives those incoming blocks of the PHI that end in an
// unconditional branch, i.e. the conditional arm(s) of the if.
3760 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3761 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3762 });
3763 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3764 "Will have either one or two blocks to speculate.");
3765
3766 // If the branch is non-unpredictable, see if we either predictably jump to
3767 // the merge bb (if we have only a single 'then' block), or if we predictably
3768 // jump to one specific 'then' block (if we have two of them).
3769 // It isn't beneficial to speculatively execute the code
3770 // from the block that we know is predictably not entered.
3771 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3772 if (!IsUnpredictable) {
3773 uint64_t TWeight, FWeight;
// The sum is checked against zero to avoid a zero denominator below.
3774 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3775 (TWeight + FWeight) != 0) {
3776 BranchProbability BITrueProb =
3777 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3779 BranchProbability BIFalseProb = BITrueProb.getCompl();
3780 if (IfBlocks.size() == 1) {
3781 BranchProbability BIBBProb =
3782 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3783 if (BIBBProb >= Likely)
3784 return false;
3785 } else {
3786 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3787 return false;
3788 }
3789 }
3790 }
3791
3792 // Don't try to fold an unreachable block. For example, the phi node itself
3793 // can't be the candidate if-condition for a select that we want to form.
3794 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3795 if (IfCondPhiInst->getParent() == BB)
3796 return false;
3797
3798 // Okay, we found that we can merge this two-entry phi node into a select.
3799 // Doing so would require us to fold *all* two entry phi nodes in this block.
3800 // At some point this becomes non-profitable (particularly if the target
3801 // doesn't support cmov's). Only do this transformation if there are two or
3802 // fewer PHI nodes in this block.
// NOTE(review): NumPhis is tested before it is incremented, so the loop only
// bails out on reaching a fourth PHI; a block with three PHIs passes. That
// differs from the "two or fewer" wording above -- confirm which is intended.
3803 unsigned NumPhis = 0;
3804 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3805 if (NumPhis > 2)
3806 return false;
3807
3808 // Loop over the PHI's seeing if we can promote them all to select
3809 // instructions. While we are at it, keep track of the instructions
3810 // that need to be moved to the dominating block.
3811 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3813 InstructionCost Budget =
// An unpredictable branch gets extra budget equal to the mispredict penalty,
// but only when the caller opted in via SpeculateUnpredictables.
3815 if (SpeculateUnpredictables && IsUnpredictable)
3816 Budget += TTI.getBranchMispredictPenalty();
3817
3818 bool Changed = false;
3819 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
// Advance the iterator before touching the PHI, since it may be erased below.
3820 PHINode *PN = cast<PHINode>(II++);
3821 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3822 PN->replaceAllUsesWith(V);
3823 PN->eraseFromParent();
3824 Changed = true;
3825 continue;
3826 }
3827
3828 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3829 AggressiveInsts, Cost, Budget, TTI, AC) ||
3830 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3831 AggressiveInsts, Cost, Budget, TTI, AC))
3832 return Changed;
3833 }
3834
3835 // If we folded the first phi, PN dangles at this point. Refresh it. If
3836 // we ran out of PHIs then we simplified them all.
3837 PN = dyn_cast<PHINode>(BB->begin());
3838 if (!PN)
3839 return true;
3840
3841 // Return true if at least one of these is a 'not', and another is either
3842 // a 'not' too, or a constant.
3843 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
// Normalise so that, if either value is a 'not', V0 is one.
3844 if (!match(V0, m_Not(m_Value())))
3845 std::swap(V0, V1);
3846 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3847 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3848 };
3849
3850 // Don't fold i1 branches on PHIs which contain binary operators or
3851 // (possibly inverted) select form of or/ands, unless one of
3852 // the incoming values is a 'not' and another one is freely invertible.
3853 // These can often be turned into switches and other things.
3854 auto IsBinOpOrAnd = [](Value *V) {
3855 return match(
3857 };
3858 if (PN->getType()->isIntegerTy(1) &&
3859 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3860 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3861 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3862 PN->getIncomingValue(1)))
3863 return Changed;
3864
3865 // If all PHI nodes are promotable, check to make sure that all instructions
3866 // in the predecessor blocks can be promoted as well. If not, we won't be able
3867 // to get rid of the control flow, so it's not worth promoting to select
3868 // instructions.
3869 for (BasicBlock *IfBlock : IfBlocks)
3870 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3871 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3872 // This is not an aggressive instruction that we can promote.
3873 // Because of this, we won't be able to get rid of the control flow, so
3874 // the xform is not worth it.
3875 return Changed;
3876 }
3877
3878 // If either of the blocks has its address taken, we can't do this fold.
3879 if (any_of(IfBlocks,
3880 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3881 return Changed;
3882
3883 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3884 if (IsUnpredictable) dbgs() << " (unpredictable)";
3885 dbgs() << " T: " << IfTrue->getName()
3886 << " F: " << IfFalse->getName() << "\n");
3887
3888 // If we can still promote the PHI nodes after this gauntlet of tests,
3889 // do all of the PHI's now.
3890
3891 // Move all 'aggressive' instructions, which are defined in the
3892 // conditional parts of the if's up to the dominating block.
3893 for (BasicBlock *IfBlock : IfBlocks)
3894 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3895
3896 IRBuilder<NoFolder> Builder(DomBI);
3897 // Propagate fast-math-flags from phi nodes to replacement selects.
3898 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3899 // Change the PHI node into a select instruction.
3900 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3901 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3902
// The PHI is passed as the flag source only when it is a floating-point
// math operator; otherwise no flag source is supplied.
3903 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3904 isa<FPMathOperator>(PN) ? PN : nullptr,
3905 "", DomBI);
3906 PN->replaceAllUsesWith(Sel);
3907 Sel->takeName(PN);
3908 PN->eraseFromParent();
3909 }
3910
3911 // At this point, all IfBlocks are empty, so our if statement
3912 // has been flattened. Change DomBlock to jump directly to our new block to
3913 // avoid other simplifycfg's kicking in on the diamond.
3914 Builder.CreateBr(BB);
3915
// The successor list is captured before DomBI is erased; the updates are
// applied only after the old branch is gone.
3917 if (DTU) {
3918 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3919 for (auto *Successor : successors(DomBlock))
3920 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3921 }
3922
3923 DomBI->eraseFromParent();
3924 if (DTU)
3925 DTU->applyUpdates(Updates);
3926
3927 return true;
3928 }
3929
3931 Instruction::BinaryOps Opc, Value *LHS,
3932 Value *RHS, const Twine &Name = "") {
// Build "LHS <Opc> RHS" for Opc in {And, Or}. A plain binary operator is used
// when impliesPoison(RHS, LHS) holds; otherwise the builder's logical
// and/or form is emitted. Any other opcode is a programming error.
// NOTE(review): presumably impliesPoison(RHS, LHS) means "if RHS is poison
// then LHS is poison too" -- confirm against its declaration.
3933 // Try to relax logical op to binary op.
3934 if (impliesPoison(RHS, LHS))
3935 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3936 if (Opc == Instruction::And)
3937 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3938 if (Opc == Instruction::Or)
3939 return Builder.CreateLogicalOr(LHS, RHS, Name);
3940 llvm_unreachable("Invalid logical opcode");
3941 }
3942
3943 /// Return true if either PBI or BI has branch weight available, and store
3944 /// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3945 /// not have branch weight, use 1:1 as its weight.
///
/// When false is returned (neither branch has weights) the output weights are
/// not assigned by this function beyond whatever extractBranchWeights did.
3947 uint64_t &PredTrueWeight,
3948 uint64_t &PredFalseWeight,
3949 uint64_t &SuccTrueWeight,
3950 uint64_t &SuccFalseWeight) {
3951 bool PredHasWeights =
3952 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3953 bool SuccHasWeights =
3954 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3955 if (PredHasWeights || SuccHasWeights) {
// Fill in an even 1:1 split for whichever branch lacks weights.
3956 if (!PredHasWeights)
3957 PredTrueWeight = PredFalseWeight = 1;
3958 if (!SuccHasWeights)
3959 SuccTrueWeight = SuccFalseWeight = 1;
3960 return true;
3961 } else {
3962 return false;
3963 }
3964 }
3965
3966 /// Determine if the two branches share a common destination and deduce a glue
3967 /// that joins the branches' conditions to arrive at the common destination if
3968 /// that would be profitable.
///
/// On success the tuple holds, in order: the common successor block, the
/// opcode (And / Or) used to combine the two conditions, and whether the
/// predecessor's condition must be inverted first. std::nullopt is returned
/// when no successor is shared or the predecessor branch is too predictable.
3969 static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3971 const TargetTransformInfo *TTI) {
3972 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3973 "Both blocks must end with a conditional branches.");
3975 "PredBB must be a predecessor of BB.");
3976
3977 // We have the potential to fold the conditions together, but if the
3978 // predecessor branch is predictable, we may not want to merge them.
3979 uint64_t PTWeight, PFWeight;
// Both probabilities start out default-constructed; PBITrueProb is only
// assigned when TTI is available, the branch is not marked unpredictable,
// and usable (non-zero-sum) weights exist. The checks below rely on
// isUnknown() to detect the "not assigned" case.
3980 BranchProbability PBITrueProb, Likely;
3981 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3982 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3983 (PTWeight + PFWeight) != 0) {
3984 PBITrueProb =
3985 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3987 }
3988
// The four cases are tried in order; the first successor match wins, even if
// its profitability test then fails and std::nullopt is returned.
3989 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3990 // Speculate the 2nd condition unless the 1st is probably true.
3991 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3992 return {{BI->getSuccessor(0), Instruction::Or, false}};
3993 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3994 // Speculate the 2nd condition unless the 1st is probably false.
3995 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3996 return {{BI->getSuccessor(1), Instruction::And, false}};
3997 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3998 // Speculate the 2nd condition unless the 1st is probably true.
3999 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4000 return {{BI->getSuccessor(1), Instruction::And, true}};
4001 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4002 // Speculate the 2nd condition unless the 1st is probably false.
4003 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4004 return {{BI->getSuccessor(0), Instruction::Or, true}};
4005 }
4006 return std::nullopt;
4007 }
4008
4010 DomTreeUpdater *DTU,
4011 MemorySSAUpdater *MSSAU,
4012 const TargetTransformInfo *TTI) {
// Fold BI's block into the predecessor that ends in PBI: PBI is retargeted
// from BB to the successor of BI that is not shared, its condition is
// combined with BI's (cloned) condition, and branch weights, dominator tree
// and loop metadata are updated to match. The visible code always returns
// true.
4013 BasicBlock *BB = BI->getParent();
4014 BasicBlock *PredBlock = PBI->getParent();
4015
4016 // Determine if the two branches share a common destination.
4017 BasicBlock *CommonSucc;
4019 bool InvertPredCond;
4020 std::tie(CommonSucc, Opc, InvertPredCond) =
4022
4023 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4024
4025 IRBuilder<> Builder(PBI);
4026 // The builder is used to create instructions to eliminate the branch in BB.
4027 // If BB's terminator has !annotation metadata, add it to the new
4028 // instructions.
4030 {LLVMContext::MD_annotation});
4031
4032 // If we need to invert the condition in the pred block to match, do so now.
4033 if (InvertPredCond) {
4034 InvertBranch(PBI, Builder);
4035 }
4036
// UniqueSucc is the successor of BI on the same side (true/false) as the
// edge by which PBI reaches BB.
4037 BasicBlock *UniqueSucc =
4038 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4039
4040 // Before cloning instructions, notify the successor basic block that it
4041 // is about to have a new predecessor. This will update PHI nodes,
4042 // which will allow us to update live-out uses of bonus instructions.
4043 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4044
4045 // Try to update branch weights.
4046 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4047 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4048 SuccTrueWeight, SuccFalseWeight)) {
4049 SmallVector<uint64_t, 8> NewWeights;
4050
4051 if (PBI->getSuccessor(0) == BB) {
4052 // PBI: br i1 %x, BB, FalseDest
4053 // BI: br i1 %y, UniqueSucc, FalseDest
4054 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4055 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
4056 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4057 // TrueWeight for PBI * FalseWeight for BI.
4058 // We assume that total weights of a BranchInst can fit into 32 bits.
4059 // Therefore, we will not have overflow using 64-bit arithmetic.
4060 NewWeights.push_back(PredFalseWeight *
4061 (SuccFalseWeight + SuccTrueWeight) +
4062 PredTrueWeight * SuccFalseWeight);
4063 } else {
4064 // PBI: br i1 %x, TrueDest, BB
4065 // BI: br i1 %y, TrueDest, UniqueSucc
4066 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4067 // FalseWeight for PBI * TrueWeight for BI.
4068 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4069 PredFalseWeight * SuccTrueWeight);
4070 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4071 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
4072 }
4073
4074 // Halve the weights if any of them cannot fit in an uint32_t
4075 fitWeights(NewWeights);
4076
// Narrow to 32 bits for the metadata, relying on fitWeights above.
4077 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
4078 setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
4079
4080 // TODO: If BB is reachable from all paths through PredBlock, then we
4081 // could replace PBI's branch probabilities with BI's.
4082 } else
// Neither branch had weights: drop any profile metadata left on PBI.
4083 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4084
4085 // Now, update the CFG.
// The boolean indexes the successor slot: 0 if BB was the true successor,
// 1 otherwise.
4086 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4087
4088 if (DTU)
4089 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4090 {DominatorTree::Delete, PredBlock, BB}});
4091
4092 // If BI was a loop latch, it may have had associated loop metadata.
4093 // We need to copy it to the new latch, that is, PBI.
4094 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4095 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4096
4097 ValueToValueMapTy VMap; // maps original values to cloned values
4099
4100 Module *M = BB->getModule();
4101
4102 if (PredBlock->IsNewDbgInfoFormat) {
4103 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4104 for (DbgVariableRecord &DVR :
4106 RemapDbgRecord(M, &DVR, VMap,
4108 }
4109 }
4110
4111 // Now that the Cond was cloned into the predecessor basic block,
4112 // or/and the two conditions together.
4113 Value *BICond = VMap[BI->getCondition()];
4114 PBI->setCondition(
4115 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4116
4117 ++NumFoldBranchToCommonDest;
4118 return true;
4119 }
4120
4121/// Return if an instruction's type or any of its operands' types are a vector
4122/// type.
4123static bool isVectorOp(Instruction &I) {
4124 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4125 return U->getType()->isVectorTy();
4126 });
4127}
4128
4129 /// If this basic block is simple enough, and if a predecessor branches to us
4130 /// and one of our successors, fold the block into the predecessor and use
4131 /// logical operations to pick the right destination.
///
/// BonusInstThreshold bounds the number of non-free instructions (other than
/// the condition itself) that would be duplicated, summed over all candidate
/// predecessors. Returns true if a fold was performed.
4133 MemorySSAUpdater *MSSAU,
4134 const TargetTransformInfo *TTI,
4135 unsigned BonusInstThreshold) {
4136 // If this block ends with an unconditional branch,
4137 // let speculativelyExecuteBB() deal with it.
4138 if (!BI->isConditional())
4139 return false;
4140
4141 BasicBlock *BB = BI->getParent();
4145
4146 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4147
// The condition must be a compare, binary operator or select that is defined
// in this block and used only by the branch.
4148 if (!Cond ||
4149 (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
4150 !isa<SelectInst>(Cond)) ||
4151 Cond->getParent() != BB || !Cond->hasOneUse())
4152 return false;
4153
4154 // Finally, don't infinitely unroll conditional loops.
4155 if (is_contained(successors(BB), BB))
4156 return false;
4157
4158 // With which predecessors will we want to deal with?
4160 for (BasicBlock *PredBlock : predecessors(BB)) {
4161 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4162
4163 // Check that we have two conditional branches. If there is a PHI node in
4164 // the common successor, verify that the same value flows in from both
4165 // blocks.
4166 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4167 continue;
4168
4169 // Determine if the two branches share a common destination.
4170 BasicBlock *CommonSucc;
4172 bool InvertPredCond;
4173 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4174 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4175 else
4176 continue;
4177
4178 // Check the cost of inserting the necessary logic before performing the
4179 // transformation.
4180 if (TTI) {
4181 Type *Ty = BI->getCondition()->getType();
// Inverting the predecessor condition costs an extra xor unless that
// condition is a single-use compare.
4183 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4184 !isa<CmpInst>(PBI->getCondition())))
4185 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4186
4188 continue;
4189 }
4190
4191 // Ok, we do want to deal with this predecessor. Record it.
4192 Preds.emplace_back(PredBlock);
4193 }
4194
4195 // If there aren't any predecessors into which we can fold,
4196 // don't bother checking the cost.
4197 if (Preds.empty())
4198 return false;
4199
4200 // Only allow this transformation if computing the condition doesn't involve
4201 // too many instructions and these involved instructions can be executed
4202 // unconditionally. We denote all involved instructions except the condition
4203 // as "bonus instructions", and only allow this transformation when the
4204 // number of the bonus instructions we'll need to create when cloning into
4205 // each predecessor does not exceed a certain threshold.
4206 unsigned NumBonusInsts = 0;
4207 bool SawVectorOp = false;
4208 const unsigned PredCount = Preds.size();
4209 for (Instruction &I : *BB) {
4210 // Don't check the branch condition comparison itself.
4211 if (&I == Cond)
4212 continue;
4213 // Ignore dbg intrinsics, and the terminator.
4214 if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
4215 continue;
4216 // I must be safe to execute unconditionally.
4218 return false;
4219 SawVectorOp |= isVectorOp(I);
4220
4221 // Account for the cost of duplicating this instruction into each
4222 // predecessor. Ignore free instructions.
4223 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4225 NumBonusInsts += PredCount;
4226
4227 // Early exits once we reach the limit.
// The larger (vector-multiplied) limit is used here because SawVectorOp is
// not final until the whole block has been scanned; the exact limit is
// re-checked after the loop.
4228 if (NumBonusInsts >
4229 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4230 return false;
4231 }
4232
// A use is acceptable if it is a PHI operand flowing in from BB, or a later
// instruction inside BB itself.
4233 auto IsBCSSAUse = [BB, &I](Use &U) {
4234 auto *UI = cast<Instruction>(U.getUser());
4235 if (auto *PN = dyn_cast<PHINode>(UI))
4236 return PN->getIncomingBlock(U) == BB;
4237 return UI->getParent() == BB && I.comesBefore(UI);
4238 };
4239
4240 // Does this instruction require rewriting of uses?
4241 if (!all_of(I.uses(), IsBCSSAUse))
4242 return false;
4243 }
4244 if (NumBonusInsts >
4245 BonusInstThreshold *
4246 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4247 return false;
4248
4249 // Ok, we have the budget. Perform the transformation.
// Only the first recorded predecessor is folded per call: the return below
// leaves the loop on its first iteration.
4250 for (BasicBlock *PredBlock : Preds) {
4251 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4252 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4253 }
4254 return false;
4255 }
4256
4257 // If there is only one store in BB1 and BB2, return it, otherwise return
4258 // nullptr.
// Either block pointer may be null, in which case that block is skipped. The
// count is taken across both blocks together, so one store in each block
// also yields nullptr.
4260 StoreInst *S = nullptr;
4261 for (auto *BB : {BB1, BB2}) {
4262 if (!BB)
4263 continue;
4264 for (auto &I : *BB)
4265 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4266 if (S)
4267 // Multiple stores seen.
4268 return nullptr;
4269 else
4270 S = SI;
4271 }
4272 }
4273 return S;
4274 }
4275
4277 Value *AlternativeV = nullptr) {
// Returns a value that can be used in BB's single successor to refer to V:
// an existing matching PHI, V itself when V is not an instruction defined in
// BB (and no AlternativeV was requested), or a freshly created PHI.
4278 // PHI is going to be a PHI node that allows the value V that is defined in
4279 // BB to be referenced in BB's only successor.
4280 //
4281 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4282 // doesn't matter to us what the other operand is (it'll never get used). We
4283 // could just create a new PHI with an undef incoming value, but that could
4284 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4285 // other PHI. So here we directly look for some PHI in BB's successor with V
4286 // as an incoming operand. If we find one, we use it, else we create a new
4287 // one.
4288 //
4289 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4290 // PHI must be exactly: phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ]
4291 // where OtherBB is the single other predecessor of BB's only successor.
4292 PHINode *PHI = nullptr;
4293 BasicBlock *Succ = BB->getSingleSuccessor();
4294
4295 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4296 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4297 PHI = cast<PHINode>(I);
4298 if (!AlternativeV)
4299 break;
4300
4301 assert(Succ->hasNPredecessors(2));
4302 auto PredI = pred_begin(Succ);
4303 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4304 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4305 break;
// The other operand did not match: forget this candidate and keep scanning.
4306 PHI = nullptr;
4307 }
4308 if (PHI)
4309 return PHI;
4310
4311 // If V is not an instruction defined in BB, just return it.
4312 if (!AlternativeV &&
4313 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4314 return V;
4315
// No suitable PHI exists: create one at the top of the successor. Every
// predecessor other than BB contributes AlternativeV, or poison when no
// alternative was supplied.
4316 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4317 PHI->insertBefore(Succ->begin());
4318 PHI->addIncoming(V, BB);
4319 for (BasicBlock *PredBB : predecessors(Succ))
4320 if (PredBB != BB)
4321 PHI->addIncoming(
4322 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4323 return PHI;
4324 }
4325
4327 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4328 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4329 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4330 // For every pointer, there must be exactly two stores, one coming from
4331 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4332 // store (to any address) in PTB,PFB or QTB,QFB.
4333 // FIXME: We could relax this restriction with a bit more work and performance
4334 // testing.
4335 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4336 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4337 if (!PStore || !QStore)
4338 return false;
4339
4340 // Now check the stores are compatible.
4341 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4342 PStore->getValueOperand()->getType() !=
4343 QStore->getValueOperand()->getType())
4344 return false;
4345
4346 // Check that sinking the store won't cause program behavior changes. Sinking
4347 // the store out of the Q blocks won't change any behavior as we're sinking
4348 // from a block to its unconditional successor. But we're moving a store from
4349 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4350 // So we need to check that there are no aliasing loads or stores in
4351 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4352 // operations between PStore and the end of its parent block.
4353 //
4354 // The ideal way to do this is to query AliasAnalysis, but we don't
4355 // preserve AA currently so that is dangerous. Be super safe and just
4356 // check there are no other memory operations at all.
4357 for (auto &I : *QFB->getSinglePredecessor())
4358 if (I.mayReadOrWriteMemory())
4359 return false;
4360 for (auto &I : *QFB)
4361 if (&I != QStore && I.mayReadOrWriteMemory())
4362 return false;
4363 if (QTB)
4364 for (auto &I : *QTB)
4365 if (&I != QStore && I.mayReadOrWriteMemory())
4366 return false;
4367 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4368 I != E; ++I)
4369 if (&*I != PStore && I->mayReadOrWriteMemory())
4370 return false;
4371
4372 // If we're not in aggressive mode, we only optimize if we have some
4373 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4374 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4375 if (!BB)
4376 return true;
4377 // Heuristic: if the block can be if-converted/phi-folded and the
4378 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4379 // thread this store.
4381 InstructionCost Budget =
4383 for (auto &I : BB->instructionsWithoutDebug(false)) {
4384 // Consider terminator instruction to be free.
4385 if (I.isTerminator())
4386 continue;
4387 // If this is one the stores that we want to speculate out of this BB,
4388 // then don't count it's cost, consider it to be free.
4389 if (auto *S = dyn_cast<StoreInst>(&I))
4390 if (llvm::find(FreeStores, S))
4391 continue;
4392 // Else, we have a white-list of instructions that we are ak speculating.
4393 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4394 return false; // Not in white-list - not worthwhile folding.
4395 // And finally, if this is a non-free instruction that we are okay
4396 // speculating, ensure that we consider the speculation budget.
4397 Cost +=
4399 if (Cost > Budget)
4400 return false; // Eagerly refuse to fold as soon as we're out of budget.
4401 }
4402 assert(Cost <= Budget &&
4403 "When we run out of budget we will eagerly return from within the "
4404 "per-instruction loop.");
4405 return true;
4406 };
4407
4408 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4410 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4411 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4412 return false;
4413
4414 // If PostBB has more than two predecessors, we need to split it so we can
4415 // sink the store.
4416 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4417 // We know that QFB's only successor is PostBB. And QFB has a single
4418 // predecessor. If QTB exists, then its only successor is also PostBB.
4419 // If QTB does not exist, then QFB's only predecessor has a conditional
4420 // branch to QFB and PostBB.
4421 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4422 BasicBlock *NewBB =
4423 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4424 if (!NewBB)
4425 return false;
4426 PostBB = NewBB;
4427 }
4428
4429 // OK, we're going to sink the stores to PostBB. The store has to be
4430 // conditional though, so first create the predicate.
4431 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
4432 ->getCondition();
4433 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
4434 ->getCondition();
4435
4437 PStore->getParent());
4439 QStore->getParent(), PPHI);
4440
4441 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4442 IRBuilder<> QB(PostBB, PostBBFirst);
4443 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4444
4445 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
4446 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
4447
4448 if (InvertPCond)
4449 PPred = QB.CreateNot(PPred);
4450 if (InvertQCond)
4451 QPred = QB.CreateNot(QPred);
4452 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4453
4454 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4455 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4456 /*Unreachable=*/false,
4457 /*BranchWeights=*/nullptr, DTU);
4458
4459 QB.SetInsertPoint(T);
4460 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4461 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4462 // Choose the minimum alignment. If we could prove both stores execute, we
4463 // could use biggest one. In this case, though, we only know that one of the
4464 // stores executes. And we don't know it's safe to take the alignment from a
4465 // store that doesn't execute.
4466 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4467
4468 QStore->eraseFromParent();
4469 PStore->eraseFromParent();
4470
4471 return true;
4472}
4473
4475 DomTreeUpdater *DTU, const DataLayout &DL,
4476 const TargetTransformInfo &TTI) {
// NOTE(review): the opening line of this signature (embedded line 4474) is
// not present in this listing. The body uses PBI and QBI and a caller later
// in the file invokes mergeConditionalStores(PBI, BI, DTU, DL, TTI), so this
// is presumably that function -- confirm against the original file.
//
// Overview of the visible body: verify that PBI and QBI form two stacked
// diamonds/triangles, collect the store addresses that occur on both the P
// side and the Q side, and call mergeConditionalStoreToAddress once per common
// address. The result is true if any of those calls returned true.
4477 // The intention here is to find diamonds or triangles (see below) where each
4478 // conditional block contains a store to the same address. Both of these
4479 // stores are conditional, so they can't be unconditionally sunk. But it may
4480 // be profitable to speculatively sink the stores into one merged store at the
4481 // end, and predicate the merged store on the union of the two conditions of
4482 // PBI and QBI.
4483 //
4484 // This can reduce the number of stores executed if both of the conditions are
4485 // true, and can allow the blocks to become small enough to be if-converted.
4486 // This optimization will also chain, so that ladders of test-and-set
4487 // sequences can be if-converted away.
4488 //
4489 // We only deal with simple diamonds or triangles:
4490 //
4491 //     PBI       or      PBI        or a combination of the two
4492 //    /   \               | \
4493 //   PTB  PFB             |  PFB
4494 //    \   /               | /
4495 //     QBI                QBI
4496 //    /  \                | \
4497 //   QTB  QFB             |  QFB
4498 //    \  /                | /
4499 //    PostBB            PostBB
4500 //
4501 // We model triangles as a type of diamond with a nullptr "true" block.
4502 // Triangles are canonicalized so that the fallthrough edge is represented by
4503 // a true condition, as in the diagram above.
4504 BasicBlock *PTB = PBI->getSuccessor(0);
4505 BasicBlock *PFB = PBI->getSuccessor(1);
4506 BasicBlock *QTB = QBI->getSuccessor(0);
4507 BasicBlock *QFB = QBI->getSuccessor(1);
4508 BasicBlock *PostBB = QFB->getSingleSuccessor();
4509
4510 // Make sure we have a good guess for PostBB. If QTB's only successor is
4511 // QFB, then QFB is a better PostBB.
4512 if (QTB->getSingleSuccessor() == QFB)
4513 PostBB = QFB;
4514
4515 // If we couldn't find a good PostBB, stop.
4516 if (!PostBB)
4517 return false;
4518
4519 bool InvertPCond = false, InvertQCond = false;
4520 // Canonicalize fallthroughs to the true branches.
// Swapping the true/false blocks flips the meaning of the branch condition,
// which is recorded in InvertPCond / InvertQCond and passed on below.
4521 if (PFB == QBI->getParent()) {
4522 std::swap(PFB, PTB);
4523 InvertPCond = true;
4524 }
4525 if (QFB == PostBB) {
4526 std::swap(QFB, QTB);
4527 InvertQCond = true;
4528 }
4529
4530 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4531 // and QFB may not. Model fallthroughs as a nullptr block.
4532 if (PTB == QBI->getParent())
4533 PTB = nullptr;
4534 if (QTB == PostBB)
4535 QTB = nullptr;
4536
4537 // Legality bailouts. We must have at least the non-fallthrough blocks and
4538 // the post-dominating block, and the non-fallthroughs must only have one
4539 // predecessor.
4540 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4541 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4542 };
4543 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4544 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4545 return false;
4546 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4547 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4548 return false;
// Require exactly two uses of QBI's block; presumably these are the two edges
// arriving from the P side -- TODO confirm.
4549 if (!QBI->getParent()->hasNUses(2))
4550 return false;
4551
4552 // OK, this is a sequence of two diamonds or triangles.
4553 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4554 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4555 for (auto *BB : {PTB, PFB}) {
4556 if (!BB)
4557 continue;
4558 for (auto &I : *BB)
4559 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4560 PStoreAddresses.insert(SI->getPointerOperand());
4561 }
4562 for (auto *BB : {QTB, QFB}) {
4563 if (!BB)
4564 continue;
4565 for (auto &I : *BB)
4566 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4567 QStoreAddresses.insert(SI->getPointerOperand());
4568 }
4569
4570 set_intersect(PStoreAddresses, QStoreAddresses);
4571 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4572 // clear what it contains.
4573 auto &CommonAddresses = PStoreAddresses;
4574
4575 bool Changed = false;
4576 for (auto *Address : CommonAddresses)
4577 Changed |=
4578 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4579 InvertPCond, InvertQCond, DTU, DL, TTI);
4580 return Changed;
4581}
4582
4583 /// If the previous block ended with a widenable branch, determine if reusing
4584 /// the target block is profitable and legal. This will have the effect of
4585 /// "widening" PBI, but doesn't require us to reason about hoisting safety.
///
/// Returns true if one of BI's successors was redirected to PBI's false
/// successor (and the dominator tree updater, if any, was told about it);
/// returns false if nothing was changed.
// NOTE(review): the opening line of the signature (embedded line 4586) is not
// present in this listing. A later caller invokes
// tryWidenCondBranchToCondBranch(PBI, BI, DTU), so this is presumably that
// function -- confirm against the original file.
4587 DomTreeUpdater *DTU) {
4588 // TODO: This can be generalized in two important ways:
4589 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4590 // values from the PBI edge.
4591 // 2) We can sink side effecting instructions into BI's fallthrough
4592 // successor provided they don't contribute to computation of
4593 // BI's condition.
4594 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4595 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
// PBI must be a widenable branch whose true edge leads to BI's block, and
// BI's block must have a single predecessor.
4596 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4597 !BI->getParent()->getSinglePredecessor())
4598 return false;
4599 if (!IfFalseBB->phis().empty())
4600 return false; // TODO
4601 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4602 // may undo the transform done here.
4603 // TODO: There might be a more fine-grained solution to this.
4604 if (!llvm::succ_empty(IfFalseBB))
4605 return false;
4606 // Use lambda to lazily compute expensive condition after cheap ones.
4607 auto NoSideEffects = [](BasicBlock &BB) {
4608 return llvm::none_of(BB, [](const Instruction &I) {
4609 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4610 });
4611 };
// First try BI's false successor (index 1), then its true successor (index 0).
// The two branches below are identical apart from the successor index.
4612 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4613 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4614 NoSideEffects(*BI->getParent())) {
4615 auto *OldSuccessor = BI->getSuccessor(1);
4616 OldSuccessor->removePredecessor(BI->getParent());
4617 BI->setSuccessor(1, IfFalseBB);
4618 if (DTU)
4619 DTU->applyUpdates(
4620 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4621 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4622 return true;
4623 }
4624 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4625 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4626 NoSideEffects(*BI->getParent())) {
4627 auto *OldSuccessor = BI->getSuccessor(0);
4628 OldSuccessor->removePredecessor(BI->getParent());
4629 BI->setSuccessor(0, IfFalseBB);
4630 if (DTU)
4631 DTU->applyUpdates(
4632 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4633 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4634 return true;
4635 }
4636 return false;
4637}
4638
4639 /// If we have a conditional branch as a predecessor of another block,
4640 /// this function tries to simplify it. We know
4641 /// that PBI and BI are both conditional branches, and BI is in one of the
4642 /// successor blocks of PBI - PBI branches to BI.
///
/// Returns true if one of the simplifications below was applied, false if the
/// IR was left untouched.
// NOTE(review): the opening line of the signature (embedded line 4643) is not
// present in this listing. A comment in a neighbouring function refers to
// SimplifyCondBranchToCondBranch, which is presumably this function -- confirm
// against the original file.
4644 DomTreeUpdater *DTU,
4645 const DataLayout &DL,
4646 const TargetTransformInfo &TTI) {
4647 assert(PBI->isConditional() && BI->isConditional());
4648 BasicBlock *BB = BI->getParent();
4649
4650 // If this block ends with a branch instruction, and if there is a
4651 // predecessor that ends on a branch of the same condition, make
4652 // this conditional branch redundant.
4653 if (PBI->getCondition() == BI->getCondition() &&
4654 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4655 // Okay, the outcome of this conditional branch is statically
4656 // knowable. If this block had a single pred, handle specially, otherwise
4657 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4658 if (BB->getSinglePredecessor()) {
4659 // Turn this into a branch on constant.
4660 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4661 BI->setCondition(
4662 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4663 return true; // Nuke the branch on constant.
4664 }
4665 }
4666
4667 // If the previous block ended with a widenable branch, determine if reusing
4668 // the target block is profitable and legal. This will have the effect of
4669 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4670 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4671 return true;
4672
4673 // If both branches are conditional and both contain stores to the same
4674 // address, remove the stores from the conditionals and create a conditional
4675 // merged store at the end.
4676 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4677 return true;
4678
4679 // If this is a conditional branch in an empty block, and if any
4680 // predecessors are a conditional branch to one of our destinations,
4681 // fold the conditions into logical ops and one cond br.
4682
4683 // Ignore dbg intrinsics.
4684 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4685 return false;
4686
// Find a destination shared by PBI and BI. PBIOp / BIOp are the successor
// indices (0 = true edge, 1 = false edge) through which each branch reaches it.
4687 int PBIOp, BIOp;
4688 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4689 PBIOp = 0;
4690 BIOp = 0;
4691 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4692 PBIOp = 0;
4693 BIOp = 1;
4694 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4695 PBIOp = 1;
4696 BIOp = 0;
4697 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4698 PBIOp = 1;
4699 BIOp = 1;
4700 } else {
4701 return false;
4702 }
4703
4704 // Check to make sure that the other destination of this branch
4705 // isn't BB itself. If so, this is an infinite loop that will
4706 // keep getting unwound.
4707 if (PBI->getSuccessor(PBIOp) == BB)
4708 return false;
4709
4710 // If predecessor's branch probability to BB is too low don't merge branches.
4711 SmallVector<uint32_t, 2> PredWeights;
4712 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4713 extractBranchWeights(*PBI, PredWeights) &&
4714 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4715
// NOTE(review): embedded lines 4716 and 4720 are not present in this listing.
// They presumably declare CommonDestProb (from the two arguments that follow)
// and Likely, which are compared below -- confirm against the original file.
4717 PredWeights[PBIOp],
4718 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4719
4721 if (CommonDestProb >= Likely)
4722 return false;
4723 }
4724
4725 // Do not perform this transformation if it would require
4726 // insertion of a large number of select instructions. For targets
4727 // without predication/cmovs, this is a big pessimization.
4728
4729 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4730 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4731 unsigned NumPhis = 0;
4732 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4733 ++II, ++NumPhis) {
4734 if (NumPhis > 2) // Disable this xform.
4735 return false;
4736 }
4737
4738 // Finally, if everything is ok, fold the branches to logical ops.
4739 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4740
4741 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4742 << "AND: " << *BI->getParent());
4743
// NOTE(review): embedded line 4744 is not present in this listing. `Updates`
// is used below without a visible declaration, so it is presumably declared
// there -- confirm against the original file.
4745
4746 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4747 // branch in it, where one edge (OtherDest) goes back to itself but the other
4748 // exits. We don't *know* that the program avoids the infinite loop
4749 // (even though that seems likely). If we do this xform naively, we'll end up
4750 // recursively unpeeling the loop. Since we know that (after the xform is
4751 // done) that the block *is* infinite if reached, we just make it an obviously
4752 // infinite loop with no cond branch.
4753 if (OtherDest == BB) {
4754 // Insert it at the end of the function, because it's either code,
4755 // or it won't matter if it's hot. :)
4756 BasicBlock *InfLoopBlock =
4757 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4758 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4759 if (DTU)
4760 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4761 OtherDest = InfLoopBlock;
4762 }
4763
4764 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4765
4766 // BI may have other predecessors. Because of this, we leave
4767 // it alone, but modify PBI.
4768
4769 // Make sure we get to CommonDest on True&True directions.
4770 Value *PBICond = PBI->getCondition();
4771 IRBuilder<NoFolder> Builder(PBI);
4772 if (PBIOp)
4773 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4774
4775 Value *BICond = BI->getCondition();
4776 if (BIOp)
4777 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4778
4779 // Merge the conditions.
4780 Value *Cond =
4781 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4782
4783 // Modify PBI to branch on the new condition to the new dests.
4784 PBI->setCondition(Cond);
4785 PBI->setSuccessor(0, CommonDest);
4786 PBI->setSuccessor(1, OtherDest);
4787
4788 if (DTU) {
4789 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4790 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4791
4792 DTU->applyUpdates(Updates);
4793 }
4794
4795 // Update branch weight for PBI.
4796 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4797 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4798 bool HasWeights =
4799 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4800 SuccTrueWeight, SuccFalseWeight);
4801 if (HasWeights) {
4802 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4803 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4804 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4805 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4806 // The weight to CommonDest should be PredCommon * SuccTotal +
4807 // PredOther * SuccCommon.
4808 // The weight to OtherDest should be PredOther * SuccOther.
4809 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4810 PredOther * SuccCommon,
4811 PredOther * SuccOther};
4812 // Halve the weights if any of them cannot fit in an uint32_t
4813 fitWeights(NewWeights);
4814
4815 setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
4816 }
4817
4818 // OtherDest may have phi nodes. If so, add an entry from PBI's
4819 // block that are identical to the entries for BI's block.
4820 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4821
4822 // We know that the CommonDest already had an edge from PBI to
4823 // it. If it has PHIs though, the PHIs may have different
4824 // entries for BB and PBI's BB. If so, insert a select to make
4825 // them agree.
4826 for (PHINode &PN : CommonDest->phis()) {
4827 Value *BIV = PN.getIncomingValueForBlock(BB);
4828 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4829 Value *PBIV = PN.getIncomingValue(PBBIdx);
4830 if (BIV != PBIV) {
4831 // Insert a select in PBI to pick the right value.
4832 SelectInst *NV = cast<SelectInst>(
4833 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4834 PN.setIncomingValue(PBBIdx, NV);
4835 // Although the select has the same condition as PBI, the original branch
4836 // weights for PBI do not apply to the new select because the select's
4837 // 'logical' edges are incoming edges of the phi that is eliminated, not
4838 // the outgoing edges of PBI.
4839 if (HasWeights) {
// These locals shadow the function-scope variables of the same names and are
// computed from the same expressions.
4840 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4841 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4842 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4843 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4844 // The weight to PredCommonDest should be PredCommon * SuccTotal.
4845 // The weight to PredOtherDest should be PredOther * SuccCommon.
4846 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4847 PredOther * SuccCommon};
4848
4849 fitWeights(NewWeights);
4850
4851 setBranchWeights(NV, NewWeights[0], NewWeights[1],
4852 /*IsExpected=*/false);
4853 }
4854 }
4855 }
4856
4857 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4858 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4859
4860 // This basic block is probably dead. We know it has at least
4861 // one fewer predecessor.
4862 return true;
4863}
4864
4865 // Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4866 // true or to FalseBB if Cond is false.
4867 // Takes care of updating the successors and removing the old terminator.
4868 // Also makes sure not to introduce new successors by assuming that edges to
4869 // non-successor TrueBBs and FalseBBs aren't reachable.
// TrueWeight / FalseWeight are attached as branch weights to a newly created
// conditional branch, but only when they differ from each other.
// Every path through the visible body ends in `return true`.
4870bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4871 Value *Cond, BasicBlock *TrueBB,
4872 BasicBlock *FalseBB,
4873 uint32_t TrueWeight,
4874 uint32_t FalseWeight) {
4875 auto *BB = OldTerm->getParent();
4876 // Remove any superfluous successor edges from the CFG.
4877 // First, figure out which successors to preserve.
4878 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4879 // successor.
4880 BasicBlock *KeepEdge1 = TrueBB;
4881 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4882
4883 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4884
4885 // Then remove the rest.
// KeepEdge1 / KeepEdge2 are reset to nullptr once the matching successor has
// been seen, so a non-null value after this loop means "not found".
4886 for (BasicBlock *Succ : successors(OldTerm)) {
4887 // Make sure only to keep exactly one copy of each edge.
4888 if (Succ == KeepEdge1)
4889 KeepEdge1 = nullptr;
4890 else if (Succ == KeepEdge2)
4891 KeepEdge2 = nullptr;
4892 else {
4893 Succ->removePredecessor(BB,
4894 /*KeepOneInputPHIs=*/true);
4895
// Duplicate edges to TrueBB/FalseBB also land here; only blocks that stop
// being successors altogether are recorded for the dominator tree update.
4896 if (Succ != TrueBB && Succ != FalseBB)
4897 RemovedSuccessors.insert(Succ);
4898 }
4899 }
4900
4901 IRBuilder<> Builder(OldTerm);
4902 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4903
4904 // Insert an appropriate new terminator.
4905 if (!KeepEdge1 && !KeepEdge2) {
4906 if (TrueBB == FalseBB) {
4907 // We were only looking for one successor, and it was present.
4908 // Create an unconditional branch to it.
4909 Builder.CreateBr(TrueBB);
4910 } else {
4911 // We found both of the successors we were looking for.
4912 // Create a conditional branch sharing the condition of the select.
4913 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4914 if (TrueWeight != FalseWeight)
4915 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
4916 }
4917 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4918 // Neither of the selected blocks were successors, so this
4919 // terminator must be unreachable.
4920 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4921 } else {
4922 // One of the selected values was a successor, but the other wasn't.
4923 // Insert an unconditional branch to the one that was found;
4924 // the edge to the one that wasn't must be unreachable.
4925 if (!KeepEdge1) {
4926 // Only TrueBB was found.
4927 Builder.CreateBr(TrueBB);
4928 } else {
4929 // Only FalseBB was found.
4930 Builder.CreateBr(FalseBB);
4931 }
4932 }
4933
// NOTE(review): embedded line 4934 is not present in this listing. The header
// comment promises that the old terminator is removed, so that removal
// presumably happens here -- confirm against the original file.
4935
4936 if (DTU) {
// NOTE(review): embedded line 4937 is not present in this listing. `Updates`
// is used below without a visible declaration, so it is presumably declared
// there -- confirm against the original file.
4938 Updates.reserve(RemovedSuccessors.size());
4939 for (auto *RemovedSuccessor : RemovedSuccessors)
4940 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4941 DTU->applyUpdates(Updates);
4942 }
4943
4944 return true;
4945}
4946
4947 // Replaces
4948 // (switch (select cond, X, Y)) on constant X, Y
4949 // with a branch - conditional if X and Y lead to distinct BBs,
4950 // unconditional otherwise.
// Returns false without changing anything unless both select arms are
// ConstantInts; otherwise returns whatever simplifyTerminatorOnSelect returns.
4951bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4952 SelectInst *Select) {
4953 // Check for constant integer values in the select.
4954 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4955 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4956 if (!TrueVal || !FalseVal)
4957 return false;
4958
4959 // Find the relevant condition and destinations.
4960 Value *Condition = Select->getCondition();
4961 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4962 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4963
4964 // Get weight for TrueBB and FalseBB.
// Both weights stay 0 when the switch carries no branch-weight metadata or
// when the number of weights does not equal the number of cases plus one.
4965 uint32_t TrueWeight = 0, FalseWeight = 0;
// NOTE(review): embedded line 4966 is not present in this listing. `Weights`
// is used below without a visible declaration, so it is presumably declared
// there -- confirm against the original file.
4967 bool HasWeights = hasBranchWeightMD(*SI);
4968 if (HasWeights) {
4969 getBranchWeights(SI, Weights);
4970 if (Weights.size() == 1 + SI->getNumCases()) {
4971 TrueWeight =
4972 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4973 FalseWeight =
4974 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4975 }
4976 }
4977
4978 // Perform the actual simplification.
4979 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4980 FalseWeight);
4981}
4982
4983// Replaces
4984// (indirectbr (select cond, blockaddress(@fn, BlockA),
4985// blockaddress(@fn, BlockB)))
4986// with
4987// (br cond, BlockA, BlockB).
4988bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4989 SelectInst *SI) {
4990 // Check that both operands of the select are block addresses.
4991 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4992 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4993 if (!TBA || !FBA)
4994 return false;
4995
4996 // Extract the actual blocks.
4997 BasicBlock *TrueBB = TBA->getBasicBlock();
4998 BasicBlock *FalseBB = FBA->getBasicBlock();
4999
5000 // Perform the actual simplification.
5001 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
5002 0);
5003}
5004
5005 /// This is called when we find an icmp instruction
5006 /// (a seteq/setne with a constant) as the only instruction in a
5007 /// block that ends with an uncond branch. We are looking for a very specific
5008 /// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5009 /// this case, we merge the first two "or's of icmp" into a switch, but then the
5010 /// default value goes to an uncond block with a seteq in it, we get something
5011 /// like:
5012 ///
5013 /// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5014 /// DEFAULT:
5015 /// %tmp = icmp eq i8 %A, 92
5016 /// br label %end
5017 /// end:
5018 /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5019 ///
5020 /// We prefer to split the edge to 'end' so that there is a true/false entry to
5021 /// the PHI, merging the third icmp into the switch.
///
/// Returns false if the pattern does not match. The two constant-folding
/// paths return the result of requestResimplify(); the edge-splitting path at
/// the end returns true.
5022bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5023 ICmpInst *ICI, IRBuilder<> &Builder) {
5024 BasicBlock *BB = ICI->getParent();
5025
5026 // If the block has any PHIs in it or the icmp has multiple uses, it is too
5027 // complex.
5028 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5029 return false;
5030
5031 Value *V = ICI->getOperand(0);
// cast<> (not dyn_cast<>) is used here, so the second icmp operand is
// required to be a ConstantInt.
5032 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5033
5034 // The pattern we're looking for is where our only predecessor is a switch on
5035 // 'V' and this block is the default case for the switch. In this case we can
5036 // fold the compared value into the switch to simplify things.
5037 BasicBlock *Pred = BB->getSinglePredecessor();
5038 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5039 return false;
5040
5041 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5042 if (SI->getCondition() != V)
5043 return false;
5044
5045 // If BB is reachable on a non-default case, then we simply know the value of
5046 // V in this block. Substitute it and constant fold the icmp instruction
5047 // away.
5048 if (SI->getDefaultDest() != BB) {
5049 ConstantInt *VVal = SI->findCaseDest(BB);
5050 assert(VVal && "Should have a unique destination value");
5051 ICI->setOperand(0, VVal);
5052
// This inner V shadows the outer V declared above.
5053 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5054 ICI->replaceAllUsesWith(V);
5055 ICI->eraseFromParent();
5056 }
5057 // BB is now empty, so it is likely to simplify away.
5058 return requestResimplify();
5059 }
5060
5061 // Ok, the block is reachable from the default dest. If the constant we're
5062 // comparing exists in one of the other edges, then we can constant fold ICI
5063 // and zap it.
5064 if (SI->findCaseValue(Cst) != SI->case_default()) {
5065 Value *V;
// NOTE(review): embedded lines 5067 and 5069 (the two assignments to V) are
// not present in this listing. Since BB is only reached on the default edge
// and Cst has its own case, the compared value cannot equal Cst here, so the
// EQ arm presumably yields false and the other arm true -- confirm against
// the original file.
5066 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5068 else
5070
5071 ICI->replaceAllUsesWith(V);
5072 ICI->eraseFromParent();
5073 // BB is now empty, so it is likely to simplify away.
5074 return requestResimplify();
5075 }
5076
5077 // The use of the icmp has to be in the 'end' block, by the only PHI node in
5078 // the block.
5079 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5080 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5081 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5082 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
5083 return false;
5084
5085 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5086 // true in the PHI.
5087 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5088 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5089
5090 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5091 std::swap(DefaultCst, NewCst);
5092
5093 // Replace ICI (which is used by the PHI for the default value) with true or
5094 // false depending on if it is EQ or NE.
5095 ICI->replaceAllUsesWith(DefaultCst);
5096 ICI->eraseFromParent();
5097
// NOTE(review): embedded line 5098 is not present in this listing. `Updates`
// is used below without a visible declaration, so it is presumably declared
// there -- confirm against the original file.
5099
5100 // Okay, the switch goes to this block on a default value. Add an edge from
5101 // the switch to the merge point on the compared value.
5102 BasicBlock *NewBB =
5103 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5104 {
// NOTE(review): embedded lines 5105 and 5107 are not present in this listing.
// `SIW` and `NewW` are used below without visible declarations, so they are
// presumably declared there -- confirm against the original file.
5106 auto W0 = SIW.getSuccessorWeight(0);
// When successor 0 has a weight, it is replaced by its half (rounded up) and
// the same value is given to the new case.
5108 if (W0) {
5109 NewW = ((uint64_t(*W0) + 1) >> 1);
5110 SIW.setSuccessorWeight(0, *NewW);
5111 }
5112 SIW.addCase(Cst, NewBB, NewW);
5113 if (DTU)
5114 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5115 }
5116
5117 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5118 Builder.SetInsertPoint(NewBB);
5119 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5120 Builder.CreateBr(SuccBlock);
5121 PHIUse->addIncoming(NewCst, NewBB);
5122 if (DTU) {
5123 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5124 DTU->applyUpdates(Updates);
5125 }
5126 return true;
5127}
5128
5129/// The specified branch is a conditional branch.
5130/// Check to see if it is branching on an or/and chain of icmp instructions, and
5131/// fold it into a switch instruction if so.
///
/// Returns false when the condition is not an instruction, when no common
/// compared value was gathered, when at most one icmp was used, or when an
/// extra (non-foldable) condition exists but fewer than two distinct constants
/// remain. Otherwise a switch on the compared value is emitted at \p BI and
/// true is returned.
5132bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5133 IRBuilder<> &Builder,
5134 const DataLayout &DL) {
5135 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
5136 if (!Cond)
5137 return false;
5138
5139 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5140 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5141 // 'setne's and'ed together, collect them.
5142
5143 // Try to gather values from a chain of and/or to be turned into a switch
5144 ConstantComparesGatherer ConstantCompare(Cond, DL);
5145 // Unpack the result
5146 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5147 Value *CompVal = ConstantCompare.CompValue;
5148 unsigned UsedICmps = ConstantCompare.UsedICmps;
5149 Value *ExtraCase = ConstantCompare.Extra;
5150
5151 // If we didn't have a multiply compared value, fail.
5152 if (!CompVal)
5153 return false;
5154
5155 // Avoid turning single icmps into a switch.
5156 if (UsedICmps <= 1)
5157 return false;
5158
     // A logical-or at the root selects the "equal" form: the true successor
     // becomes the case destination. Anything else is handled as the and-form,
     // where the successors are swapped further down.
5159 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
5160
5161 // There might be duplicate constants in the list, which the switch
5162 // instruction can't handle, remove them now.
5163 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5164 Values.erase(llvm::unique(Values), Values.end());
5165
5166 // If Extra was used, we require at least two switch values to do the
5167 // transformation. A switch with one value is just a conditional branch.
5168 if (ExtraCase && Values.size() < 2)
5169 return false;
5170
5171 // TODO: Preserve branch weight metadata, similarly to how
5172 // foldValueComparisonIntoPredecessors preserves it.
5173
5174 // Figure out which block is which destination.
5175 BasicBlock *DefaultBB = BI->getSuccessor(1);
5176 BasicBlock *EdgeBB = BI->getSuccessor(0);
5177 if (!TrueWhenEqual)
5178 std::swap(DefaultBB, EdgeBB);
5179
5180 BasicBlock *BB = BI->getParent();
5181
5182 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5183 << " cases into SWITCH. BB is:\n"
5184 << *BB);
5185
     // NOTE(review): source line 5186 is absent from this listing. `Updates` is
     // used below without a visible declaration, so it is presumably declared
     // there -- confirm against the real file.
5187
5188 // If there are any extra values that couldn't be folded into the switch
5189 // then we evaluate them with an explicit branch first. Split the block
5190 // right before the condbr to handle it.
5191 if (ExtraCase) {
5192 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5193 /*MSSAU=*/nullptr, "switch.early.test");
5194
5195 // Remove the uncond branch added to the old block.
5196 Instruction *OldTI = BB->getTerminator();
5197 Builder.SetInsertPoint(OldTI);
5198
5199 // There can be an unintended UB if extra values are Poison. Before the
5200 // transformation, extra values may not be evaluated according to the
5201 // condition, and it will not raise UB. But after transformation, we are
5202 // evaluating extra values before checking the condition, and it will raise
5203 // UB. It can be solved by adding freeze instruction to extra values.
5204 AssumptionCache *AC = Options.AC;
5205
5206 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5207 ExtraCase = Builder.CreateFreeze(ExtraCase);
5208
     // In the "equal" form a true extra condition goes straight to EdgeBB; in
     // the other form a false extra condition does. The remaining outcome falls
     // through to NewBB, where the switch is built.
5209 if (TrueWhenEqual)
5210 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
5211 else
5212 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5213
5214 OldTI->eraseFromParent();
5215
5216 if (DTU)
5217 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5218
5219 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5220 // for the edge we just added.
5221 addPredecessorToBlock(EdgeBB, BB, NewBB);
5222
5223 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5224 << "\nEXTRABB = " << *BB);
     // From here on, BB refers to the block that holds the original branch.
5225 BB = NewBB;
5226 }
5227
5228 Builder.SetInsertPoint(BI);
5229 // Convert pointer to int before we switch.
5230 if (CompVal->getType()->isPointerTy()) {
5231 CompVal = Builder.CreatePtrToInt(
5232 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5233 }
5234
5235 // Create the new switch instruction now.
5236 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5237
5238 // Add all of the 'cases' to the switch instruction.
5239 for (unsigned i = 0, e = Values.size(); i != e; ++i)
5240 New->addCase(Values[i], EdgeBB);
5241
5242 // We added edges from PI to the EdgeBB. As such, if there were any
5243 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5244 // the number of edges added.
5245 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5246 PHINode *PN = cast<PHINode>(BBI);
5247 Value *InVal = PN->getIncomingValueForBlock(BB);
     // The PHI already has an entry for BB (read just above), so only
     // Values.size() - 1 further copies of it are appended.
5248 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5249 PN->addIncoming(InVal, BB);
5250 }
5251
5252 // Erase the old branch instruction.
     // NOTE(review): source line 5253, which presumably performs the erase
     // announced above, is absent from this listing.
5254 if (DTU)
5255 DTU->applyUpdates(Updates);
5256
5257 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5258 return true;
5259}
5260
5261bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5262 if (isa<PHINode>(RI->getValue()))
5263 return simplifyCommonResume(RI);
5264 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
5265 RI->getValue() == RI->getParent()->getFirstNonPHI())
5266 // The resume must unwind the exception that caused control to branch here.
5267 return simplifySingleResume(RI);
5268
5269 return false;
5270}
5271
5272// Check if cleanup block is empty
// NOTE(review): the signature (source line 5273) is absent from this listing.
// From the body: `R` is iterated as a range of instructions, and the function
// returns true only if every instruction in it is an intrinsic call whose ID
// is dbg_declare, dbg_value, dbg_label or lifetime_end. An empty range also
// yields true.
5274 for (Instruction &I : R) {
5275 auto *II = dyn_cast<IntrinsicInst>(&I);
     // Any non-intrinsic instruction means the range is not empty.
5276 if (!II)
5277 return false;
5278
5279 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5280 switch (IntrinsicID) {
5281 case Intrinsic::dbg_declare:
5282 case Intrinsic::dbg_value:
5283 case Intrinsic::dbg_label:
5284 case Intrinsic::lifetime_end:
     // These intrinsics are tolerated; keep scanning.
5285 break;
5286 default:
5287 return false;
5288 }
5289 }
5290 return true;
5291}
5292
5293// Simplify resume that is shared by several landing pads (phi of landing pad).
// Returns true if at least one incoming landing-pad block was found to be
// trivial and was rewritten; returns false without rewriting otherwise.
5294bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5295 BasicBlock *BB = RI->getParent();
5296
5297 // Check that there are no other instructions except for debug and lifetime
5298 // intrinsics between the phi's and resume instruction.
     // NOTE(review): the opening of this condition (source line 5299) is absent
     // from this listing; only its argument continuation is visible.
5300 make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
5301 return false;
5302
5303 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5304 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5305
5306 // Check incoming blocks to see if any of them are trivial.
5307 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5308 Idx++) {
5309 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5310 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5311
5312 // If the block has other successors, we can not delete it because
5313 // it has other dependents.
5314 if (IncomingBB->getUniqueSuccessor() != BB)
5315 continue;
5316
5317 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
5318 // Not the landing pad that caused the control to branch here.
5319 if (IncomingValue != LandingPad)
5320 continue;
5321
     // NOTE(review): the opening of this condition (source line 5322) is absent
     // from this listing.
5323 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5324 TrivialUnwindBlocks.insert(IncomingBB);
5325 }
5326
5327 // If no trivial unwind blocks, don't do any simplifications.
5328 if (TrivialUnwindBlocks.empty())
5329 return false;
5330
5331 // Turn all invokes that unwind here into calls.
5332 for (auto *TrivialBB : TrivialUnwindBlocks) {
5333 // Blocks that will be simplified should be removed from the phi node.
5334 // Note there could be multiple edges to the resume block, and we need
5335 // to remove them all.
5336 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5337 BB->removePredecessor(TrivialBB, true);
5338
5339 for (BasicBlock *Pred :
     // NOTE(review): the range expression of this loop (source line 5340) is
     // absent from this listing.
5341 removeUnwindEdge(Pred, DTU);
5342 ++NumInvokes;
5343 }
5344
5345 // In each SimplifyCFG run, only the current processed block can be erased.
5346 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5347 // of erasing TrivialBB, we only remove the branch to the common resume
5348 // block so that we can later erase the resume block since it has no
5349 // predecessors.
5350 TrivialBB->getTerminator()->eraseFromParent();
5351 new UnreachableInst(RI->getContext(), TrivialBB);
5352 if (DTU)
5353 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5354 }
5355
5356 // Delete the resume block if all its predecessors have been removed.
5357 if (pred_empty(BB))
5358 DeleteDeadBlock(BB, DTU);
5359
     // The set is non-empty here (the empty case returned earlier and nothing
     // is removed from it afterwards), so this evaluates to true.
5360 return !TrivialUnwindBlocks.empty();
5361}
5362
5363// Simplify resume that is only used by a single (non-phi) landing pad.
// Expects the resumed value to be the landingpad that is the first non-PHI
// instruction of RI's block (asserted below). On success the block is deleted
// and true is returned.
5364bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5365 BasicBlock *BB = RI->getParent();
5366 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
5367 assert(RI->getValue() == LPInst &&
5368 "Resume must unwind the exception that caused control to here");
5369
5370 // Check that there are no other instructions except for debug intrinsics.
     // NOTE(review): the opening of this condition (source line 5371) is absent
     // from this listing; only its argument continuation is visible.
5372 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5373 return false;
5374
5375 // Turn all invokes that unwind here into calls and delete the basic block.
     // NOTE(review): the loop header (source line 5376) is absent from this
     // listing; `Pred` is presumably each predecessor of BB -- confirm.
5377 removeUnwindEdge(Pred, DTU);
5378 ++NumInvokes;
5379 }
5380
5381 // The landingpad is now unreachable. Zap it.
5382 DeleteDeadBlock(BB, DTU);
5383 return true;
5384}
5385
// NOTE(review): the signature (source line 5386) is absent from this listing.
// The body uses `RI` (a cleanupret, judging by getCleanupPad/getUnwindDest) and
// `DTU`, and returns bool: false when the cleanup is not removable, true after
// the cleanup block has been bypassed and deleted.
5387 // If this is a trivial cleanup pad that executes no instructions, it can be
5388 // eliminated. If the cleanup pad continues to the caller, any predecessor
5389 // that is an EH pad will be updated to continue to the caller and any
5390 // predecessor that terminates with an invoke instruction will have its invoke
5391 // instruction converted to a call instruction. If the cleanup pad being
5392 // simplified does not continue to the caller, each predecessor will be
5393 // updated to continue to the unwind destination of the cleanup pad being
5394 // simplified.
5395 BasicBlock *BB = RI->getParent();
5396 CleanupPadInst *CPInst = RI->getCleanupPad();
5397 if (CPInst->getParent() != BB)
5398 // This isn't an empty cleanup.
5399 return false;
5400
5401 // We cannot kill the pad if it has multiple uses. This typically arises
5402 // from unreachable basic blocks.
5403 if (!CPInst->hasOneUse())
5404 return false;
5405
5406 // Check that there are no other instructions except for benign intrinsics.
     // NOTE(review): the opening of this condition (source line 5407) is absent
     // from this listing; only its argument continuation is visible.
5408 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5409 return false;
5410
5411 // If the cleanup return we are simplifying unwinds to the caller, this will
5412 // set UnwindDest to nullptr.
5413 BasicBlock *UnwindDest = RI->getUnwindDest();
5414 Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
5415
5416 // We're about to remove BB from the control flow. Before we do, sink any
5417 // PHINodes into the unwind destination. Doing this before changing the
5418 // control flow avoids some potentially slow checks, since we can currently
5419 // be certain that UnwindDest and BB have no common predecessors (since they
5420 // are both EH pads).
5421 if (UnwindDest) {
5422 // First, go through the PHI nodes in UnwindDest and update any nodes that
5423 // reference the block we are removing
5424 for (PHINode &DestPN : UnwindDest->phis()) {
5425 int Idx = DestPN.getBasicBlockIndex(BB);
5426 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5427 assert(Idx != -1);
5428 // This PHI node has an incoming value that corresponds to a control
5429 // path through the cleanup pad we are removing. If the incoming
5430 // value is in the cleanup pad, it must be a PHINode (because we
5431 // verified above that the block is otherwise empty). Otherwise, the
5432 // value is either a constant or a value that dominates the cleanup
5433 // pad being removed.
5434 //
5435 // Because BB and UnwindDest are both EH pads, all of their
5436 // predecessors must unwind to these blocks, and since no instruction
5437 // can have multiple unwind destinations, there will be no overlap in
5438 // incoming blocks between SrcPN and DestPN.
5439 Value *SrcVal = DestPN.getIncomingValue(Idx);
5440 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5441
     // Only a PHI that lives in BB itself needs translating per predecessor;
     // any other value is forwarded unchanged for every predecessor of BB.
5442 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5443 for (auto *Pred : predecessors(BB)) {
5444 Value *Incoming =
5445 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5446 DestPN.addIncoming(Incoming, Pred);
5447 }
5448 }
5449
5450 // Sink any remaining PHI nodes directly into UnwindDest.
5451 Instruction *InsertPt = DestEHPad;
5452 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5453 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5454 // If the PHI node has no uses or all of its uses are in this basic
5455 // block (meaning they are debug or lifetime intrinsics), just leave
5456 // it. It will be erased when we erase BB below.
5457 continue;
5458
5459 // Otherwise, sink this PHI node into UnwindDest.
5460 // Any predecessors to UnwindDest which are not already represented
5461 // must be back edges which inherit the value from the path through
5462 // BB. In this case, the PHI value must reference itself.
5463 for (auto *pred : predecessors(UnwindDest))
5464 if (pred != BB)
5465 PN.addIncoming(&PN, pred);
5466 PN.moveBefore(InsertPt);
5467 // Also, add a dummy incoming value for the original BB itself,
5468 // so that the PHI is well-formed until we drop said predecessor.
5469 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5470 }
5471 }
5472
5473 std::vector<DominatorTree::UpdateType> Updates;
5474
5475 // We use make_early_inc_range here because we will remove all predecessors.
     // NOTE(review): the loop header (source line 5476) is absent from this
     // listing; `PredBB` is presumably each predecessor of BB -- confirm.
5477 if (UnwindDest == nullptr) {
     // Pending updates are applied and cleared before removeUnwindEdge, which
     // is itself handed DTU.
5478 if (DTU) {
5479 DTU->applyUpdates(Updates);
5480 Updates.clear();
5481 }
5482 removeUnwindEdge(PredBB, DTU);
5483 ++NumInvokes;
5484 } else {
     // Retarget the predecessor's terminator from BB to UnwindDest.
5485 BB->removePredecessor(PredBB);
5486 Instruction *TI = PredBB->getTerminator();
5487 TI->replaceUsesOfWith(BB, UnwindDest);
5488 if (DTU) {
5489 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5490 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5491 }
5492 }
5493 }
5494
5495 if (DTU)
5496 DTU->applyUpdates(Updates);
5497
5498 DeleteDeadBlock(BB, DTU);
5499
5500 return true;
5501}
5502
5503// Try to merge two cleanuppads together.
// NOTE(review): the signature (source line 5504) is absent from this listing.
// The body uses `RI` as a cleanupret and returns bool: true only when RI's
// unwind destination has RI's block as its single predecessor and begins with
// a cleanuppad, in which case that pad is folded into RI's own pad and RI is
// replaced by a branch.
5505 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5506 // with.
5507 BasicBlock *UnwindDest = RI->getUnwindDest();
5508 if (!UnwindDest)
5509 return false;
5510
5511 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5512 // be safe to merge without code duplication.
5513 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5514 return false;
5515
5516 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5517 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5518 if (!SuccessorCleanupPad)
5519 return false;
5520
5521 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5522 // Replace any uses of the successor cleanuppad with the predecessor pad.
5523 // The only cleanuppad uses should be this cleanupret, its cleanupret and
5524 // funclet bundle operands.
5525 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5526 // Remove the old cleanuppad.
5527 SuccessorCleanupPad->eraseFromParent();
5528 // Now, we simply replace the cleanupret with a branch to the unwind
5529 // destination.
5530 BranchInst::Create(UnwindDest, RI->getParent());
5531 RI->eraseFromParent();
5532
5533 return true;
5534}
5535
5536bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5537 // It is possible to transiantly have an undef cleanuppad operand because we
5538 // have deleted some, but not all, dead blocks.
5539 // Eventually, this block will be deleted.
5540 if (isa<UndefValue>(RI->getOperand(0)))
5541 return false;
5542
5543 if (mergeCleanupPad(RI))
5544 return true;
5545
5546 if (removeEmptyCleanup(RI, DTU))
5547 return true;
5548
5549 return false;
5550}
5551
5552// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Simplify around an unreachable instruction: erase the instructions directly
// before it that may be dropped, and, if the unreachable then leads its block,
// rewrite each predecessor's terminator so it no longer targets this block.
// Returns true if anything was changed.
5553bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5554 BasicBlock *BB = UI->getParent();
5555
5556 bool Changed = false;
5557
5558 // Ensure that any debug-info records that used to occur after the Unreachable
5559 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5560 // the block.
     // NOTE(review): the statement this comment describes (source line 5561) is
     // absent from this listing.
5562
5563 // Debug-info records on the unreachable inst itself should be deleted, as
5564 // below we delete everything past the final executable instruction.
5565 UI->dropDbgRecords();
5566
5567 // If there are any instructions immediately before the unreachable that can
5568 // be removed, do so.
5569 while (UI->getIterator() != BB->begin()) {
     // NOTE(review): the declaration of `BBI` (source line 5570) is absent from
     // this listing; it is stepped back to the instruction before UI below.
5571 --BBI;
5572
     // NOTE(review): the condition guarding this `break` (source line 5573) is
     // absent from this listing.
5574 break; // Can not drop any more instructions. We're done here.
5575 // Otherwise, this instruction can be freely erased,
5576 // even if it is not side-effect free.
5577
5578 // Note that deleting EH's here is in fact okay, although it involves a bit
5579 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5580 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5581 // and we can therefore guarantee this block will be erased.
5582
5583 // If we're deleting this, we're deleting any subsequent debug info, so
5584 // delete DbgRecords.
5585 BBI->dropDbgRecords();
5586
5587 // Delete this instruction (any uses are guaranteed to be dead)
5588 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5589 BBI->eraseFromParent();
5590 Changed = true;
5591 }
5592
5593 // If the unreachable instruction is the first in the block, take a gander
5594 // at all of the predecessors of this instruction, and simplify them.
5595 if (&BB->front() != UI)
5596 return Changed;
5597
5598 std::vector<DominatorTree::UpdateType> Updates;
5599
     // NOTE(review): the declaration of `Preds` (source line 5600) is absent
     // from this listing; presumably a snapshot of BB's predecessors -- confirm.
5601 for (BasicBlock *Predecessor : Preds) {
5602 Instruction *TI = Predecessor->getTerminator();
5603 IRBuilder<> Builder(TI);
5604 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5605 // We could either have a proper unconditional branch,
5606 // or a degenerate conditional branch with matching destinations.
5607 if (all_of(BI->successors(),
5608 [BB](auto *Successor) { return Successor == BB; })) {
5609 new UnreachableInst(TI->getContext(), TI->getIterator());
5610 TI->eraseFromParent();
5611 Changed = true;
5612 } else {
5613 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5614 Value* Cond = BI->getCondition();
5615 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5616 "The destinations are guaranteed to be different here.");
     // Record, as an assumption, the condition value under which BB is not
     // taken, then branch unconditionally to the other successor.
5617 CallInst *Assumption;
5618 if (BI->getSuccessor(0) == BB) {
5619 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5620 Builder.CreateBr(BI->getSuccessor(1));
5621 } else {
5622 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5623 Assumption = Builder.CreateAssumption(Cond);
5624 Builder.CreateBr(BI->getSuccessor(0));
5625 }
5626 if (Options.AC)
5627 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5628
     // NOTE(review): source line 5629 is absent from this listing; no removal
     // of the old conditional branch is visible here.
5630 Changed = true;
5631 }
5632 if (DTU)
5633 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5634 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
     // NOTE(review): the declaration of `SU` (source line 5635) is absent from
     // this listing; it is used with both `->` and `.removeCase` below.
5636 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5637 if (i->getCaseSuccessor() != BB) {
5638 ++i;
5639 continue;
5640 }
5641 BB->removePredecessor(SU->getParent());
     // Continue from the iterator returned by removeCase and refresh the end.
5642 i = SU.removeCase(i);
5643 e = SU->case_end();
5644 Changed = true;
5645 }
5646 // Note that the default destination can't be removed!
5647 if (DTU && SI->getDefaultDest() != BB)
5648 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5649 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5650 if (II->getUnwindDest() == BB) {
     // Pending updates are applied and cleared before removeUnwindEdge, which
     // is itself handed DTU.
5651 if (DTU) {
5652 DTU->applyUpdates(Updates);
5653 Updates.clear();
5654 }
5655 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5656 if (!CI->doesNotThrow())
5657 CI->setDoesNotThrow();
5658 Changed = true;
5659 }
5660 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5661 if (CSI->getUnwindDest() == BB) {
5662 if (DTU) {
5663 DTU->applyUpdates(Updates);
5664 Updates.clear();
5665 }
5666 removeUnwindEdge(TI->getParent(), DTU);
5667 Changed = true;
5668 continue;
5669 }
5670
5671 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5672 E = CSI->handler_end();
5673 I != E; ++I) {
5674 if (*I == BB) {
5675 CSI->removeHandler(I);
     // Step both iterators back after the removal; presumably removeHandler
     // shifts the remaining handlers down so the loop's ++I lands on the next
     // unvisited one -- confirm against CatchSwitchInst.
5676 --I;
5677 --E;
5678 Changed = true;
5679 }
5680 }
5681 if (DTU)
5682 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5683 if (CSI->getNumHandlers() == 0) {
5684 if (CSI->hasUnwindDest()) {
5685 // Redirect all predecessors of the block containing CatchSwitchInst
5686 // to instead branch to the CatchSwitchInst's unwind destination.
5687 if (DTU) {
5688 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5689 Updates.push_back({DominatorTree::Insert,
5690 PredecessorOfPredecessor,
5691 CSI->getUnwindDest()});
5692 Updates.push_back({DominatorTree::Delete,
5693 PredecessorOfPredecessor, Predecessor});
5694 }
5695 }
5696 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5697 } else {
5698 // Rewrite all preds to unwind to caller (or from invoke to call).
5699 if (DTU) {
5700 DTU->applyUpdates(Updates);
5701 Updates.clear();
5702 }
     // Iterate over a copy of the predecessor list because removeUnwindEdge
     // is called on each predecessor.
5703 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5704 for (BasicBlock *EHPred : EHPreds)
5705 removeUnwindEdge(EHPred, DTU);
5706 }
5707 // The catchswitch is no longer reachable.
5708 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5709 CSI->eraseFromParent();
5710 Changed = true;
5711 }
5712 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
     // CRI is only read inside the assert; the void cast keeps it "used" when
     // asserts are compiled out.
5713 (void)CRI;
5714 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5715 "Expected to always have an unwind to BB.");
5716 if (DTU)
5717 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5718 new UnreachableInst(TI->getContext(), TI->getIterator());
5719 TI->eraseFromParent();
5720 Changed = true;
5721 }
5722 }
5723
5724 if (DTU)
5725 DTU->applyUpdates(Updates);
5726
5727 // If this block is now dead, remove it.
5728 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5729 DeleteDeadBlock(BB, DTU);
5730 return true;
5731 }
5732
5733 return Changed;
5734}
5735
// NOTE(review): the signature (source line 5736) and one statement (source
// line 5739) are absent from this listing. From the visible body: `Cases` must
// be non-empty, and true is returned only if every element equals its
// successor plus one, i.e. the values run downward in steps of one. Presumably
// the missing statement sorts `Cases` into that order -- confirm.
5737 assert(Cases.size() >= 1);
5738
5740 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
5741 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5742 return false;
5743 }
5744 return true;
5745}
5746
// NOTE(review): the first line of the signature (source line 5747) is absent
// from this listing. From the visible body: gives `Switch` a fresh default
// block that contains only an unreachable instruction. When
// RemoveOrigDefaultBlock is true, the switch's block is also removed as a
// predecessor of the original default block.
5748 DomTreeUpdater *DTU,
5749 bool RemoveOrigDefaultBlock = true) {
5750 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5751 auto *BB = Switch->getParent();
5752 auto *OrigDefaultBlock = Switch->getDefaultDest();
5753 if (RemoveOrigDefaultBlock)
5754 OrigDefaultBlock->removePredecessor(BB);
5755 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5756 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5757 OrigDefaultBlock);
5758 new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5759 Switch->setDefaultDest(&*NewDefaultBlock);
5760 if (DTU) {
     // NOTE(review): the declaration of `Updates` (source line 5761) is absent
     // from this listing.
5762 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
     // The BB -> OrigDefaultBlock edge is only reported as deleted when no
     // other successor of BB still targets that block.
5763 if (RemoveOrigDefaultBlock &&
5764 !is_contained(successors(BB), OrigDefaultBlock))
5765 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5766 DTU->applyUpdates(Updates);
5767 }
5768}
5769
5770/// Turn a switch into an integer range comparison and branch.
5771/// Switches with more than 2 destinations are ignored.
5772/// Switches with 1 destination are also ignored.
///
/// Returns true if the switch was replaced by a conditional branch; returns
/// false when the cases reach more than two destinations, only one
/// destination, or when neither group of case values is contiguous.
5773bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5774 IRBuilder<> &Builder) {
5775 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5776
     // A default block that starts (after PHIs/debug) with unreachable is
     // treated as "no default".
5777 bool HasDefault =
5778 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5779
5780 auto *BB = SI->getParent();
5781
5782 // Partition the cases into two sets with different destinations.
5783 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5784 BasicBlock *DestB = nullptr;
     // NOTE(review): source lines 5785-5786 are absent from this listing;
     // `CasesA` and `CasesB` are used below without a visible declaration.
5787
5788 for (auto Case : SI->cases()) {
5789 BasicBlock *Dest = Case.getCaseSuccessor();
5790 if (!DestA)
5791 DestA = Dest;
5792 if (Dest == DestA) {
5793 CasesA.push_back(Case.getCaseValue());
5794 continue;
5795 }
5796 if (!DestB)
5797 DestB = Dest;
5798 if (Dest == DestB) {
5799 CasesB.push_back(Case.getCaseValue());
5800 continue;
5801 }
5802 return false; // More than two destinations.
5803 }
5804 if (!DestB)
5805 return false; // All destinations are the same and the default is unreachable
5806
5807 assert(DestA && DestB &&
5808 "Single-destination switch should have been folded.");
5809 assert(DestA != DestB);
5810 assert(DestB != SI->getDefaultDest());
5811 assert(!CasesB.empty() && "There must be non-default cases.");
5812 assert(!CasesA.empty() || HasDefault);
5813
5814 // Figure out if one of the sets of cases form a contiguous range.
5815 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5816 BasicBlock *ContiguousDest = nullptr;
5817 BasicBlock *OtherDest = nullptr;
5818 if (!CasesA.empty() && casesAreContiguous(CasesA)) {
5819 ContiguousCases = &CasesA;
5820 ContiguousDest = DestA;
5821 OtherDest = DestB;
5822 } else if (casesAreContiguous(CasesB)) {
5823 ContiguousCases = &CasesB;
5824 ContiguousDest = DestB;
5825 OtherDest = DestA;
5826 } else
5827 return false;
5828
5829 // Start building the compare and branch.
5830
     // The condition is offset by the negation of the last element of the
     // contiguous set and then compared unsigned against the number of cases.
     // Presumably the last element is the smallest value after
     // casesAreContiguous has run -- confirm.
5831 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
5832 Constant *NumCases =
5833 ConstantInt::get(Offset->getType(), ContiguousCases->size());
5834
5835 Value *Sub = SI->getCondition();
5836 if (!Offset->isNullValue())
5837 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5838
5839 Value *Cmp;
5840 // If NumCases overflowed, then all possible values jump to the successor.
5841 if (NumCases->isNullValue() && !ContiguousCases->empty())
5842 Cmp = ConstantInt::getTrue(SI->getContext());
5843 else
5844 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5845 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
5846
5847 // Update weight for the newly-created conditional branch.
5848 if (hasBranchWeightMD(*SI)) {
     // NOTE(review): the declaration of `Weights` (source line 5849) is absent
     // from this listing.
5850 getBranchWeights(SI, Weights);
5851 if (Weights.size() == 1 + SI->getNumCases()) {
5852 uint64_t TrueWeight = 0;
5853 uint64_t FalseWeight = 0;
5854 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5855 if (SI->getSuccessor(I) == ContiguousDest)
5856 TrueWeight += Weights[I];
5857 else
5858 FalseWeight += Weights[I];
5859 }
     // Halve both sums until each fits in 32 bits.
5860 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5861 TrueWeight /= 2;
5862 FalseWeight /= 2;
5863 }
5864 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
5865 }
5866 }
5867
5868 // Prune obsolete incoming values off the successors' PHI nodes.
     // Each destination keeps exactly one incoming entry from the switch's
     // block; the other PreviousEdges - 1 entries are removed.
5869 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
5870 unsigned PreviousEdges = ContiguousCases->size();
5871 if (ContiguousDest == SI->getDefaultDest())
5872 ++PreviousEdges;
5873 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5874 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5875 }
5876 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5877 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5878 if (OtherDest == SI->getDefaultDest())
5879 ++PreviousEdges;
5880 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5881 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5882 }
5883
5884 // Clean up the default block - it may have phis or other instructions before
5885 // the unreachable terminator.
5886 if (!HasDefault)
     // NOTE(review): the statement controlled by this `if` (source line 5887)
     // is absent from this listing. As listed, the `if` would bind to the
     // declaration below, which is not what the original source does.
5888
5889 auto *UnreachableDefault = SI->getDefaultDest();
5890
5891 // Drop the switch.
5892 SI->eraseFromParent();
5893
5894 if (!HasDefault && DTU)
5895 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5896
5897 return true;
5898}
5899
5900/// Compute masked bits for the condition of a switch
5901/// and use it to remove dead cases.
///
/// NOTE(review): the first line of the signature (source line 5902) is absent
/// from this listing; the body uses `SI` (the switch) and `DTU` in addition to
/// the parameters visible below. Returns true if any case was removed or the
/// default destination was replaced.
5903 AssumptionCache *AC,
5904 const DataLayout &DL) {
5905 Value *Cond = SI->getCondition();
5906 KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
5907
5908 // We can also eliminate cases by determining that their values are outside of
5909 // the limited range of the condition based on how many significant (non-sign)
5910 // bits are in the condition value.
5911 unsigned MaxSignificantBitsInCond =
5912 ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);
5913
5914 // Gather dead cases.
     // NOTE(review): the declaration of `DeadCases` (source line 5915) is
     // absent from this listing.
5916 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5917 SmallVector<BasicBlock *, 8> UniqueSuccessors;
5918 for (const auto &Case : SI->cases()) {
5919 auto *Successor = Case.getCaseSuccessor();
     // The per-successor bookkeeping is only maintained when a DTU is present;
     // it is used at the end to find successors that lost all of their cases.
5920 if (DTU) {
5921 if (!NumPerSuccessorCases.count(Successor))
5922 UniqueSuccessors.push_back(Successor);
5923 ++NumPerSuccessorCases[Successor];
5924 }
5925 const APInt &CaseVal = Case.getCaseValue()->getValue();
     // A case is dead if it sets a bit known to be zero, lacks a bit known to
     // be one, or needs more significant bits than the condition can have.
5926 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
5927 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5928 DeadCases.push_back(Case.getCaseValue());
5929 if (DTU)
5930 --NumPerSuccessorCases[Successor];
5931 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5932 << " is dead.\n");
5933 }
5934 }
5935
5936 // If we can prove that the cases must cover all possible values, the
5937 // default destination becomes dead and we can remove it. If we know some
5938 // of the bits in the value, we can use that to more precisely compute the
5939 // number of possible unique case values.
5940 bool HasDefault =
5941 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5942 const unsigned NumUnknownBits =
5943 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5944 assert(NumUnknownBits <= Known.getBitWidth());
5945 if (HasDefault && DeadCases.empty() &&
5946 NumUnknownBits < 64 /* avoid overflow */) {
5947 uint64_t AllNumCases = 1ULL << NumUnknownBits;
5948 if (SI->getNumCases() == AllNumCases) {
     // NOTE(review): the statement executed before this return (source line
     // 5949) is absent from this listing.
5950 return true;
5951 }
5952 // When only one case value is missing, replace default with that case.
5953 // Eliminating the default branch will provide more opportunities for
5954 // optimization, such as lookup tables.
5955 if (SI->getNumCases() == AllNumCases - 1) {
5956 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
5957 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
5958 if (CondTy->getIntegerBitWidth() > 64 ||
5959 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
5960 return false;
5961
     // The missing value is taken to be the XOR of all present case values.
5962 uint64_t MissingCaseVal = 0;
5963 for (const auto &Case : SI->cases())
5964 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
5965 auto *MissingCase =
5966 cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
     // NOTE(review): the declaration of `SIW` (source line 5967) is absent from
     // this listing.
     // The new case inherits the default's weight (successor 0); the default
     // weight is then set to zero.
5968 SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
5969 createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
5970 SIW.setSuccessorWeight(0, 0);
5971 return true;
5972 }
5973 }
5974
5975 if (DeadCases.empty())
5976 return false;
5977
     // NOTE(review): the declaration of `SIW` used in the loop below (source
     // line 5978) is absent from this listing.
5979 for (ConstantInt *DeadCase : DeadCases) {
5980 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
5981 assert(CaseI != SI->case_default() &&
5982 "Case was not found. Probably mistake in DeadCases forming.");
5983 // Prune unused values from PHI nodes.
5984 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
5985 SIW.removeCase(CaseI);
5986 }
5987
5988 if (DTU) {
5989 std::vector<DominatorTree::UpdateType> Updates;
     // Only successors whose live-case count dropped to zero have their edge
     // from the switch's block reported as deleted.
5990 for (auto *Successor : UniqueSuccessors)
5991 if (NumPerSuccessorCases[Successor] == 0)
5992 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
5993 DTU->applyUpdates(Updates);
5994 }
5995
5996 return true;
5997}
5998
5999/// If BB would be eligible for simplification by
6000/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6001/// by an unconditional branch), look at the phi node for BB in the successor
6002/// block and see if the incoming value is equal to CaseValue. If so, return
6003/// the phi node, and set PhiIndex to BB's index in the phi node.
///
/// Returns nullptr, leaving *PhiIndex untouched, when BB does not qualify or
/// no phi in the successor has CaseValue incoming from BB. Only the first
/// matching phi is reported.
///
/// NOTE(review): the first line of the signature (source line 6004) is absent
/// from this listing; `CaseValue` is presumably declared there.
6005 BasicBlock *BB, int *PhiIndex) {
6006 if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
6007 return nullptr; // BB must be empty to be a candidate for simplification.
6008 if (!BB->getSinglePredecessor())
6009 return nullptr; // BB must be dominated by the switch.
6010
6011 BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
6012 if (!Branch || !Branch->isUnconditional())
6013 return nullptr; // Terminator must be unconditional branch.
6014
6015 BasicBlock *Succ = Branch->getSuccessor(0);
6016
6017 for (PHINode &PHI : Succ->phis()) {
6018 int Idx = PHI.getBasicBlockIndex(BB);
6019 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6020
6021 Value *InValue = PHI.getIncomingValue(Idx);
6022 if (InValue != CaseValue)
6023 continue;
6024
6025 *PhiIndex = Idx;
6026 return &PHI;
6027 }
6028
6029 return nullptr;
6030}
6031
6032/// Try to forward the condition of a switch instruction to a phi node
6033/// dominated by the switch, if that would mean that some of the destination
6034/// blocks of the switch can be folded away. Return true if a change is made.
/// Two rewrites are performed: phi operands that come directly from the switch
/// block are replaced immediately, while phi operands reached through an
/// intermediate block are first collected and only replaced when that is
/// judged profitable.
6036 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6037
 // Maps a phi node to the incoming-value indices that could be replaced by the
 // switch condition.
6038 ForwardingNodesMap ForwardingNodes;
6039 BasicBlock *SwitchBlock = SI->getParent();
6040 bool Changed = false;
6041 for (const auto &Case : SI->cases()) {
6042 ConstantInt *CaseValue = Case.getCaseValue();
6043 BasicBlock *CaseDest = Case.getCaseSuccessor();
6044
6045 // Replace phi operands in successor blocks that are using the constant case
6046 // value rather than the switch condition variable:
6047 // switchbb:
6048 // switch i32 %x, label %default [
6049 // i32 17, label %succ
6050 // ...
6051 // succ:
6052 // %r = phi i32 ... [ 17, %switchbb ] ...
6053 // -->
6054 // %r = phi i32 ... [ %x, %switchbb ] ...
6055
6056 for (PHINode &Phi : CaseDest->phis()) {
6057 // This only works if there is exactly 1 incoming edge from the switch to
6058 // a phi. If there is >1, that means multiple cases of the switch map to 1
6059 // value in the phi, and that phi value is not the switch condition. Thus,
6060 // this transform would not make sense (the phi would be invalid because
6061 // a phi can't have different incoming values from the same block).
6062 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6063 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6064 count(Phi.blocks(), SwitchBlock) == 1) {
6065 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6066 Changed = true;
6067 }
6068 }
6069
6070 // Collect phi nodes that are indirectly using this switch's case constants.
 // PhiIdx is only read when the helper returns a non-null phi.
6071 int PhiIdx;
6072 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6073 ForwardingNodes[Phi].push_back(PhiIdx);
6074 }
6075
6076 for (auto &ForwardingNode : ForwardingNodes) {
6077 PHINode *Phi = ForwardingNode.first;
6078 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6079 // Check if it helps to fold PHI: rewrite only when at least two incoming
 // values would change, or when the phi already has the switch condition as
 // one of its incoming values.
6080 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6081 continue;
6082
6083 for (int Index : Indexes)
6084 Phi->setIncomingValue(Index, SI->getCondition());
6085 Changed = true;
6086 }
6087
6088 return Changed;
6089}
6090
6091/// Return true if the backend will be able to handle
6092/// initializing an array of constants like C.
/// Constant expressions are accepted only if stripping in-bounds constant
/// offsets yields a different constant that is itself acceptable.
 // Constants that depend on the current thread or on a DLL import are rejected.
6094 if (C->isThreadDependent())
6095 return false;
6096 if (C->isDLLImportDependent())
6097 return false;
6098
 // Only floating-point, integer, null-pointer, global, undef and
 // constant-expression constants are considered.
6099 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6100 !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
6101 !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
6102 return false;
6103
6104 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
6105 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6106 // materializing the array of constants.
6107 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
 // Nothing could be stripped (StrippedC == C), or the underlying constant is
 // itself unsuitable: reject the expression.
6108 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6109 return false;
6110 }
6111
 // NOTE(review): the condition guarding the next return (listing line 6112)
 // is not visible in this listing.
6113 return false;
6114
6115 return true;
6116}
6117
6118/// If V is a Constant, return it. Otherwise, try to look up
6119/// its constant value in ConstantPool, returning null if it's not there.
6120static Constant *
6123 if (Constant *C = dyn_cast<Constant>(V))
6124 return C;
 // V is not a constant itself; fall back to whatever the pool records for it.
6125 return ConstantPool.lookup(V);
6126}
6127
6128/// Try to fold instruction I into a constant. This works for
6129/// simple instructions such as binary operations where both operands are
6130/// constant or can be replaced by constants from the ConstantPool. Returns the
6131/// resulting constant on success, null otherwise.
6132static Constant *
 // Selects are handled separately: only the condition has to be constant, and
 // only the arm that the condition picks is looked up.
6135 if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
6136 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6137 if (!A)
6138 return nullptr;
6139 if (A->isAllOnesValue())
6140 return lookupConstant(Select->getTrueValue(), ConstantPool);
6141 if (A->isNullValue())
6142 return lookupConstant(Select->getFalseValue(), ConstantPool);
 // The condition is a constant that is neither all-ones nor null: give up.
6143 return nullptr;
6144 }
6145
 // NOTE(review): the declaration of COps (listing line 6146) is not visible in
 // this listing.
 // Every operand must resolve to a constant, otherwise folding is abandoned.
6147 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6148 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6149 COps.push_back(A);
6150 else
6151 return nullptr;
6152 }
6153
6154 return ConstantFoldInstOperands(I, COps, DL);
6155}
6156
6157/// Try to determine the resulting constant values in phi nodes
6158/// at the common destination basic block, *CommonDest, for one of the case
6159/// destinations CaseDest corresponding to value CaseVal (0 for the default
6160/// case), of a switch instruction SI.
/// If *CommonDest is null on entry it is set to the destination reached for
/// this case; otherwise the reached destination must equal *CommonDest.
/// For every phi node in the common destination that has an incoming entry for
/// the block we arrive from, a (phi, constant) pair is appended to Res.
/// Returns true only if Res is non-empty afterwards; on a false return Res may
/// already hold some pairs.
6161static bool
6163 BasicBlock **CommonDest,
6164 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6165 const DataLayout &DL, const TargetTransformInfo &TTI) {
6166 // The block from which we enter the common destination.
6167 BasicBlock *Pred = SI->getParent();
6168
6169 // If CaseDest is empty except for some side-effect free instructions through
6170 // which we can constant-propagate the CaseVal, continue to its successor.
 // NOTE(review): the declaration of ConstantPool (listing line 6171) is not
 // visible in this listing.
 // Seed the pool: along this edge the switch condition equals CaseVal.
6172 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6173 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6174 if (I.isTerminator()) {
6175 // If the terminator is a simple branch, continue to the next block.
6176 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6177 return false;
6178 Pred = CaseDest;
6179 CaseDest = I.getSuccessor(0);
6180 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6181 // Instruction is side-effect free and constant.
6182
6183 // If the instruction has uses outside this block or a phi node slot for
6184 // the block, it is not safe to bypass the instruction since it would then
6185 // no longer dominate all its uses.
 // Note that the inner `I` and `User` below shadow the outer names.
6186 for (auto &Use : I.uses()) {
6187 User *User = Use.getUser();
6188 if (Instruction *I = dyn_cast<Instruction>(User))
6189 if (I->getParent() == CaseDest)
6190 continue;
6191 if (PHINode *Phi = dyn_cast<PHINode>(User))
6192 if (Phi->getIncomingBlock(Use) == CaseDest)
6193 continue;
6194 return false;
6195 }
6196
 // Remember the folded value so later instructions and phis can use it.
6197 ConstantPool.insert(std::make_pair(&I, C));
6198 } else {
 // Not foldable: stop scanning and treat CaseDest itself as the destination.
6199 break;
6200 }
6201 }
6202
6203 // If we did not have a CommonDest before, use the current one.
6204 if (!*CommonDest)
6205 *CommonDest = CaseDest;
6206 // If the destination isn't the common one, abort.
6207 if (CaseDest != *CommonDest)
6208 return false;
6209
6210 // Get the values for this case from phi nodes in the destination block.
6211 for (PHINode &PHI : (*CommonDest)->phis()) {
6212 int Idx = PHI.getBasicBlockIndex(Pred);
 // Phis without an entry for Pred are skipped.
6213 if (Idx == -1)
6214 continue;
6215
6216 Constant *ConstVal =
6217 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6218 if (!ConstVal)
6219 return false;
6220
6221 // Be conservative about which kinds of constants we support.
6222 if (!validLookupTableConstant(ConstVal, TTI))
6223 return false;
6224
6225 Res.push_back(std::make_pair(&PHI, ConstVal));
6226 }
6227
6228 return Res.size() > 0;
6229}
6230
6231// Helper function used to add CaseVal to the list of cases that generate
6232// Result. Returns the updated number of cases that generate this result.
6233static size_t mapCaseToResult(ConstantInt *CaseVal,
6234 SwitchCaseResultVectorTy &UniqueResults,
6235 Constant *Result) {
6236 for (auto &I : UniqueResults) {
6237 if (I.first == Result) {
6238 I.second.push_back(CaseVal);
6239 return I.second.size();
6240 }
6241 }
6242 UniqueResults.push_back(
6243 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6244 return 1;
6245}
6246
6247// Helper function that initializes a map containing
6248// results for the PHI node of the common destination block for a switch
6249// instruction. Returns false if multiple PHI nodes have been found or if
6250// there is not a common destination block for the switch.
// It also returns false when one result is produced by more than
// MaxSwitchCasesPerResult cases, when more than MaxUniqueResults distinct
// results are seen, or when no default result can be determined and the
// default destination does not start with an unreachable instruction.
// On success DefaultResult holds the default result, or null if there is none.
6252 BasicBlock *&CommonDest,
6253 SwitchCaseResultVectorTy &UniqueResults,
6254 Constant *&DefaultResult,
6255 const DataLayout &DL,
6256 const TargetTransformInfo &TTI,
6257 uintptr_t MaxUniqueResults) {
6258 for (const auto &I : SI->cases()) {
6259 ConstantInt *CaseVal = I.getCaseValue();
6260
6261 // Resulting value at phi nodes for this case value.
6262 SwitchCaseResultsTy Results;
6263 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6264 DL, TTI))
6265 return false;
6266
6267 // Only one value per case is permitted.
6268 if (Results.size() > 1)
6269 return false;
6270
6271 // Add the case->result mapping to UniqueResults.
6272 const size_t NumCasesForResult =
6273 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6274
6275 // Early out if there are too many cases for this result.
6276 if (NumCasesForResult > MaxSwitchCasesPerResult)
6277 return false;
6278
6279 // Early out if there are too many unique results.
6280 if (UniqueResults.size() > MaxUniqueResults)
6281 return false;
6282
6283 // Check the PHI consistency: every case must feed the same phi node.
6284 if (!PHI)
6285 PHI = Results[0].first;
6286 else if (PHI != Results[0].first)
6287 return false;
6288 }
6289 // Find the default result value.
 // NOTE(review): the declaration of DefaultResults (listing line 6290) is not
 // visible in this listing.
6291 BasicBlock *DefaultDest = SI->getDefaultDest();
 // The return value is not checked here; only the number of collected results
 // is inspected below.
6292 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6293 DL, TTI);
6294 // If the default value is not found abort unless the default destination
6295 // is unreachable.
6296 DefaultResult =
6297 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6298 if ((!DefaultResult &&
6299 !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
6300 return false;
6301
6302 return true;
6303}
6304
6305// Helper function that checks if it is possible to transform a switch with only
6306// two cases (or two cases + default) that produces a result into a select.
6307// TODO: Handle switches with more than 2 cases that map to the same result.
6308static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6309 Constant *DefaultResult, Value *Condition,
6310 IRBuilder<> &Builder) {
6311 // If we are selecting between only two cases transform into a simple
6312 // select or a two-way select if default is possible.
6313 // Example:
6314 // switch (a) { %0 = icmp eq i32 %a, 10
6315 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6316 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6317 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6318 // }
6319 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6320 ResultVector[1].second.size() == 1) {
6321 ConstantInt *FirstCase = ResultVector[0].second[0];
6322 ConstantInt *SecondCase = ResultVector[1].second[0];
6323 Value *SelectValue = ResultVector[1].first;
6324 if (DefaultResult) {
6325 Value *ValueCompare =
6326 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6327 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6328 DefaultResult, "switch.select");
6329 }
6330 Value *ValueCompare =
6331 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6332 return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6333 SelectValue, "switch.select");
6334 }
6335
6336 // Handle the degenerate case where two cases have the same result value.
6337 if (ResultVector.size() == 1 && DefaultResult) {
6338 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6339 unsigned CaseCount = CaseValues.size();
6340 // n bits group cases map to the same result:
6341 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6342 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6343 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6344 if (isPowerOf2_32(CaseCount)) {
6345 ConstantInt *MinCaseVal = CaseValues[0];
6346 // Find mininal value.
6347 for (auto *Case : CaseValues)
6348 if (Case->getValue().slt(MinCaseVal->getValue()))
6349 MinCaseVal = Case;
6350
6351 // Mark the bits case number touched.
6352 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6353 for (auto *Case : CaseValues)
6354 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6355
6356 // Check if cases with the same result can cover all number
6357 // in touched bits.
6358 if (BitMask.popcount() == Log2_32(CaseCount)) {
6359 if (!MinCaseVal->isNullValue())
6360 Condition = Builder.CreateSub(Condition, MinCaseVal);
6361 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6362 Value *Cmp = Builder.CreateICmpEQ(
6363 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6364 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6365 }
6366 }
6367
6368 // Handle the degenerate case where two cases have the same value.
6369 if (CaseValues.size() == 2) {
6370 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6371 "switch.selectcmp.case1");
6372 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6373 "switch.selectcmp.case2");
6374 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6375 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6376 }
6377 }
6378
6379 return nullptr;
6380}
6381
6382// Helper function to cleanup a switch instruction that has been converted into
6383// a select, fixing up PHI nodes and basic blocks.
// The switch is replaced by an unconditional branch to the block containing
// PHI, PHI receives SelectValue from the switch's block, and every other
// successor loses the switch's block as a predecessor. When DTU is non-null
// the matching dominator-tree edge updates are applied at the end.
6385 Value *SelectValue,
6386 IRBuilder<> &Builder,
6387 DomTreeUpdater *DTU) {
6388 std::vector<DominatorTree::UpdateType> Updates;
6389
6390 BasicBlock *SelectBB = SI->getParent();
6391 BasicBlock *DestBB = PHI->getParent();
6392
 // A new CFG edge is only created if DestBB was not already a successor.
6393 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6394 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6395 Builder.CreateBr(DestBB);
6396
6397 // Remove the switch.
6398
 // First drop every incoming value PHI had from SelectBB, then add the single
 // entry carrying the select result.
6399 PHI->removeIncomingValueIf(
6400 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6401 PHI->addIncoming(SelectValue, SelectBB);
6402
 // A successor may appear several times; record its edge deletion only once.
6403 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6404 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6405 BasicBlock *Succ = SI->getSuccessor(i);
6406
6407 if (Succ == DestBB)
6408 continue;
6409 Succ->removePredecessor(SelectBB);
6410 if (DTU && RemovedSuccessors.insert(Succ).second)
6411 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6412 }
6413 SI->eraseFromParent();
6414 if (DTU)
6415 DTU->applyUpdates(Updates);
6416}
6417
6418/// If a switch is only used to initialize one or more phi nodes in a common
6419/// successor block with only two different constant values, try to replace the
6420/// switch with a select. Returns true if the fold was made.
6421static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6422 DomTreeUpdater *DTU, const DataLayout &DL,
6423 const TargetTransformInfo &TTI) {
6424 Value *const Cond = SI->getCondition();
6425 PHINode *PHI = nullptr;
6426 BasicBlock *CommonDest = nullptr;
6427 Constant *DefaultResult;
6428 SwitchCaseResultVectorTy UniqueResults;
6429 // Collect all the cases that will deliver the same value from the switch.
6430 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6431 DL, TTI, /*MaxUniqueResults*/ 2))
6432 return false;
6433
6434 assert(PHI != nullptr && "PHI for value select not found");
6435 Builder.SetInsertPoint(SI);
6436 Value *SelectValue =
6437 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6438 if (!SelectValue)
6439 return false;
6440
6441 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6442 return true;
6443}
6444
6445namespace {
6446
6447/// This class represents a lookup table that can be used to replace a switch.
/// The constructor picks one of several representations (see Kind) and
/// buildLookup emits the code that retrieves an element for a given index.
6448class SwitchLookupTable {
6449public:
6450 /// Create a lookup table to use as a switch replacement with the contents
6451 /// of Values, using DefaultValue to fill any holes in the table.
 /// Offset is the case value that corresponds to table index 0, TableSize is
 /// the number of table elements, and FuncName is appended to the name of the
 /// global that is created when the table is stored as an array.
6452 SwitchLookupTable(
6453 Module &M, uint64_t TableSize, ConstantInt *Offset,
6454 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6455 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
6456
6457 /// Build instructions with Builder to retrieve the value at
6458 /// the position given by Index in the lookup table.
6459 Value *buildLookup(Value *Index, IRBuilder<> &Builder);
6460
6461 /// Return true if a table with TableSize elements of
6462 /// type ElementType would fit in a target-legal register.
 /// Always false for non-integer element types.
6463 static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6464 Type *ElementType);
6465
6466private:
6467 // Depending on the contents of the table, it can be represented in
6468 // different ways.
6469 enum {
6470 // For tables where each element contains the same value, we just have to
6471 // store that single value and return it for each lookup.
6472 SingleValueKind,
6473
6474 // For tables where there is a linear relationship between table index
6475 // and values. We calculate the result with a simple multiplication
6476 // and addition instead of a table lookup.
6477 LinearMapKind,
6478
6479 // For small tables with integer elements, we can pack them into a bitmap
6480 // that fits into a target-legal register. Values are retrieved by
6481 // shift and mask operations.
6482 BitMapKind,
6483
6484 // The table is stored as an array of values. Values are retrieved by load
6485 // instructions from the table.
6486 ArrayKind
6487 } Kind;
6488
6489 // For SingleValueKind, this is the single value.
6490 Constant *SingleValue = nullptr;
6491
6492 // For BitMapKind, this is the bitmap and the type of one packed element.
6493 ConstantInt *BitMap = nullptr;
6494 IntegerType *BitMapElementTy = nullptr;
6495
6496 // For LinearMapKind, these are the constants used to derive the value:
 // result = index * LinearMultiplier + LinearOffset. LinearMapValWrapped is
 // set when that computation may wrap, in which case no nsw flag is emitted.
6497 ConstantInt *LinearOffset = nullptr;
6498 ConstantInt *LinearMultiplier = nullptr;
6499 bool LinearMapValWrapped = false;
6500
6501 // For ArrayKind, this is the array.
6502 GlobalVariable *Array = nullptr;
6503};
6504
6505} // end anonymous namespace
6506
6507SwitchLookupTable::SwitchLookupTable(
6508 Module &M, uint64_t TableSize, ConstantInt *Offset,
6509 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6510 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6511 assert(Values.size() && "Can't build lookup table without values!");
6512 assert(TableSize >= Values.size() && "Can't fit values in table!");
6513
6514 // If all values in the table are equal, this is that value.
6515 SingleValue = Values.begin()->second;
6516
6517 Type *ValueType = Values.begin()->second->getType();
6518
6519 // Build up the table contents.
6520 SmallVector<Constant *, 64> TableContents(TableSize);
6521 for (size_t I = 0, E = Values.size(); I != E; ++I) {
6522 ConstantInt *CaseVal = Values[I].first;
6523 Constant *CaseRes = Values[I].second;
6524 assert(CaseRes->getType() == ValueType);
6525
6526 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6527 TableContents[Idx] = CaseRes;
6528
6529 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6530 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6531 }
6532
6533 // Fill in any holes in the table with the default result.
6534 if (Values.size() < TableSize) {
6535 assert(DefaultValue &&
6536 "Need a default value to fill the lookup table holes.");
6537 assert(DefaultValue->getType() == ValueType);
6538 for (uint64_t I = 0; I < TableSize; ++I) {
6539 if (!TableContents[I])
6540 TableContents[I] = DefaultValue;
6541 }
6542
6543 // If the default value is poison, all the holes are poison.
6544 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6545
6546 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6547 SingleValue = nullptr;
6548 }
6549
6550 // If each element in the table contains the same value, we only need to store
6551 // that single value.
6552 if (SingleValue) {
6553 Kind = SingleValueKind;
6554 return;
6555 }
6556
6557 // Check if we can derive the value with a linear transformation from the
6558 // table index.
6559 if (isa<IntegerType>(ValueType)) {
6560 bool LinearMappingPossible = true;
6561 APInt PrevVal;
6562 APInt DistToPrev;
6563 // When linear map is monotonic and signed overflow doesn't happen on
6564 // maximum index, we can attach nsw on Add and Mul.
6565 bool NonMonotonic = false;
6566 assert(TableSize >= 2 && "Should be a SingleValue table.");
6567 // Check if there is the same distance between two consecutive values.
6568 for (uint64_t I = 0; I < TableSize; ++I) {
6569 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6570
6571 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6572 // This is an poison, so it's (probably) a lookup table hole.
6573 // To prevent any regressions from before we switched to using poison as
6574 // the default value, holes will fall back to using the first value.
6575 // This can be removed once we add proper handling for poisons in lookup
6576 // tables.
6577 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6578 }
6579
6580 if (!ConstVal) {
6581 // This is an undef. We could deal with it, but undefs in lookup tables
6582 // are very seldom. It's probably not worth the additional complexity.
6583 LinearMappingPossible = false;
6584 break;
6585 }
6586 const APInt &Val = ConstVal->getValue();
6587 if (I != 0) {
6588 APInt Dist = Val - PrevVal;
6589 if (I == 1) {
6590 DistToPrev = Dist;
6591 } else if (Dist != DistToPrev) {
6592 LinearMappingPossible = false;
6593 break;
6594 }
6595 NonMonotonic |=
6596 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6597 }
6598 PrevVal = Val;
6599 }
6600 if (LinearMappingPossible) {
6601 LinearOffset = cast<ConstantInt>(TableContents[0]);
6602 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6603 APInt M = LinearMultiplier->getValue();
6604 bool MayWrap = true;
6605 if (isIntN(M.getBitWidth(), TableSize - 1))
6606 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6607 LinearMapValWrapped = NonMonotonic || MayWrap;
6608 Kind = LinearMapKind;
6609 ++NumLinearMaps;
6610 return;
6611 }
6612 }
6613
6614 // If the type is integer and the table fits in a register, build a bitmap.
6615 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6616 IntegerType *IT = cast<IntegerType>(ValueType);
6617 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6618 for (uint64_t I = TableSize; I > 0; --I) {
6619 TableInt <<= IT->getBitWidth();
6620 // Insert values into the bitmap. Undef values are set to zero.
6621 if (!isa<UndefValue>(TableContents[I - 1])) {
6622 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6623 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6624 }
6625 }
6626 BitMap = ConstantInt::get(M.getContext(), TableInt);
6627 BitMapElementTy = IT;
6628 Kind = BitMapKind;
6629 ++NumBitMaps;
6630 return;
6631 }
6632
6633 // Store the table in an array.
6634 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
6635 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
6636
6637 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6638 GlobalVariable::PrivateLinkage, Initializer,
6639 "switch.table." + FuncName);
6640 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6641 // Set the alignment to that of an array items. We will be only loading one
6642 // value out of it.
6643 Array->setAlignment(DL.getPrefTypeAlign(ValueType));
6644 Kind = ArrayKind;
6645}
6646
6647Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder) {
6648 switch (Kind) {
6649 case SingleValueKind:
6650 return SingleValue;
6651 case LinearMapKind: {
6652 // Derive the result value from the input value.
6653 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6654 false, "switch.idx.cast");
6655 if (!LinearMultiplier->isOne())
6656 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6657 /*HasNUW = */ false,
6658 /*HasNSW = */ !LinearMapValWrapped);
6659
6660 if (!LinearOffset->isZero())
6661 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6662 /*HasNUW = */ false,
6663 /*HasNSW = */ !LinearMapValWrapped);
6664 return Result;
6665 }
6666 case BitMapKind: {
6667 // Type of the bitmap (e.g. i59).
6668 IntegerType *MapTy = BitMap->getIntegerType();
6669
6670 // Cast Index to the same type as the bitmap.
6671 // Note: The Index is <= the number of elements in the table, so
6672 // truncating it to the width of the bitmask is safe.
6673 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6674
6675 // Multiply the shift amount by the element width. NUW/NSW can always be
6676 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
6677 // BitMap's bit width.
6678 ShiftAmt = Builder.CreateMul(
6679 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6680 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6681
6682 // Shift down.
6683 Value *DownShifted =
6684 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
6685 // Mask off.
6686 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
6687 }
6688 case ArrayKind: {
6689 // Make sure the table index will not overflow when treated as signed.
6690 IntegerType *IT = cast<IntegerType>(Index->getType());
6691 uint64_t TableSize =
6692 Array->getInitializer()->getType()->getArrayNumElements();
6693 if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
6694 Index = Builder.CreateZExt(
6695 Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
6696 "switch.tableidx.zext");
6697
6698 Value *GEPIndices[] = {Builder.getInt32(0), Index};
6699 Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
6700 GEPIndices, "switch.gep");
6701 return Builder.CreateLoad(
6702 cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
6703 "switch.load");
6704 }
6705 }
6706 llvm_unreachable("Unknown lookup table kind!");
6707}
6708
6709bool SwitchLookupTable::wouldFitInRegister(const DataLayout &DL,
6710 uint64_t TableSize,
6711 Type *ElementType) {
6712 auto *IT = dyn_cast<IntegerType>(ElementType);
6713 if (!IT)
6714 return false;
6715 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6716 // are <= 15, we could try to narrow the type.
6717
6718 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6719 if (TableSize >= UINT_MAX / IT->getBitWidth())
6720 return false;
6721 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6722}
6723
// Decides whether values of type Ty may be used as lookup table elements:
// either the target reports the type as legal, or it is an integer type whose
// width is a power of two, at least 8 bits, and fits in a legal integer.
6725 const DataLayout &DL) {
6726 // Allow any legal type.
6727 if (TTI.isTypeLegal(Ty))
6728 return true;
6729
 // Beyond legal types, only integers are considered.
6730 auto *IT = dyn_cast<IntegerType>(Ty);
6731 if (!IT)
6732 return false;
6733
6734 // Also allow power of 2 integer types that have at least 8 bits and fit in
6735 // a register. These types are common in frontend languages and targets
6736 // usually support loads of these types.
6737 // TODO: We could relax this to any integer that fits in a register and rely
6738 // on ABI alignment and padding in the table to allow the load to be widened.
6739 // Or we could widen the constants and truncate the load.
6740 unsigned BitWidth = IT->getBitWidth();
6741 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6742 DL.fitsInLegalInteger(IT->getBitWidth());
6743}
6744
/// Returns true if NumCases cases spread over a value range of CaseRange
/// slots reach the minimum density of 40%.
/// Ranges of UINT64_MAX / 100 or more are always reported as not dense.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  if (CaseRange >= UINT64_MAX / 100)
    return false; // Avoid multiplication overflows below.

  // At least as many cases as slots is always dense. Deciding this up front
  // also keeps NumCases * 100 below from wrapping for very large NumCases.
  if (NumCases >= CaseRange)
    return true;

  return NumCases * 100 >= CaseRange * MinDensity;
}
6756
// Density check on a list of case values: the range is taken as the distance
// between the last and the first value, plus one.
// NOTE(review): presumably Values is sorted in ascending order - confirm
// against the callers.
6758 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6759 uint64_t Range = Diff + 1;
 // Diff + 1 wraps to 0 when Diff is UINT64_MAX.
6760 if (Range < Diff)
6761 return false; // Overflow.
6762
6763 return isSwitchDense(Values.size(), Range);
6764}
6765
6766/// Determine whether a lookup table should be built for this switch, based on
6767/// the number of cases, size of the table, and the types of the results.
/// A table is built if every result table would fit in a register. Otherwise
/// it is built only if all result types are legal for lookup tables and the
/// switch is dense enough.
6768// TODO: We could support larger than legal types by limiting based on the
6769// number of loads required and/or table size. If the constants are small we
6770// could use smaller table entries and extend after the load.
6771static bool
6773 const TargetTransformInfo &TTI, const DataLayout &DL,
6774 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6775 if (SI->getNumCases() > TableSize)
6776 return false; // TableSize overflowed.
6777
6778 bool AllTablesFitInRegister = true;
6779 bool HasIllegalType = false;
6780 for (const auto &I : ResultTypes) {
6781 Type *Ty = I.second;
6782
6783 // Saturate this flag to true.
6784 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6785
6786 // Saturate this flag to false.
6787 AllTablesFitInRegister =
6788 AllTablesFitInRegister &&
6789 SwitchLookupTable::wouldFitInRegister(DL, TableSize, Ty);
6790
6791 // If both flags saturate, we're done. NOTE: This *only* works with
6792 // saturating flags, and all flags have to saturate first due to the
6793 // non-deterministic behavior of iterating over a dense map.
6794 if (HasIllegalType && !AllTablesFitInRegister)
6795 break;
6796 }
6797
6798 // If each table would fit in a register, we should build it anyway.
6799 if (AllTablesFitInRegister)
6800 return true;
6801
6802 // Don't build a table that doesn't fit in-register if it has illegal types.
6803 if (HasIllegalType)
6804 return false;
6805
 // Legal types, but at least one table needs memory: require a dense switch.
6806 return isSwitchDense(SI->getNumCases(), TableSize);
6807}
6808
// Decides whether the table can be indexed from zero up to MaxCaseVal instead
// of from MinCaseVal. This is trivially true when MinCaseVal is zero.
// Otherwise it requires a non-negative MinCaseVal, default results, a
// MaxCaseVal whose limited value is below the uint64_t maximum, and that a
// table of MaxCaseVal + 1 elements fits in a register for every result type.
6810 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6811 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6812 const DataLayout &DL, const TargetTransformInfo &TTI) {
6813 if (MinCaseVal.isNullValue())
6814 return true;
 // The second check keeps MaxCaseVal + 1 below from wrapping.
6815 if (MinCaseVal.isNegative() ||
6816 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6817 !HasDefaultResults)
6818 return false;
6819 return all_of(ResultTypes, [&](const auto &KV) {
6820 return SwitchLookupTable::wouldFitInRegister(
6821 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6822 KV.second /* ResultType */);
6823 });
6824}
6825
6826/// Try to reuse the switch table index compare. Following pattern:
6827/// \code
6828/// if (idx < tablesize)
6829/// r = table[idx]; // table does not contain default_value
6830/// else
6831/// r = default_value;
6832/// if (r != default_value)
6833/// ...
6834/// \endcode
6835/// Is optimized to:
6836/// \code
6837/// cond = idx < tablesize;
6838/// if (cond)
6839/// r = table[idx];
6840/// else
6841/// r = default_value;
6842/// if (cond)
6843/// ...
6844/// \endcode
6845/// Jump threading will then eliminate the second if(cond).
/// Nothing is changed unless PhiUser is an integer compare in PhiBlock whose
/// second operand is a constant, and the compare folds to one boolean
/// constant for the default value and to the opposite one for every table
/// value.
6847 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6848 Constant *DefaultValue,
6849 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6850 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6851 if (!CmpInst)
6852 return;
6853
6854 // We require that the compare is in the same block as the phi so that jump
6855 // threading can do its work afterwards.
6856 if (CmpInst->getParent() != PhiBlock)
6857 return;
6858
6859 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6860 if (!CmpOp1)
6861 return;
6862
6863 Value *RangeCmp = RangeCheckBranch->getCondition();
6864 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6865 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6866
6867 // Check if the compare with the default value is constant true or false.
6868 const DataLayout &DL = PhiBlock->getDataLayout();
 // NOTE(review): the line that declares DefaultConst and names the folding
 // call (listing line 6869) is not visible in this listing.
6870 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6871 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6872 return;
6873
6874 // Check if the compare with the case values is distinct from the default
6875 // compare result.
6876 for (auto ValuePair : Values) {
 // NOTE(review): the line that declares CaseConst (listing line 6877) is not
 // visible in this listing.
6878 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
6879 if (!CaseConst || CaseConst == DefaultConst ||
6880 (CaseConst != TrueConst && CaseConst != FalseConst))
6881 return;
6882 }
6883
6884 // Check if the branch instruction dominates the phi node. It's a simple
6885 // dominance check, but sufficient for our needs.
6886 // Although this check is invariant in the calling loops, it's better to do it
6887 // at this late stage. Practically we do it at most once for a switch.
6888 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6889 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6890 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6891 return;
6892 }
6893
6894 if (DefaultConst == FalseConst) {
6895 // The compare yields the same result. We can replace it.
6896 CmpInst->replaceAllUsesWith(RangeCmp);
6897 ++NumTableCmpReuses;
6898 } else {
6899 // The compare yields the same result, just inverted. We can replace it.
 // The inverted value is an xor with 1, inserted at the range check branch.
6900 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6901 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6902 RangeCheckBranch->getIterator());
6903 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6904 ++NumTableCmpReuses;
6905 }
6906}
6907
6908/// If the switch is only used to initialize one or more phi nodes in a common
6909/// successor block with different constant values, replace the switch with
6910/// lookup tables.
6912 DomTreeUpdater *DTU, const DataLayout &DL,
6913 const TargetTransformInfo &TTI) {
6914 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6915
6916 BasicBlock *BB = SI->getParent();
6917 Function *Fn = BB->getParent();
6918 // Only build lookup table when we have a target that supports it or the
6919 // attribute is not set.
6921 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6922 return false;
6923
6924 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6925 // split off a dense part and build a lookup table for that.
6926
6927 // FIXME: This creates arrays of GEPs to constant strings, which means each
6928 // GEP needs a runtime relocation in PIC code. We should just build one big
6929 // string and lookup indices into that.
6930
6931 // Ignore switches with less than three cases. Lookup tables will not make
6932 // them faster, so we don't analyze them.
6933 if (SI->getNumCases() < 3)
6934 return false;
6935
6936 // Figure out the corresponding result for each case value and phi node in the
6937 // common destination, as well as the min and max case values.
6938 assert(!SI->cases().empty());
6939 SwitchInst::CaseIt CI = SI->case_begin();
6940 ConstantInt *MinCaseVal = CI->getCaseValue();
6941 ConstantInt *MaxCaseVal = CI->getCaseValue();
6942
6943 BasicBlock *CommonDest = nullptr;
6944
6945 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6947
6951
6952 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6953 ConstantInt *CaseVal = CI->getCaseValue();
6954 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6955 MinCaseVal = CaseVal;
6956 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6957 MaxCaseVal = CaseVal;
6958
6959 // Resulting value at phi nodes for this case value.
6961 ResultsTy Results;
6962 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6963 Results, DL, TTI))
6964 return false;
6965
6966 // Append the result from this case to the list for each phi.
6967 for (const auto &I : Results) {
6968 PHINode *PHI = I.first;
6969 Constant *Value = I.second;
6970 if (!ResultLists.count(PHI))
6971 PHIs.push_back(PHI);
6972 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
6973 }
6974 }
6975
6976 // Keep track of the result types.
6977 for (PHINode *PHI : PHIs) {
6978 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6979 }
6980
6981 uint64_t NumResults = ResultLists[PHIs[0]].size();
6982
6983 // If the table has holes, we need a constant result for the default case
6984 // or a bitmask that fits in a register.
6985 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6986 bool HasDefaultResults =
6987 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6988 DefaultResultsList, DL, TTI);
6989
6990 for (const auto &I : DefaultResultsList) {
6991 PHINode *PHI = I.first;
6992 Constant *Result = I.second;
6993 DefaultResults[PHI] = Result;
6994 }
6995
6996 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
6997 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6998 uint64_t TableSize;
6999 if (UseSwitchConditionAsTableIndex)
7000 TableSize = MaxCaseVal->getLimitedValue() + 1;
7001 else
7002 TableSize =
7003 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7004
7005 // If the default destination is unreachable, or if the lookup table covers
7006 // all values of the conditional variable, branch directly to the lookup table
7007 // BB. Otherwise, check that the condition is within the case range.
7008 bool DefaultIsReachable = !SI->defaultDestUndefined();
7009
7010 bool TableHasHoles = (NumResults < TableSize);
7011
7012 // If the table has holes but the default destination doesn't produce any
7013 // constant results, the lookup table entries corresponding to the holes will
7014 // contain poison.
7015 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7016
7017 // If the default destination doesn't produce a constant result but is still
7018 // reachable, and the lookup table has holes, we need to use a mask to
7019 // determine if the current index should load from the lookup table or jump
7020 // to the default case.
7021 // The mask is unnecessary if the table has holes but the default destination
7022 // is unreachable, as in that case the holes must also be unreachable.
7023 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7024 if (NeedMask) {
7025 // As an extra penalty for the validity test we require more cases.
7026 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7027 return false;
7028 if (!DL.fitsInLegalInteger(TableSize))
7029 return false;
7030 }
7031
7032 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7033 return false;
7034
7035 std::vector<DominatorTree::UpdateType> Updates;
7036
7037 // Compute the maximum table size representable by the integer type we are
7038 // switching upon.
7039 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7040 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7041 assert(MaxTableSize >= TableSize &&
7042 "It is impossible for a switch to have more entries than the max "
7043 "representable value of its input integer type's size.");
7044
7045 // Create the BB that does the lookups.
7046 Module &Mod = *CommonDest->getParent()->getParent();
7047 BasicBlock *LookupBB = BasicBlock::Create(
7048 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7049
7050 // Compute the table index value.
7051 Builder.SetInsertPoint(SI);
7052 Value *TableIndex;
7053 ConstantInt *TableIndexOffset;
7054 if (UseSwitchConditionAsTableIndex) {
7055 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7056 TableIndex = SI->getCondition();
7057 } else {
7058 TableIndexOffset = MinCaseVal;
7059 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7060 // we can try to attach nsw.
7061 bool MayWrap = true;
7062 if (!DefaultIsReachable) {
7063 APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7064 (void)Res;
7065 }
7066
7067 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7068 "switch.tableidx", /*HasNUW =*/false,
7069 /*HasNSW =*/!MayWrap);
7070 }
7071
7072 BranchInst *RangeCheckBranch = nullptr;
7073
7074 // Grow the table to cover all possible index values to avoid the range check.
7075 // It will use the default result to fill in the table hole later, so make
7076 // sure it exist.
7077 if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
7078 ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
7079 // Grow the table shouldn't have any size impact by checking
7080 // wouldFitInRegister.
7081 // TODO: Consider growing the table also when it doesn't fit in a register
7082 // if no optsize is specified.
7083 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7084 if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
7085 return SwitchLookupTable::wouldFitInRegister(
7086 DL, UpperBound, KV.second /* ResultType */);
7087 })) {
7088 // There may be some case index larger than the UpperBound (unreachable
7089 // case), so make sure the table size does not get smaller.
7090 TableSize = std::max(UpperBound, TableSize);
7091 // The default branch is unreachable after we enlarge the lookup table.
7092 // Adjust DefaultIsReachable to reuse code path.
7093 DefaultIsReachable = false;
7094 }
7095 }
7096
7097 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7098 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7099 Builder.CreateBr(LookupBB);
7100 if (DTU)
7101 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7102 // Note: We call removeProdecessor later since we need to be able to get the
7103 // PHI value for the default case in case we're using a bit mask.
7104 } else {
7105 Value *Cmp = Builder.CreateICmpULT(
7106 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7107 RangeCheckBranch =
7108 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7109 if (DTU)
7110 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7111 }
7112
7113 // Populate the BB that does the lookups.
7114 Builder.SetInsertPoint(LookupBB);
7115
7116 if (NeedMask) {
7117 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7118 // re-purposed to do the hole check, and we create a new LookupBB.
7119 BasicBlock *MaskBB = LookupBB;
7120 MaskBB->setName("switch.hole_check");
7121 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7122 CommonDest->getParent(), CommonDest);
7123
7124 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7125 // unnecessary illegal types.
7126 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7127 APInt MaskInt(TableSizePowOf2, 0);
7128 APInt One(TableSizePowOf2, 1);
7129 // Build bitmask; fill in a 1 bit for every case.
7130 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7131 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
7132 uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
7133 .getLimitedValue();
7134 MaskInt |= One << Idx;
7135 }
7136 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7137
7138 // Get the TableIndex'th bit of the bitmask.
7139 // If this bit is 0 (meaning hole) jump to the default destination,
7140 // else continue with table lookup.
7141 IntegerType *MapTy = TableMask->getIntegerType();
7142 Value *MaskIndex =
7143 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7144 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7145 Value *LoBit = Builder.CreateTrunc(
7146 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7147 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7148 if (DTU) {
7149 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7150 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7151 }
7152 Builder.SetInsertPoint(LookupBB);
7153 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7154 }
7155
7156 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7157 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7158 // do not delete PHINodes here.
7159 SI->getDefaultDest()->removePredecessor(BB,
7160 /*KeepOneInputPHIs=*/true);
7161 if (DTU)
7162 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7163 }
7164
7165 for (PHINode *PHI : PHIs) {
7166 const ResultListTy &ResultList = ResultLists[PHI];
7167
7168 Type *ResultType = ResultList.begin()->second->getType();
7169
7170 // Use any value to fill the lookup table holes.
7171 Constant *DV =
7172 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7173 StringRef FuncName = Fn->getName();
7174 SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
7175 DL, FuncName);
7176
7177 Value *Result = Table.buildLookup(TableIndex, Builder);
7178
7179 // Do a small peephole optimization: re-use the switch table compare if
7180 // possible.
7181 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7182 BasicBlock *PhiBlock = PHI->getParent();
7183 // Search for compare instructions which use the phi.
7184 for (auto *User : PHI->users()) {
7185 reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
7186 }
7187 }
7188
7189 PHI->addIncoming(Result, LookupBB);
7190 }
7191
7192 Builder.CreateBr(CommonDest);
7193 if (DTU)
7194 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7195
7196 // Remove the switch.
7197 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7198 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7199 BasicBlock *Succ = SI->getSuccessor(i);
7200
7201 if (Succ == SI->getDefaultDest())
7202 continue;
7203 Succ->removePredecessor(BB);
7204 if (DTU && RemovedSuccessors.insert(Succ).second)
7205 Updates.push_back({DominatorTree::Delete, BB, Succ});
7206 }
7207 SI->eraseFromParent();
7208
7209 if (DTU)
7210 DTU->applyUpdates(Updates);
7211
7212 ++NumLookupTables;
7213 if (NeedMask)
7214 ++NumLookupTablesHoles;
7215 return true;
7216}
7217
7218/// Try to transform a switch that has "holes" in it to a contiguous sequence
7219/// of cases.
7220///
7221/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7222/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7223///
7224/// This converts a sparse switch into a dense switch which allows better
7225/// lowering and could also allow transforming into a lookup table.
7226static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
7227 const DataLayout &DL,
7228 const TargetTransformInfo &TTI) {
7229 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7230 if (CondTy->getIntegerBitWidth() > 64 ||
7231 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7232 return false;
7233 // Only bother with this optimization if there are more than 3 switch cases;
7234 // SDAG will only bother creating jump tables for 4 or more cases.
7235 if (SI->getNumCases() < 4)
7236 return false;
7237
7238 // This transform is agnostic to the signedness of the input or case values. We
7239 // can treat the case values as signed or unsigned. We can optimize more common
7240 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7241 // as signed.
7243 for (const auto &C : SI->cases())
7244 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7245 llvm::sort(Values);
7246
7247 // If the switch is already dense, there's nothing useful to do here.
7248 if (isSwitchDense(Values))
7249 return false;
7250
7251 // First, transform the values such that they start at zero and ascend.
7252 int64_t Base = Values[0];
7253 for (auto &V : Values)
7254 V -= (uint64_t)(Base);
7255
7256 // Now we have signed numbers that have been shifted so that, given enough
7257 // precision, there are no negative values. Since the rest of the transform
7258 // is bitwise only, we switch now to an unsigned representation.
7259
7260 // This transform can be done speculatively because it is so cheap - it
7261 // results in a single rotate operation being inserted.
7262
7263 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7264 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7265 // less than 64.
7266 unsigned Shift = 64;
7267 for (auto &V : Values)
7268 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7269 assert(Shift < 64);
7270 if (Shift > 0)
7271 for (auto &V : Values)
7272 V = (int64_t)((uint64_t)V >> Shift);
7273
7274 if (!isSwitchDense(Values))
7275 // Transform didn't create a dense switch.
7276 return false;
7277
7278 // The obvious transform is to shift the switch condition right and emit a
7279 // check that the condition actually cleanly divided by GCD, i.e.
7280 // C & (1 << Shift - 1) == 0
7281 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7282 //
7283 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7284 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7285 // are nonzero then the switch condition will be very large and will hit the
7286 // default case.
7287
7288 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7289 Builder.SetInsertPoint(SI);
7290 Value *Sub =
7291 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
7292 Value *Rot = Builder.CreateIntrinsic(
7293 Ty, Intrinsic::fshl,
7294 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7295 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7296
7297 for (auto Case : SI->cases()) {
7298 auto *Orig = Case.getCaseValue();
7299 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7300 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7301 }
7302 return true;
7303}
7304
7305/// Tries to transform switch of powers of two to reduce switch range.
7306/// For example, switch like:
7307/// switch (C) { case 1: case 2: case 64: case 128: }
7308/// will be transformed to:
7309/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7310///
7311/// This transformation allows better lowering and could allow transforming into
7312/// a lookup table.
7314 const DataLayout &DL,
7315 const TargetTransformInfo &TTI) {
7316 Value *Condition = SI->getCondition();
7317 LLVMContext &Context = SI->getContext();
7318 auto *CondTy = cast<IntegerType>(Condition->getType());
7319
7320 if (CondTy->getIntegerBitWidth() > 64 ||
7321 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7322 return false;
7323
7324 const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7325 IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7326 {Condition, ConstantInt::getTrue(Context)}),
7328
7329 if (CttzIntrinsicCost > TTI::TCC_Basic)
7330 // Inserting intrinsic is too expensive.
7331 return false;
7332
7333 // Only bother with this optimization if there are more than 3 switch cases.
7334 // SDAG will only bother creating jump tables for 4 or more cases.
7335 if (SI->getNumCases() < 4)
7336 return false;
7337
7338 // We perform this optimization only for switches with
7339 // unreachable default case.
7340 // This assumtion will save us from checking if `Condition` is a power of two.
7341 if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7342 return false;
7343
7344 // Check that switch cases are powers of two.
7346 for (const auto &Case : SI->cases()) {
7347 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7348 if (llvm::has_single_bit(CaseValue))
7349 Values.push_back(CaseValue);
7350 else
7351 return false;
7352 }
7353
7354 // isSwichDense requires case values to be sorted.
7355 llvm::sort(Values);
7356 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7357 llvm::countr_zero(Values.front()) + 1))
7358 // Transform is unable to generate dense switch.
7359 return false;
7360
7361 Builder.SetInsertPoint(SI);
7362
7363 // Replace each case with its trailing zeros number.
7364 for (auto &Case : SI->cases()) {
7365 auto *OrigValue = Case.getCaseValue();
7366 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7367 OrigValue->getValue().countr_zero()));
7368 }
7369
7370 // Replace condition with its trailing zeros number.
7371 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7372 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7373
7374 SI->setCondition(ConditionTrailingZeros);
7375
7376 return true;
7377}
7378
7379/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7380/// the same destination.
7382 DomTreeUpdater *DTU) {
7383 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7384 if (!Cmp || !Cmp->hasOneUse())
7385 return false;
7386
7388 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7389 if (!HasWeights)
7390 Weights.resize(4); // Avoid checking HasWeights everywhere.
7391
7392 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7393 int64_t Res;
7394 BasicBlock *Succ, *OtherSucc;
7395 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7396 BasicBlock *Unreachable = nullptr;
7397
7398 if (SI->getNumCases() == 2) {
7399 // Find which of 1, 0 or -1 is missing (handled by default dest).
7400 SmallSet<int64_t, 3> Missing;
7401 Missing.insert(1);
7402 Missing.insert(0);
7403 Missing.insert(-1);
7404
7405 Succ = SI->getDefaultDest();
7406 SuccWeight = Weights[0];
7407 OtherSucc = nullptr;
7408 for (auto &Case : SI->cases()) {
7409 std::optional<int64_t> Val =
7410 Case.getCaseValue()->getValue().trySExtValue();
7411 if (!Val)
7412 return false;
7413 if (!Missing.erase(*Val))
7414 return false;
7415 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7416 return false;
7417 OtherSucc = Case.getCaseSuccessor();
7418 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7419 }
7420
7421 assert(Missing.size() == 1 && "Should have one case left");
7422 Res = *Missing.begin();
7423 } else if (SI->getNumCases() == 3 && SI->defaultDestUndefined()) {
7424 // Normalize so that Succ is taken once and OtherSucc twice.
7425 Unreachable = SI->getDefaultDest();
7426 Succ = OtherSucc = nullptr;
7427 for (auto &Case : SI->cases()) {
7428 BasicBlock *NewSucc = Case.getCaseSuccessor();
7429 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7430 if (!OtherSucc || OtherSucc == NewSucc) {
7431 OtherSucc = NewSucc;
7432 OtherSuccWeight += Weight;
7433 } else if (!Succ) {
7434 Succ = NewSucc;
7435 SuccWeight = Weight;
7436 } else if (Succ == NewSucc) {
7437 std::swap(Succ, OtherSucc);
7438 std::swap(SuccWeight, OtherSuccWeight);
7439 } else
7440 return false;
7441 }
7442 for (auto &Case : SI->cases()) {
7443 std::optional<int64_t> Val =
7444 Case.getCaseValue()->getValue().trySExtValue();
7445 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7446 return false;
7447 if (Case.getCaseSuccessor() == Succ) {
7448 Res = *Val;
7449 break;
7450 }
7451 }
7452 } else {
7453 return false;
7454 }
7455
7456 // Determine predicate for the missing case.
7458 switch (Res) {
7459 case 1:
7460 Pred = ICmpInst::ICMP_UGT;
7461 break;
7462 case 0:
7463 Pred = ICmpInst::ICMP_EQ;
7464 break;
7465 case -1:
7466 Pred = ICmpInst::ICMP_ULT;
7467 break;
7468 }
7469 if (Cmp->isSigned())
7470 Pred = ICmpInst::getSignedPredicate(Pred);
7471
7472 MDNode *NewWeights = nullptr;
7473 if (HasWeights)
7474 NewWeights = MDBuilder(SI->getContext())
7475 .createBranchWeights(SuccWeight, OtherSuccWeight);
7476
7477 BasicBlock *BB = SI->getParent();
7478 Builder.SetInsertPoint(SI->getIterator());
7479 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7480 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7481 SI->getMetadata(LLVMContext::MD_unpredictable));
7482 OtherSucc->removePredecessor(BB);
7483 if (Unreachable)
7484 Unreachable->removePredecessor(BB);
7485 SI->eraseFromParent();
7486 Cmp->eraseFromParent();
7487 if (DTU && Unreachable)
7488 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7489 return true;
7490}
7491
7492/// Checking whether two cases of SI are equal depends on the contents of the
7493/// BasicBlock and the incoming values of their successor PHINodes.
7494/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7495/// calling this function on each BasicBlock every time isEqual is called,
7496/// especially since the same BasicBlock may be passed as an argument multiple
7497/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7498/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7499/// of the incoming values.
7503};
7504
7505namespace llvm {
7506template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
7508 return static_cast<SwitchSuccWrapper *>(
7510 }
7512 return static_cast<SwitchSuccWrapper *>(
7514 }
7515 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7516 BasicBlock *Succ = SSW->Dest;
7517 BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
7518 assert(BI->isUnconditional() &&
7519 "Only supporting unconditional branches for now");
7520 assert(BI->getNumSuccessors() == 1 &&
7521 "Expected unconditional branches to have one successor");
7522 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7523
7524 // Since we assume the BB is just a single BranchInst with a single
7525 // successor, we hash as the BB and the incoming Values of its successor
7526 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7527 // including the incoming PHI values leads to better performance.
7528 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7529 // time and passing it in SwitchSuccWrapper, but this slowed down the
7530 // average compile time without having any impact on the worst case compile
7531 // time.
7532 BasicBlock *BB = BI->getSuccessor(0);
7533 SmallVector<Value *> PhiValsForBB;
7534 for (PHINode &Phi : BB->phis())
7535 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7536
7537 return hash_combine(
7538 BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end()));
7539 }
7540 static bool isEqual(const SwitchSuccWrapper *LHS,
7541 const SwitchSuccWrapper *RHS) {
7544 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7545 return LHS == RHS;
7546
7547 BasicBlock *A = LHS->Dest;
7548 BasicBlock *B = RHS->Dest;
7549
7550 // FIXME: we checked that the size of A and B are both 1 in
7551 // simplifyDuplicateSwitchArms to make the Case list smaller to
7552 // improve performance. If we decide to support BasicBlocks with more
7553 // than just a single instruction, we need to check that A.size() ==
7554 // B.size() here, and we need to check more than just the BranchInsts
7555 // for equality.
7556
7557 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7558 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7559 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7560 "Only supporting unconditional branches for now");
7561 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7562 return false;
7563
7564 // Need to check that PHIs in successor have matching values
7565 BasicBlock *Succ = ABI->getSuccessor(0);
7566 for (PHINode &Phi : Succ->phis()) {
7567 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7568 if (PredIVs[A] != PredIVs[B])
7569 return false;
7570 }
7571
7572 return true;
7573 }
7574};
7575} // namespace llvm
7576
7577bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7578 DomTreeUpdater *DTU) {
7579 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7580 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7581 // an entire PHI at once after the loop, opposed to calling
7582 // getIncomingValueForBlock inside this loop, since each call to
7583 // getIncomingValueForBlock is O(|Preds|).
7589 Cases.reserve(SI->getNumSuccessors());
7590
7591 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7592 BasicBlock *BB = SI->getSuccessor(I);
7593
7594 // FIXME: Support more than just a single BranchInst. One way we could do
7595 // this is by taking a hashing approach of all insts in BB.
7596 if (BB->size() != 1)
7597 continue;
7598
7599 // FIXME: This case needs some extra care because the terminators other than
7600 // SI need to be updated. For now, consider only backedges to the SI.
7601 if (BB->hasNPredecessorsOrMore(4) ||
7602 BB->getUniquePredecessor() != SI->getParent())
7603 continue;
7604
7605 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7606 // on other kinds of terminators. We decide to only support unconditional
7607 // branches for now for compile time reasons.
7608 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
7609 if (!BI || BI->isConditional())
7610 continue;
7611
7612 if (Seen.insert(BB).second) {
7613 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7614 for (BasicBlock *Succ : BI->successors())
7615 for (PHINode &Phi : Succ->phis())
7616 Phis.insert(&Phi);
7617 // Add the successor only if not previously visited.
7618 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7619 }
7620
7621 BBToSuccessorIndexes[BB].emplace_back(I);
7622 }
7623
7624 // Precompute a data structure to improve performance of isEqual for
7625 // SwitchSuccWrapper.
7626 PhiPredIVs.reserve(Phis.size());
7627 for (PHINode *Phi : Phis) {
7628 PhiPredIVs[Phi] =
7629 SmallDenseMap<BasicBlock *, Value *, 8>(Phi->getNumIncomingValues());
7630 for (auto &IV : Phi->incoming_values())
7631 PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()});
7632 }
7633
7634 // Build a set such that if the SwitchSuccWrapper exists in the set and
7635 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7636 // which is not in the set should be replaced with the one in the set. If the
7637 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7638 // other SwitchSuccWrappers can check against it in the same manner. We use
7639 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7640 // around information to isEquality, getHashValue, and when doing the
7641 // replacement with better performance.
7643 ReplaceWith.reserve(Cases.size());
7644
7646 Updates.reserve(ReplaceWith.size());
7647 bool MadeChange = false;
7648 for (auto &SSW : Cases) {
7649 // SSW is a candidate for simplification. If we find a duplicate BB,
7650 // replace it.
7651 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7652 if (!Inserted) {
7653 // We know that SI's parent BB no longer dominates the old case successor
7654 // since we are making it dead.
7655 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7656 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7657 for (unsigned Idx : Successors)
7658 SI->setSuccessor(Idx, (*It)->Dest);
7659 MadeChange = true;
7660 }
7661 }
7662
7663 if (DTU)
7664 DTU->applyUpdates(Updates);
7665
7666 return MadeChange;
7667}
7668
7669bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7670 BasicBlock *BB = SI->getParent();
7671
7672 if (isValueEqualityComparison(SI)) {
7673 // If we only have one predecessor, and if it is a branch on this value,
7674 // see if that predecessor totally determines the outcome of this switch.
7675 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7676 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7677 return requestResimplify();
7678
7679 Value *Cond = SI->getCondition();
7680 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7681 if (simplifySwitchOnSelect(SI, Select))
7682 return requestResimplify();
7683
7684 // If the block only contains the switch, see if we can fold the block
7685 // away into any preds.
7686 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7687 if (foldValueComparisonIntoPredecessors(SI, Builder))
7688 return requestResimplify();
7689 }
7690
7691 // Try to transform the switch into an icmp and a branch.
7692 // The conversion from switch to comparison may lose information on
7693 // impossible switch values, so disable it early in the pipeline.
7694 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7695 return requestResimplify();
7696
7697 // Remove unreachable cases.
7698 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7699 return requestResimplify();
7700
7701 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
7702 return requestResimplify();
7703
7704 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7705 return requestResimplify();
7706
7707 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
7708 return requestResimplify();
7709
7710 // The conversion from switch to lookup tables results in difficult-to-analyze
7711 // code and makes pruning branches much harder. This is a problem if the
7712 // switch expression itself can still be restricted as a result of inlining or
7713 // CVP. Therefore, only apply this transformation during late stages of the
7714 // optimisation pipeline.
7715 if (Options.ConvertSwitchToLookupTable &&
7716 switchToLookupTable(SI, Builder, DTU, DL, TTI))
7717 return requestResimplify();
7718
7719 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7720 return requestResimplify();
7721
7722 if (reduceSwitchRange(SI, Builder, DL, TTI))
7723 return requestResimplify();
7724
7725 if (HoistCommon &&
7726 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
7727 return requestResimplify();
7728
7729 if (simplifyDuplicateSwitchArms(SI, DTU))
7730 return requestResimplify();
7731
7732 return false;
7733}
7734
/// Simplify an indirectbr terminator: prune destinations that are repeated or
/// whose address is never taken, then handle the cases where zero or one
/// destination is left, and finally try the select-based fold.
///
/// \param IBI the indirectbr instruction to simplify.
/// \returns true if anything was changed (or the result of
///          requestResimplify() when the select-based fold fires).
7735bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7736 BasicBlock *BB = IBI->getParent();
7737 bool Changed = false;
7738
7739 // Eliminate redundant destinations.
// NOTE(review): `Succs` and `RemovedSuccs` are used below, but their
// declarations are not visible in this listing (the embedded numbering skips
// 7740-7741) -- confirm their types against the full file.
7742 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7743 BasicBlock *Dest = IBI->getDestination(i);
// A destination is dropped when its address is not taken, or when inserting
// it into Succs reports that it was (presumably) already present.
7744 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
// Only destinations whose address is not taken are remembered for the
// dominator-tree update below; a mere repeat is not recorded.
7745 if (!Dest->hasAddressTaken())
7746 RemovedSuccs.insert(Dest);
7747 Dest->removePredecessor(BB);
7748 IBI->removeDestination(i);
// Step both the index and the bound back so that the loop's ++i looks at
// slot i again after the removal.
7749 --i;
7750 --e;
7751 Changed = true;
7752 }
7753 }
7754
// Report one BB -> RemovedSucc edge deletion per recorded block.
7755 if (DTU) {
7756 std::vector<DominatorTree::UpdateType> Updates;
7757 Updates.reserve(RemovedSuccs.size());
7758 for (auto *RemovedSucc : RemovedSuccs)
7759 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7760 DTU->applyUpdates(Updates);
7761 }
7762
7763 if (IBI->getNumDestinations() == 0) {
7764 // If the indirectbr has no successors, change it to unreachable.
7765 new UnreachableInst(IBI->getContext(), IBI->getIterator());
// NOTE(review): listing line 7766 is missing here; the removal of the old
// indirectbr is not visible in this view.
7767 return true;
7768 }
7769
7770 if (IBI->getNumDestinations() == 1) {
7771 // If the indirectbr has one successor, change it to a direct branch.
// NOTE(review): listing lines 7772-7773 are missing here; the statements that
// build the direct branch are not visible in this view.
7774 return true;
7775 }
7776
// An indirectbr whose address operand is a select gets its own fold.
7777 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7778 if (simplifyIndirectBrOnSelect(IBI, SI))
7779 return requestResimplify();
7780 }
7781 return Changed;
7782}
7783
7784/// Given a block with only a single landing pad and an unconditional branch
7785/// try to find another basic block which this one can be merged with. This
7786/// handles cases where we have multiple invokes with unique landing pads, but
7787/// a shared handler.
7788///
7789/// We specifically choose to not worry about merging non-empty blocks
7790/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7791/// practice, the optimizer produces empty landing pad blocks quite frequently
7792/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7793/// sinking in this file)
7794///
7795/// This is primarily a code size optimization. We need to avoid performing
7796/// any transform which might inhibit optimization (such as our ability to
7797/// specialize a particular handler via tail commoning). We do this by not
7798/// merging any blocks which require us to introduce a phi. Since the same
7799/// values are flowing through both blocks, we don't lose any ability to
7800/// specialize. If anything, we make such specialization more likely.
7801///
7802/// TODO - This transformation could remove entries from a phi in the target
7803/// block when the inputs in the phi are the same for the two blocks being
7804/// merged. In some cases, this could result in removal of the PHI entirely.
7806 BasicBlock *BB, DomTreeUpdater *DTU) {
// NOTE(review): the first line of this signature (listing line 7805) is not
// visible in this view. The body uses `LPad` and `BI`, and the visible call
// site passes (LPad, BI, BB, DTU) -- confirm the parameter types against the
// full file.
7807 auto Succ = BB->getUniqueSuccessor();
7808 assert(Succ);
7809 // If there's a phi in the successor block, we'd likely have to introduce
7810 // a phi into the merged landing pad block.
7811 if (isa<PHINode>(*Succ->begin()))
7812 return false;
7813
// Look among the other predecessors of Succ for a block that starts with an
// identical landingpad followed (debug intrinsics aside) by an identical
// branch.
7814 for (BasicBlock *OtherPred : predecessors(Succ)) {
7815 if (BB == OtherPred)
7816 continue;
7817 BasicBlock::iterator I = OtherPred->begin();
7818 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7819 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7820 continue;
// Skip over debug intrinsics that follow the landingpad.
7821 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7822 ;
7823 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7824 if (!BI2 || !BI2->isIdenticalTo(BI))
7825 continue;
7826
7827 std::vector<DominatorTree::UpdateType> Updates;
7828
7829 // We've found an identical block. Update our predecessors to take that
7830 // path instead and make ourselves dead.
// NOTE(review): the declaration of `UniquePreds` (listing line 7831) is not
// visible in this view; presumably it holds the distinct predecessors of BB.
7832 for (BasicBlock *Pred : UniquePreds) {
7833 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7834 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7835 "unexpected successor");
7836 II->setUnwindDest(OtherPred);
7837 if (DTU) {
7838 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7839 Updates.push_back({DominatorTree::Delete, Pred, BB});
7840 }
7841 }
7842
7843 // The debug info in OtherPred doesn't cover the merged control flow that
7844 // used to go through BB. We need to delete it or update it.
// Here it is deleted: every debug intrinsic in OtherPred is erased.
7845 for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7846 if (isa<DbgInfoIntrinsic>(Inst))
7847 Inst.eraseFromParent();
7848
// NOTE(review): the declaration of `UniqueSuccs` (listing line 7849) is not
// visible in this view; presumably it holds the distinct successors of BB.
7850 for (BasicBlock *Succ : UniqueSuccs) {
7851 Succ->removePredecessor(BB);
7852 if (DTU)
7853 Updates.push_back({DominatorTree::Delete, BB, Succ});
7854 }
7855
// BB is now bypassed: replace its branch with an unreachable terminator.
7856 IRBuilder<> Builder(BI);
7857 Builder.CreateUnreachable();
7858 BI->eraseFromParent();
7859 if (DTU)
7860 DTU->applyUpdates(Updates);
7861 return true;
7862 }
7863 return false;
7864}
7865
7866bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7867 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7868 : simplifyCondBranch(Branch, Builder);
7869}
7870
/// Simplify a block that ends in the unconditional branch \p BI. Tried in
/// order: removing an otherwise empty block, simplifying a block that holds
/// only an equality icmp against a constant, merging an empty landing pad
/// block with an equivalent one, and folding the branch into a predecessor
/// that shares the destination.
///
/// \returns true if one of the first three transforms fired, the result of
///          requestResimplify() if the last one did, false otherwise.
7871bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7872 IRBuilder<> &Builder) {
7873 BasicBlock *BB = BI->getParent();
7874 BasicBlock *Succ = BI->getSuccessor(0);
7875
7876 // If the Terminator is the only non-phi instruction, simplify the block.
7877 // If LoopHeader is provided, check if the block or its successor is a loop
7878 // header. (This is for early invocations before loop simplify and
7879 // vectorization to keep canonical loop forms for nested loops. These blocks
7880 // can be eliminated when the pass is invoked later in the back-end.)
7881 // Note that if BB has only one predecessor then we do not introduce new
7882 // backedge, so we can eliminate BB.
7883 bool NeedCanonicalLoop =
7884 Options.NeedCanonicalLoop &&
7885 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7886 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
// NOTE(review): the declaration of `I` (listing line 7887) is not visible in
// this view; from its uses below it is an iterator into BB, presumably
// positioned on the first instruction after PHIs and debug intrinsics.
// The entry block is never removed by this step.
7888 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7889 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7890 return true;
7891
7892 // If the only instruction in the block is a seteq/setne comparison against a
7893 // constant, try to simplify the block.
7894 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7895 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
// Skip debug intrinsics; the compare must be directly followed by the
// terminator.
7896 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7897 ;
7898 if (I->isTerminator() &&
7899 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7900 return true;
7901 }
7902
7903 // See if we can merge an empty landing pad block with another which is
7904 // equivalent.
7905 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
// Same skipping as above: only a landingpad followed by the terminator counts.
7906 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7907 ;
7908 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
7909 return true;
7910 }
7911
7912 // If this basic block is ONLY a compare and a branch, and if a predecessor
7913 // branches to us and our successor, fold the comparison into the
7914 // predecessor and use logical operations to update the incoming value
7915 // for PHI nodes in common successor.
7916 if (Options.SpeculateBlocks &&
7917 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7918 Options.BonusInstThreshold))
7919 return requestResimplify();
7920 return false;
7921}
7922
// NOTE(review): the signature of this helper (listing line 7923) is not
// visible in this view; the body reads a basic block named `BB`.
// Returns the one block that is the single predecessor of every predecessor
// of BB. Returns nullptr if some predecessor lacks a single predecessor or if
// two predecessors disagree; also nullptr when BB has no predecessors.
7924 BasicBlock *PredPred = nullptr;
7925 for (auto *P : predecessors(BB)) {
7926 BasicBlock *PPred = P->getSinglePredecessor();
// Give up as soon as a predecessor has no single predecessor, or its single
// predecessor differs from the one seen so far.
7927 if (!PPred || (PredPred && PredPred != PPred))
7928 return nullptr;
7929 PredPred = PPred;
7930 }
7931 return PredPred;
7932}
7933
7934/// Fold the following pattern:
7935/// bb0:
7936/// br i1 %cond1, label %bb1, label %bb2
7937/// bb1:
7938/// br i1 %cond2, label %bb3, label %bb4
7939/// bb2:
7940/// br i1 %cond2, label %bb4, label %bb3
7941/// bb3:
7942/// ...
7943/// bb4:
7944/// ...
7945/// into
7946/// bb0:
7947/// %cond = xor i1 %cond1, %cond2
7948/// br i1 %cond, label %bb4, label %bb3
7949/// bb3:
7950/// ...
7951/// bb4:
7952/// ...
7953/// NOTE: %cond2 always dominates the terminator of bb0.
// NOTE(review): the signature of this function (listing line 7954) is not
// visible in this view; the body uses a conditional branch `BI` and a
// dominator-tree updater `DTU`, matching the visible call
// mergeNestedCondBranch(BI, DTU).
7955 BasicBlock *BB = BI->getParent();
7956 BasicBlock *BB1 = BI->getSuccessor(0);
7957 BasicBlock *BB2 = BI->getSuccessor(1);
// A successor qualifies when it is not BB itself, contains nothing but a
// conditional branch, and that branch targets neither the successor itself
// nor BB, and neither target starts with a PHI node. On success SuccBI is set
// to that branch.
7958 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
7959 if (Succ == BB)
7960 return false;
7961 if (&Succ->front() != Succ->getTerminator())
7962 return false;
7963 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
7964 if (!SuccBI || !SuccBI->isConditional())
7965 return false;
7966 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
7967 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
7968 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
7969 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
7970 };
7971 BranchInst *BB1BI, *BB2BI;
7972 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
7973 return false;
7974
// Both inner branches must test the same condition with swapped targets.
7975 if (BB1BI->getCondition() != BB2BI->getCondition() ||
7976 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
7977 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
7978 return false;
7979
7980 BasicBlock *BB3 = BB1BI->getSuccessor(0);
7981 BasicBlock *BB4 = BB1BI->getSuccessor(1);
// Rewrite BI to branch on (cond1 xor cond2): true goes to BB4, false to BB3.
7982 IRBuilder<> Builder(BI);
7983 BI->setCondition(
7984 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
7985 BB1->removePredecessor(BB);
7986 BI->setSuccessor(0, BB4);
7987 BB2->removePredecessor(BB);
7988 BI->setSuccessor(1, BB3);
7989 if (DTU) {
// NOTE(review): the declaration of `Updates` (listing line 7990) is not
// visible in this view.
7991 Updates.push_back({DominatorTree::Delete, BB, BB1});
7992 Updates.push_back({DominatorTree::Insert, BB, BB4});
7993 Updates.push_back({DominatorTree::Delete, BB, BB2});
7994 Updates.push_back({DominatorTree::Insert, BB, BB3});
7995
7996 DTU->applyUpdates(Updates);
7997 }
// Gather branch weights from all three branches. A branch without weights
// contributes 1/1; new weights are only written if at least one had them.
7998 bool HasWeight = false;
7999 uint64_t BBTWeight, BBFWeight;
8000 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8001 HasWeight = true;
8002 else
8003 BBTWeight = BBFWeight = 1;
8004 uint64_t BB1TWeight, BB1FWeight;
8005 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8006 HasWeight = true;
8007 else
8008 BB1TWeight = BB1FWeight = 1;
8009 uint64_t BB2TWeight, BB2FWeight;
8010 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8011 HasWeight = true;
8012 else
8013 BB2TWeight = BB2FWeight = 1;
8014 if (HasWeight) {
// Weights[0] is for the new true edge (BB4), which used to be reached via
// BB-true/BB1-false or BB-false/BB2-true. Weights[1] is for the new false
// edge (BB3), reached via BB-true/BB1-true or BB-false/BB2-false.
8015 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8016 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8017 fitWeights(Weights);
8018 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
8019 }
8020 return true;
8021}
8022
/// Simplify a block that ends in the conditional branch \p BI by trying a
/// fixed sequence of transforms and stopping at the first one that succeeds.
///
/// Does nothing (returns false) when Options.SimplifyCondBranch is off or the
/// function carries the OptForFuzzing attribute.
///
/// \returns true / the result of requestResimplify() if a transform fired,
///          false otherwise.
///
/// NOTE(review): this listing skips source lines 8069, 8091, 8104, 8141 and
/// 8153 (see the jumps in the embedded numbering), so a few conditions and
/// statements below are only partly visible -- check the full file before
/// relying on those spots.
8023bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8024 assert(
8025 !isa<ConstantInt>(BI->getCondition()) &&
8026 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8027 "Tautological conditional branch should have been eliminated already.");
8028
8029 BasicBlock *BB = BI->getParent();
8030 if (!Options.SimplifyCondBranch ||
8031 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8032 return false;
8033
8034 // Conditional branch
8035 if (isValueEqualityComparison(BI)) {
8036 // If we only have one predecessor, and if it is a branch on this value,
8037 // see if that predecessor totally determines the outcome of this
8038 // switch.
8039 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8040 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8041 return requestResimplify();
8042
8043 // This block must be empty, except for the setcond inst, if it exists.
8044 // Ignore dbg and pseudo intrinsics.
8045 auto I = BB->instructionsWithoutDebug(true).begin();
8046 if (&*I == BI) {
8047 if (foldValueComparisonIntoPredecessors(BI, Builder))
8048 return requestResimplify();
8049 } else if (&*I == cast<Instruction>(BI->getCondition())) {
// The block is "condition, then branch": fold only if the branch comes
// immediately after the condition.
8050 ++I;
8051 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8052 return requestResimplify();
8053 }
8054 }
8055
8056 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8057 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8058 return true;
8059
8060 // If this basic block has dominating predecessor blocks and the dominating
8061 // blocks' conditions imply BI's condition, we know the direction of BI.
8062 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8063 if (Imp) {
8064 // Turn this into a branch on constant.
8065 auto *OldCond = BI->getCondition();
8066 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8067 : ConstantInt::getFalse(BB->getContext());
8068 BI->setCondition(TorF);
// NOTE(review): listing line 8069 is missing here; `OldCond` is presumably
// consumed by that statement.
8070 return requestResimplify();
8071 }
8072
8073 // If this basic block is ONLY a compare and a branch, and if a predecessor
8074 // branches to us and one of our successors, fold the comparison into the
8075 // predecessor and use logical operations to pick the right destination.
8076 if (Options.SpeculateBlocks &&
8077 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8078 Options.BonusInstThreshold))
8079 return requestResimplify();
8080
8081 // We have a conditional branch to two blocks that are only reachable
8082 // from BI. We know that the condbr dominates the two blocks, so see if
8083 // there is any identical code in the "then" and "else" blocks. If so, we
8084 // can hoist it up to the branching block.
8085 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8086 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8087 if (HoistCommon &&
8088 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8089 return requestResimplify();
8090
8092 Options.HoistLoadsStoresWithCondFaulting &&
8093 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8094 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
// Collects every non-terminator instruction of both successors. Fails if a
// successor ends in a terminator with more than one successor, or if an
// instruction does not pass isSafeCheapLoadStore (the rest of the failing
// condition is on the missing listing line 8104). Succeeds only if at least
// one instruction was collected.
8095 auto CanSpeculateConditionalLoadsStores = [&]() {
8096 for (auto *Succ : successors(BB)) {
8097 for (Instruction &I : *Succ) {
8098 if (I.isTerminator()) {
8099 if (I.getNumSuccessors() > 1)
8100 return false;
8101 continue;
8102 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8103 SpeculatedConditionalLoadsStores.size() ==
8105 return false;
8106 }
8107 SpeculatedConditionalLoadsStores.push_back(&I);
8108 }
8109 }
8110 return !SpeculatedConditionalLoadsStores.empty();
8111 };
8112
8113 if (CanSpeculateConditionalLoadsStores()) {
8114 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8115 std::nullopt);
8116 return requestResimplify();
8117 }
8118 }
8119 } else {
8120 // If Successor #1 has multiple preds, we may be able to conditionally
8121 // execute Successor #0 if it branches to Successor #1.
8122 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8123 if (Succ0TI->getNumSuccessors() == 1 &&
8124 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8125 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8126 return requestResimplify();
8127 }
8128 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8129 // If Successor #0 has multiple preds, we may be able to conditionally
8130 // execute Successor #1 if it branches to Successor #0.
8131 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8132 if (Succ1TI->getNumSuccessors() == 1 &&
8133 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8134 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8135 return requestResimplify();
8136 }
8137
8138 // If this is a branch on something for which we know the constant value in
8139 // predecessors (e.g. a phi node in the current block), thread control
8140 // through this block.
// NOTE(review): the condition guarding the next return (listing line 8141) is
// not visible in this view.
8142 return requestResimplify();
8143
8144 // Scan predecessor blocks for conditional branches.
8145 for (BasicBlock *Pred : predecessors(BB))
8146 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8147 if (PBI != BI && PBI->isConditional())
8148 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8149 return requestResimplify();
8150
8151 // Look for diamond patterns.
// NOTE(review): the statement that defines `PrevBB` (listing line 8153) is
// not visible in this view.
8152 if (MergeCondStores)
8154 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8155 if (PBI != BI && PBI->isConditional())
8156 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8157 return requestResimplify();
8158
8159 // Look for nested conditional branches.
8160 if (mergeNestedCondBranch(BI, DTU))
8161 return requestResimplify();
8162
8163 return false;
8164}
8165
8166/// Check if passing a value to an instruction will cause undefined behavior.
///
/// Only a null or undef constant \p V is considered. The function inspects
/// the first user of \p I that it knows how to handle, and only when that
/// user is in the same block as \p I and comes after it.
///
/// \param V the value being passed; anything that is not a Constant yields
///          false.
/// \param I the instruction receiving \p V; its users are inspected.
/// \param PtrValueMayBeModified set once a GEP has been looked through that
///          may move the pointer away from null; it makes the nonnull checks
///          on returns and call arguments answer false.
/// \returns true only if one of the recognized patterns shows the use is
///          undefined behavior.
8167static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8168 Constant *C = dyn_cast<Constant>(V);
8169 if (!C)
8170 return false;
8171
8172 if (I->use_empty())
8173 return false;
8174
8175 if (C->isNullValue() || isa<UndefValue>(C)) {
8176 // Only look at the first use we can handle, avoid hurting compile time with
8177 // long uselists
8178 auto FindUse = llvm::find_if(I->users(), [](auto *U) {
8179 auto *Use = cast<Instruction>(U);
8180 // Change this list when we want to add new instructions.
8181 switch (Use->getOpcode()) {
8182 default:
8183 return false;
8184 case Instruction::GetElementPtr:
8185 case Instruction::Ret:
8186 case Instruction::BitCast:
8187 case Instruction::Load:
8188 case Instruction::Store:
8189 case Instruction::Call:
8190 case Instruction::CallBr:
8191 case Instruction::Invoke:
8192 case Instruction::UDiv:
8193 case Instruction::URem:
8194 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8195 // implemented to avoid code complexity as it is unclear how useful such
8196 // logic is.
8197 case Instruction::SDiv:
8198 case Instruction::SRem:
8199 return true;
8200 }
8201 });
8202 if (FindUse == I->user_end())
8203 return false;
8204 auto *Use = cast<Instruction>(*FindUse);
8205 // Bail out if Use is not in the same BB as I or Use == I or Use comes
8206 // before I in the block. The latter two can be the case if Use is a
8207 // PHI node.
8208 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
8209 return false;
8210
8211 // Now make sure that there are no instructions in between that can alter
8212 // control flow (eg. calls)
// NOTE(review): the body of the predicate lambda below (listing line 8216) is
// not visible in this view.
8213 auto InstrRange =
8214 make_range(std::next(I->getIterator()), Use->getIterator());
8215 if (any_of(InstrRange, [](Instruction &I) {
8217 }))
8218 return false;
8219
8220 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8221 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
8222 if (GEP->getPointerOperand() == I) {
8223 // The current base address is null, there are four cases to consider:
8224 // getelementptr (TY, null, 0) -> null
8225 // getelementptr (TY, null, not zero) -> may be modified
8226 // getelementptr inbounds (TY, null, 0) -> null
8227 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8228 // undefined?
8229 if (!GEP->hasAllZeroIndices() &&
8230 (!GEP->isInBounds() ||
8231 NullPointerIsDefined(GEP->getFunction(),
8232 GEP->getPointerAddressSpace())))
8233 PtrValueMayBeModified = true;
// Recurse with the GEP as the receiving instruction, carrying the flag along.
8234 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8235 }
8236
8237 // Look through return.
8238 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Use)) {
8239 bool HasNoUndefAttr =
8240 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8241 // Return undefined to a noundef return value is undefined.
8242 if (isa<UndefValue>(C) && HasNoUndefAttr)
8243 return true;
8244 // Return null to a nonnull+noundef return value is undefined.
8245 if (C->isNullValue() && HasNoUndefAttr &&
8246 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8247 return !PtrValueMayBeModified;
8248 }
8249 }
8250
8251 // Load from null is undefined.
// Volatile loads are left alone, and so are address spaces where null is a
// defined address.
8252 if (LoadInst *LI = dyn_cast<LoadInst>(Use))
8253 if (!LI->isVolatile())
8254 return !NullPointerIsDefined(LI->getFunction(),
8255 LI->getPointerAddressSpace());
8256
8257 // Store to null is undefined.
// Only when I is the address being stored to, not the value being stored.
8258 if (StoreInst *SI = dyn_cast<StoreInst>(Use))
8259 if (!SI->isVolatile())
8260 return (!NullPointerIsDefined(SI->getFunction(),
8261 SI->getPointerAddressSpace())) &&
8262 SI->getPointerOperand() == I;
8263
8264 // llvm.assume(false/undef) always triggers immediate UB.
8265 if (auto *Assume = dyn_cast<AssumeInst>(Use)) {
8266 // Ignore assume operand bundles.
8267 if (I == Assume->getArgOperand(0))
8268 return true;
8269 }
8270
8271 if (auto *CB = dyn_cast<CallBase>(Use)) {
// Nothing can be concluded from a null value in a function where null is a
// defined pointer.
8272 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8273 return false;
8274 // A call to null is undefined.
8275 if (CB->getCalledOperand() == I)
8276 return true;
8277
8278 if (C->isNullValue()) {
8279 for (const llvm::Use &Arg : CB->args())
8280 if (Arg == I) {
8281 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
8282 if (CB->isPassingUndefUB(ArgIdx) &&
8283 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
8284 // Passing null to a nonnull+noundef argument is undefined.
8285 return !PtrValueMayBeModified;
8286 }
8287 }
8288 } else if (isa<UndefValue>(C)) {
8289 // Passing undef to a noundef argument is undefined.
8290 for (const llvm::Use &Arg : CB->args())
8291 if (Arg == I) {
8292 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
8293 if (CB->isPassingUndefUB(ArgIdx)) {
8294 // Passing undef to a noundef argument is undefined.
8295 return true;
8296 }
8297 }
8298 }
8299 }
8300 // Div/Rem by zero is immediate UB
// I must be the second operand (the divisor) of an integer div/rem.
8301 if (match(Use, m_BinOp(m_Value(), m_Specific(I))) && Use->isIntDivRem())
8302 return true;
8303 }
8304 return false;
8305}
8306
8307/// If BB has an incoming value that will always trigger undefined behavior
8308/// (eg. null pointer dereference), remove the branch leading here.
8310 DomTreeUpdater *DTU,
8311 AssumptionCache *AC) {
// NOTE(review): the first line of this signature (listing line 8309) is not
// visible in this view; the body operates on a basic block named `BB`.
// Scan every incoming value of every PHI in BB. At the first one that would
// always cause undefined behavior, cut the edge from its incoming block and
// return true. Only predecessors ending in a branch or a switch are handled.
8312 for (PHINode &PHI : BB->phis())
8313 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8314 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8315 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8316 Instruction *T = Predecessor->getTerminator();
8317 IRBuilder<> Builder(T);
8318 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8319 BB->removePredecessor(Predecessor);
8320 // Turn unconditional branches into unreachables and remove the dead
8321 // destination from conditional branches.
8322 if (BI->isUnconditional())
8323 Builder.CreateUnreachable();
8324 else {
8325 // Preserve guarding condition in assume, because it might not be
8326 // inferrable from any dominating condition.
8327 Value *Cond = BI->getCondition();
8328 CallInst *Assumption;
// Assume whichever truth value of Cond leads away from BB.
8329 if (BI->getSuccessor(0) == BB)
8330 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8331 else
8332 Assumption = Builder.CreateAssumption(Cond);
8333 if (AC)
8334 AC->registerAssumption(cast<AssumeInst>(Assumption));
// Branch unconditionally to the surviving successor.
8335 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8336 : BI->getSuccessor(0));
8337 }
8338 BI->eraseFromParent();
8339 if (DTU)
8340 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8341 return true;
8342 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8343 // Redirect all branches leading to UB into
8344 // a newly created unreachable block.
8345 BasicBlock *Unreachable = BasicBlock::Create(
8346 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8347 Builder.SetInsertPoint(Unreachable);
8348 // The new block contains only one instruction: Unreachable
8349 Builder.CreateUnreachable();
// removePredecessor is called once for each switch edge that is redirected
// away from BB (each matching case, plus the default destination).
8350 for (const auto &Case : SI->cases())
8351 if (Case.getCaseSuccessor() == BB) {
8352 BB->removePredecessor(Predecessor);
8353 Case.setSuccessor(Unreachable);
8354 }
8355 if (SI->getDefaultDest() == BB) {
8356 BB->removePredecessor(Predecessor);
8357 SI->setDefaultDest(Unreachable);
8358 }
8359
8360 if (DTU)
8361 DTU->applyUpdates(
8362 { { DominatorTree::Insert, Predecessor, Unreachable },
8363 { DominatorTree::Delete, Predecessor, BB } });
8364 return true;
8365 }
8366 }
8367
8368 return false;
8369}
8370
/// Perform one round of simplification on \p BB: block-level cleanups first,
/// then the simplifier matching the kind of terminator the block ends in.
///
/// \param BB the block to simplify; it must belong to a function and have a
///           terminator (both are asserted).
/// \returns true if anything was changed.
8371bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8372 bool Changed = false;
8373
8374 assert(BB && BB->getParent() && "Block not embedded in function!");
8375 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8376
8377 // Remove basic blocks that have no predecessors (except the entry block)...
8378 // or that just have themself as a predecessor. These are unreachable.
8379 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8380 BB->getSinglePredecessor() == BB) {
8381 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8382 DeleteDeadBlock(BB, DTU);
8383 return true;
8384 }
8385
8386 // Check to see if we can constant propagate this terminator instruction
8387 // away...
8388 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8389 /*TLI=*/nullptr, DTU);
8390
8391 // Check for and eliminate duplicate PHI nodes in this block.
8392 Changed |= EliminateDuplicatePHINodes(BB);
8393
8394 // Check for and remove branches that will always cause undefined behavior.
// NOTE(review): the condition guarding the next return (listing line 8395) is
// not visible in this view.
8396 return requestResimplify();
8397
8398 // Merge basic blocks into their predecessor if there is only one distinct
8399 // pred, and if there is only one distinct successor of the predecessor, and
8400 // if there are no PHI nodes.
8401 if (MergeBlockIntoPredecessor(BB, DTU))
8402 return true;
8403
8404 if (SinkCommon && Options.SinkCommonInsts)
8405 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8406 mergeCompatibleInvokes(BB, DTU)) {
8407 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8408 // so we may now have duplicate PHI's.
8409 // Let's rerun EliminateDuplicatePHINodes() first,
8410 // before foldTwoEntryPHINode() potentially converts them into select's,
8411 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8412 return true;
8413 }
8414
8415 IRBuilder<> Builder(BB);
8416
8417 if (Options.SpeculateBlocks &&
8418 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8419 // If there is a trivial two-entry PHI node in this basic block, and we can
8420 // eliminate it, do so now.
8421 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8422 if (PN->getNumIncomingValues() == 2)
8423 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8424 Options.SpeculateUnpredictables))
8425 return true;
8426 }
8427
// NOTE(review): the declaration of `Terminator` (listing line 8428) is not
// visible in this view; presumably it is BB's terminator instruction.
8429 Builder.SetInsertPoint(Terminator);
// Dispatch on the terminator's opcode; opcodes not listed here get no
// terminator-specific simplification.
8430 switch (Terminator->getOpcode()) {
8431 case Instruction::Br:
8432 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8433 break;
8434 case Instruction::Resume:
8435 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8436 break;
8437 case Instruction::CleanupRet:
8438 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8439 break;
8440 case Instruction::Switch:
8441 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8442 break;
8443 case Instruction::Unreachable:
8444 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8445 break;
8446 case Instruction::IndirectBr:
8447 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8448 break;
8449 }
8450
8451 return Changed;
8452}
8453
8454bool SimplifyCFGOpt::run(BasicBlock *BB) {
8455 bool Changed = false;
8456
8457 // Repeated simplify BB as long as resimplification is requested.
8458 do {
8459 Resimplify = false;
8460
8461 // Perform one round of simplifcation. Resimplify flag will be set if
8462 // another iteration is requested.
8463 Changed |= simplifyOnce(BB);
8464 } while (Resimplify);
8465
8466 return Changed;
8467}
8468
8471 ArrayRef<WeakVH> LoopHeaders) {
// NOTE(review): the first lines of this signature (listing lines 8469-8470)
// are not visible in this view; `BB`, `TTI`, `DTU` and `Options` are
// presumably its remaining parameters.
// Build a temporary SimplifyCFGOpt with the block's data layout and run it on
// BB, returning what run() returns.
8472 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8473 Options)
8474 .run(BB);
8475}
#define Fail
#define Success
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
hexagon gen pred
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< bool > HoistLoadsStoresWithCondFaulting("simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads/stores if the target supports " "conditional faulting"))
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return true if an instruction's type or any of its operands' types is a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch, try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights, bool IsExpected)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist or sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert)
If the target supports conditional faulting, we look for the following pattern:
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditonal load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool isLifeTimeMarker(const Instruction *I)
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition: APInt.h:1554
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1915
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:177
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:171
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:378
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:250
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:658
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:367
const Instruction & front() const
Definition: BasicBlock.h:471
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:481
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:497
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:331
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:467
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:489
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:717
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:386
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:67
size_t size() const
Definition: BasicBlock.h:469
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:677
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:485
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:631
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:516
The address of a basic block.
Definition: Constants.h:893
BasicBlock * getBasicBlock() const
Definition: Constants.h:924
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
Definition: InstrTypes.h:1576
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2625
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isNegative() const
Definition: Constants.h:203
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:258
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:187
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:866
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:873
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:151
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
iterator end()
Definition: DenseMap.h:84
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:202
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition: DenseMap.h:103
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
iterator begin()
Definition: Function.h:853
size_t size() const
Definition: Function.h:858
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
bool hasPostDomTree() const
Returns true if it holds a PostDomTreeT.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:108
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2280
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2045
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1300
Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1048
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:521
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1043
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:189
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2568
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1474
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:234
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1876
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:247
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:500
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1751
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1181
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2264
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1381
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2146
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1158
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1792
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2027
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1512
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1805
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1364
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2136
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2013
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1534
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1665
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1152
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1682
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2219
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:194
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1556
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2374
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1688
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1398
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2699
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
Return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:104
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:68
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:390
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:277
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
Definition: Instruction.h:906
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1750
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:949
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:42
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:176
static unsigned getPointerOperandIndex()
Definition: Instructions.h:257
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1069
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:704
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
Align getAlign() const
Definition: Instructions.h:333
bool isSimple() const
Definition: Instructions.h:370
Value * getValueOperand()
Definition: Instructions.h:378
bool isUnordered() const
Definition: Instructions.h:372
static unsigned getPointerOperandIndex()
Definition: Instructions.h:383
Value * getPointerOperand()
Definition: Instructions.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:234
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void set(Value *Val)
Definition: Value.h:886
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
op_range operands()
Definition: User.h:288
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:241
void setOperand(unsigned i, Value *Val)
Definition: User.h:233
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
static constexpr uint64_t MaximumAlignment
Definition: Value.h:811
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition: DenseSet.h:90
size_type size() const
Definition: DenseSet.h:81
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:507
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:864
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use \ID as an operand.
Definition: DebugInfo.cpp:1866
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:240
void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
Definition: DebugInfo.cpp:1880
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:47
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:854
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1732
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:546
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:136
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
Definition: ValueMapper.h:272
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B. Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:58
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2204
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2055
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1785
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2107
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1156
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1187
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1439
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:3236
auto succ_size(const MachineBasicBlock *BB)
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3439
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3722
@ And
Bitwise or logical AND of integers.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4225
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2014
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:235
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1624
bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:590
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2067
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1524
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:468
bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition: ValueMapper.h:281
void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
BasicBlock * Dest
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254