LLVM 20.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
78#include <algorithm>
79#include <cassert>
80#include <climits>
81#include <cstddef>
82#include <cstdint>
83#include <iterator>
84#include <map>
85#include <optional>
86#include <set>
87#include <tuple>
88#include <utility>
89#include <vector>
90
91using namespace llvm;
92using namespace PatternMatch;
93
94#define DEBUG_TYPE "simplifycfg"
95
97 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
98
99 cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
100 "into preserving DomTree,"));
101
102// Chosen as 2 so as to be cheap, but still to have enough power to fold
103// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
104// To catch this, we need to fold a compare and a select, hence '2' being the
105// minimum reasonable default.
107 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
108 cl::desc(
109 "Control the amount of phi node folding to perform (default = 2)"));
110
112 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
113 cl::desc("Control the maximal total instruction cost that we are willing "
114 "to speculatively execute to fold a 2-entry PHI node into a "
115 "select (default = 4)"));
116
117static cl::opt<bool>
118 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
119 cl::desc("Hoist common instructions up to the parent block"));
120
122 "simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden,
123 cl::init(true),
124 cl::desc("Hoist loads/stores if the target supports "
125 "conditional faulting"));
126
128 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
129 cl::desc("Control the maximal conditonal load/store that we are willing "
130 "to speculatively execute to eliminate conditional branch "
131 "(default = 6)"));
132
134 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
135 cl::init(20),
136 cl::desc("Allow reordering across at most this many "
137 "instructions when hoisting"));
138
139static cl::opt<bool>
140 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
141 cl::desc("Sink common instructions down to the end block"));
142
144 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
145 cl::desc("Hoist conditional stores if an unconditional store precedes"));
146
148 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
149 cl::desc("Hoist conditional stores even if an unconditional store does not "
150 "precede - hoist multiple conditional stores into a single "
151 "predicated store"));
152
154 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
155 cl::desc("When merging conditional stores, do so even if the resultant "
156 "basic blocks are unlikely to be if-converted as a result"));
157
159 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
160 cl::desc("Allow exactly one expensive instruction to be speculatively "
161 "executed"));
162
164 "max-speculation-depth", cl::Hidden, cl::init(10),
165 cl::desc("Limit maximum recursion depth when calculating costs of "
166 "speculatively executed instructions"));
167
168static cl::opt<int>
169 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
170 cl::init(10),
171 cl::desc("Max size of a block which is still considered "
172 "small enough to thread through"));
173
174// Two is chosen to allow one negation and a logical combine.
176 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
177 cl::init(2),
178 cl::desc("Maximum cost of combining conditions when "
179 "folding branches"));
180
182 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
183 cl::init(2),
184 cl::desc("Multiplier to apply to threshold when determining whether or not "
185 "to fold branch to common destination when vector operations are "
186 "present"));
187
189 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
190 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
191
193 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
194 cl::desc("Limit cases to analyze when converting a switch to select"));
195
196STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
197STATISTIC(NumLinearMaps,
198 "Number of switch instructions turned into linear mapping");
199STATISTIC(NumLookupTables,
200 "Number of switch instructions turned into lookup tables");
202 NumLookupTablesHoles,
203 "Number of switch instructions turned into lookup tables (holes checked)");
204STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
205STATISTIC(NumFoldValueComparisonIntoPredecessors,
206 "Number of value comparisons folded into predecessor basic blocks");
207STATISTIC(NumFoldBranchToCommonDest,
208 "Number of branches folded into predecessor basic block");
210 NumHoistCommonCode,
211 "Number of common instruction 'blocks' hoisted up to the begin block");
212STATISTIC(NumHoistCommonInstrs,
213 "Number of common instructions hoisted up to the begin block");
214STATISTIC(NumSinkCommonCode,
215 "Number of common instruction 'blocks' sunk down to the end block");
216STATISTIC(NumSinkCommonInstrs,
217 "Number of common instructions sunk down to the end block");
218STATISTIC(NumSpeculations, "Number of speculative executed instructions");
219STATISTIC(NumInvokes,
220 "Number of invokes with empty resume blocks simplified into calls");
221STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
222STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
223
224namespace {
225
226// The first field contains the value that the switch produces when a certain
227// case group is selected, and the second field is a vector containing the
228// cases composing the case group.
229using SwitchCaseResultVectorTy =
231
232// The first field contains the phi node that generates a result of the switch
233// and the second field contains the value generated for a certain case in the
234// switch for that PHI.
235using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
236
237/// ValueEqualityComparisonCase - Represents a case of a switch.
238struct ValueEqualityComparisonCase {
240 BasicBlock *Dest;
241
242 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
243 : Value(Value), Dest(Dest) {}
244
245 bool operator<(ValueEqualityComparisonCase RHS) const {
246 // Comparing pointers is ok as we only rely on the order for uniquing.
247 return Value < RHS.Value;
248 }
249
250 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
251};
252
253class SimplifyCFGOpt {
255 DomTreeUpdater *DTU;
256 const DataLayout &DL;
257 ArrayRef<WeakVH> LoopHeaders;
258 const SimplifyCFGOptions &Options;
259 bool Resimplify;
260
261 Value *isValueEqualityComparison(Instruction *TI);
262 BasicBlock *getValueEqualityComparisonCases(
263 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
264 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
265 BasicBlock *Pred,
266 IRBuilder<> &Builder);
267 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
268 Instruction *PTI,
269 IRBuilder<> &Builder);
270 bool foldValueComparisonIntoPredecessors(Instruction *TI,
271 IRBuilder<> &Builder);
272
273 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
274 bool simplifySingleResume(ResumeInst *RI);
275 bool simplifyCommonResume(ResumeInst *RI);
276 bool simplifyCleanupReturn(CleanupReturnInst *RI);
277 bool simplifyUnreachable(UnreachableInst *UI);
278 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
279 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
280 bool simplifyIndirectBr(IndirectBrInst *IBI);
281 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
282 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
283 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
284
285 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
286 IRBuilder<> &Builder);
287
288 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
289 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
290 Instruction *TI, Instruction *I1,
291 SmallVectorImpl<Instruction *> &OtherSuccTIs);
292 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
293 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
294 BasicBlock *TrueBB, BasicBlock *FalseBB,
295 uint32_t TrueWeight, uint32_t FalseWeight);
296 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
297 const DataLayout &DL);
298 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
299 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
300 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
301
302public:
303 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
304 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
305 const SimplifyCFGOptions &Opts)
306 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
307 assert((!DTU || !DTU->hasPostDomTree()) &&
308 "SimplifyCFG is not yet capable of maintaining validity of a "
309 "PostDomTree, so don't ask for it.");
310 }
311
312 bool simplifyOnce(BasicBlock *BB);
313 bool run(BasicBlock *BB);
314
315 // Helper to set Resimplify and return change indication.
316 bool requestResimplify() {
317 Resimplify = true;
318 return true;
319 }
320};
321
322} // end anonymous namespace
323
324/// Return true if all the PHI nodes in the basic block \p BB
325/// receive compatible (identical) incoming values when coming from
326/// all of the predecessor blocks that are specified in \p IncomingBlocks.
327///
328/// Note that if the values aren't exactly identical, but \p EquivalenceSet
329/// is provided, and *both* of the values are present in the set,
330/// then they are considered equal.
332 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
333 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
334 assert(IncomingBlocks.size() == 2 &&
335 "Only for a pair of incoming blocks at the time!");
336
337 // FIXME: it is okay if one of the incoming values is an `undef` value,
338 // iff the other incoming value is guaranteed to be a non-poison value.
339 // FIXME: it is okay if one of the incoming values is a `poison` value.
340 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
341 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
342 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
343 if (IV0 == IV1)
344 return true;
345 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
346 EquivalenceSet->contains(IV1))
347 return true;
348 return false;
349 });
350}
351
352/// Return true if it is safe to merge these two
353/// terminator instructions together.
354static bool
356 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
357 if (SI1 == SI2)
358 return false; // Can't merge with self!
359
360 // It is not safe to merge these two switch instructions if they have a common
361 // successor, and if that successor has a PHI node, and if *that* PHI node has
362 // conflicting incoming values from the two switch blocks.
363 BasicBlock *SI1BB = SI1->getParent();
364 BasicBlock *SI2BB = SI2->getParent();
365
366 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
367 bool Fail = false;
368 for (BasicBlock *Succ : successors(SI2BB)) {
369 if (!SI1Succs.count(Succ))
370 continue;
371 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
372 continue;
373 Fail = true;
374 if (FailBlocks)
375 FailBlocks->insert(Succ);
376 else
377 break;
378 }
379
380 return !Fail;
381}
382
383/// Update PHI nodes in Succ to indicate that there will now be entries in it
384/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
385/// will be the same as those coming in from ExistPred, an existing predecessor
386/// of Succ.
387static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
388 BasicBlock *ExistPred,
389 MemorySSAUpdater *MSSAU = nullptr) {
390 for (PHINode &PN : Succ->phis())
391 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
392 if (MSSAU)
393 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
394 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
395}
396
397/// Compute an abstract "cost" of speculating the given instruction,
398/// which is assumed to be safe to speculate. TCC_Free means cheap,
399/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
400/// expensive.
402 const TargetTransformInfo &TTI) {
404}
405
406/// If we have a merge point of an "if condition" as accepted above,
407/// return true if the specified value dominates the block. We don't handle
408/// the true generality of domination here, just a special case which works
409/// well enough for us.
410///
411/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
412/// see if V (which must be an instruction) and its recursive operands
413/// that do not dominate BB have a combined cost lower than Budget and
414/// are non-trapping. If both are true, the instruction is inserted into the
415/// set and true is returned.
416///
417/// The cost for most non-trapping instructions is defined as 1 except for
418/// Select whose cost is 2.
419///
420/// After this function returns, Cost is increased by the cost of
421/// V plus its non-dominating operands. If that cost is greater than
422/// Budget, false is returned and Cost is undefined.
423static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
424 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
427 AssumptionCache *AC, unsigned Depth = 0) {
428 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
429 // so limit the recursion depth.
430 // TODO: While this recursion limit does prevent pathological behavior, it
431 // would be better to track visited instructions to avoid cycles.
433 return false;
434
435 Instruction *I = dyn_cast<Instruction>(V);
436 if (!I) {
437 // Non-instructions dominate all instructions and can be executed
438 // unconditionally.
439 return true;
440 }
441 BasicBlock *PBB = I->getParent();
442
443 // We don't want to allow weird loops that might have the "if condition" in
444 // the bottom of this block.
445 if (PBB == BB)
446 return false;
447
448 // If this instruction is defined in a block that contains an unconditional
449 // branch to BB, then it must be in the 'conditional' part of the "if
450 // statement". If not, it definitely dominates the region.
451 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
452 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
453 return true;
454
455 // If we have seen this instruction before, don't count it again.
456 if (AggressiveInsts.count(I))
457 return true;
458
459 // Okay, it looks like the instruction IS in the "condition". Check to
460 // see if it's a cheap instruction to unconditionally compute, and if it
461 // only uses stuff defined outside of the condition. If so, hoist it out.
462 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
463 return false;
464
466
467 // Allow exactly one instruction to be speculated regardless of its cost
468 // (as long as it is safe to do so).
469 // This is intended to flatten the CFG even if the instruction is a division
470 // or other expensive operation. The speculation of an expensive instruction
471 // is expected to be undone in CodeGenPrepare if the speculation has not
472 // enabled further IR optimizations.
473 if (Cost > Budget &&
474 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
475 !Cost.isValid()))
476 return false;
477
478 // Okay, we can only really hoist these out if their operands do
479 // not take us over the cost threshold.
480 for (Use &Op : I->operands())
481 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
482 TTI, AC, Depth + 1))
483 return false;
484 // Okay, it's safe to do this! Remember this instruction.
485 AggressiveInsts.insert(I);
486 return true;
487}
488
489/// Extract ConstantInt from value, looking through IntToPtr
490/// and PointerNullValue. Return NULL if value is not a constant int.
492 // Normal constant int.
493 ConstantInt *CI = dyn_cast<ConstantInt>(V);
494 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
495 DL.isNonIntegralPointerType(V->getType()))
496 return CI;
497
498 // This is some kind of pointer constant. Turn it into a pointer-sized
499 // ConstantInt if possible.
500 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
501
502 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
503 if (isa<ConstantPointerNull>(V))
504 return ConstantInt::get(PtrTy, 0);
505
506 // IntToPtr const int.
507 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
508 if (CE->getOpcode() == Instruction::IntToPtr)
509 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
510 // The constant is very likely to have the right type already.
511 if (CI->getType() == PtrTy)
512 return CI;
513 else
514 return cast<ConstantInt>(
515 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
516 }
517 return nullptr;
518}
519
520namespace {
521
522/// Given a chain of or (||) or and (&&) comparison of a value against a
523/// constant, this will try to recover the information required for a switch
524/// structure.
525/// It will depth-first traverse the chain of comparisons, looking for patterns
526/// like %a == 12 or %a < 4, and combine them to produce a set of integers
527/// representing the different cases for the switch.
528/// Note that if the chain is composed of '||' it will build the set of values
529/// that match the comparisons (i.e. any of these values satisfies the chain),
530/// while for a chain of '&&' it will build the set of values that make the
531/// test fail.
532struct ConstantComparesGatherer {
533 const DataLayout &DL;
534
535 /// Value found for the switch comparison
536 Value *CompValue = nullptr;
537
538 /// Extra clause to be checked before the switch
539 Value *Extra = nullptr;
540
541 /// Set of integers to match in switch
543
544 /// Number of comparisons matched in the and/or chain
545 unsigned UsedICmps = 0;
546
547 /// Construct and compute the result for the comparison instruction Cond
548 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
549 gather(Cond);
550 }
551
552 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
553 ConstantComparesGatherer &
554 operator=(const ConstantComparesGatherer &) = delete;
555
556private:
557 /// Try to set the current value used for the comparison, it succeeds only if
558 /// it wasn't set before or if the new value is the same as the old one
559 bool setValueOnce(Value *NewVal) {
560 if (CompValue && CompValue != NewVal)
561 return false;
562 CompValue = NewVal;
563 return (CompValue != nullptr);
564 }
565
566 /// Try to match Instruction "I" as a comparison against a constant and
567 /// populates the array Vals with the set of values that match (or do not
568 /// match depending on isEQ).
569 /// Return false on failure. On success, the Value the comparison matched
570 /// against is placed in CompValue.
571 /// If CompValue is already set, the function is expected to fail if a match
572 /// is found but the value compared to is different.
573 bool matchInstruction(Instruction *I, bool isEQ) {
574 // If this is an icmp against a constant, handle this as one of the cases.
575 ICmpInst *ICI;
576 ConstantInt *C;
577 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
578 (C = getConstantInt(I->getOperand(1), DL)))) {
579 return false;
580 }
581
582 Value *RHSVal;
583 const APInt *RHSC;
584
585 // Pattern match a special case
586 // (x & ~2^z) == y --> x == y || x == y|2^z
587 // This undoes a transformation done by instcombine to fuse 2 compares.
588 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
589 // It's a little bit hard to see why the following transformations are
590 // correct. Here is a CVC3 program to verify them for 64-bit values:
591
592 /*
593 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
594 x : BITVECTOR(64);
595 y : BITVECTOR(64);
596 z : BITVECTOR(64);
597 mask : BITVECTOR(64) = BVSHL(ONE, z);
598 QUERY( (y & ~mask = y) =>
599 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
600 );
601 QUERY( (y | mask = y) =>
602 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
603 );
604 */
605
606 // Please note that each pattern must be a dual implication (<--> or
607 // iff). One directional implication can create spurious matches. If the
608 // implication is only one-way, an unsatisfiable condition on the left
609 // side can imply a satisfiable condition on the right side. Dual
610 // implication ensures that satisfiable conditions are transformed to
611 // other satisfiable conditions and unsatisfiable conditions are
612 // transformed to other unsatisfiable conditions.
613
614 // Here is a concrete example of an unsatisfiable condition on the left
615 // implying a satisfiable condition on the right:
616 //
617 // mask = (1 << z)
618 // (x & ~mask) == y --> (x == y || x == (y | mask))
619 //
620 // Substituting y = 3, z = 0 yields:
621 // (x & -2) == 3 --> (x == 3 || x == 2)
622
623 // Pattern match a special case:
624 /*
625 QUERY( (y & ~mask = y) =>
626 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
627 );
628 */
629 if (match(ICI->getOperand(0),
630 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
631 APInt Mask = ~*RHSC;
632 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
633 // If we already have a value for the switch, it has to match!
634 if (!setValueOnce(RHSVal))
635 return false;
636
637 Vals.push_back(C);
638 Vals.push_back(
639 ConstantInt::get(C->getContext(),
640 C->getValue() | Mask));
641 UsedICmps++;
642 return true;
643 }
644 }
645
646 // Pattern match a special case:
647 /*
648 QUERY( (y | mask = y) =>
649 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
650 );
651 */
652 if (match(ICI->getOperand(0),
653 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
654 APInt Mask = *RHSC;
655 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
656 // If we already have a value for the switch, it has to match!
657 if (!setValueOnce(RHSVal))
658 return false;
659
660 Vals.push_back(C);
661 Vals.push_back(ConstantInt::get(C->getContext(),
662 C->getValue() & ~Mask));
663 UsedICmps++;
664 return true;
665 }
666 }
667
668 // If we already have a value for the switch, it has to match!
669 if (!setValueOnce(ICI->getOperand(0)))
670 return false;
671
672 UsedICmps++;
673 Vals.push_back(C);
674 return ICI->getOperand(0);
675 }
676
677 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
678 ConstantRange Span =
680
681 // Shift the range if the compare is fed by an add. This is the range
682 // compare idiom as emitted by instcombine.
683 Value *CandidateVal = I->getOperand(0);
684 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
685 Span = Span.subtract(*RHSC);
686 CandidateVal = RHSVal;
687 }
688
689 // If this is an and/!= check, then we are looking to build the set of
690 // values that *don't* pass the and chain. I.e. to turn "x ugt 2" into
691 // x != 0 && x != 1.
692 if (!isEQ)
693 Span = Span.inverse();
694
695 // If there are a ton of values, we don't want to make a ginormous switch.
696 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
697 return false;
698 }
699
700 // If we already have a value for the switch, it has to match!
701 if (!setValueOnce(CandidateVal))
702 return false;
703
704 // Add all values from the range to the set
705 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
706 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
707
708 UsedICmps++;
709 return true;
710 }
711
712 /// Given a potentially 'or'd or 'and'd together collection of icmp
713 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
714 /// the value being compared, and stick the list constants into the Vals
715 /// vector.
716 /// One "Extra" case is allowed to differ from the others.
717 void gather(Value *V) {
718 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
719
720 // Keep a stack (SmallVector for efficiency) for depth-first traversal
723
724 // Initialize
725 Visited.insert(V);
726 DFT.push_back(V);
727
728 while (!DFT.empty()) {
729 V = DFT.pop_back_val();
730
731 if (Instruction *I = dyn_cast<Instruction>(V)) {
732 // If it is a || (or && depending on isEQ), process the operands.
733 Value *Op0, *Op1;
734 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
735 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
736 if (Visited.insert(Op1).second)
737 DFT.push_back(Op1);
738 if (Visited.insert(Op0).second)
739 DFT.push_back(Op0);
740
741 continue;
742 }
743
744 // Try to match the current instruction
745 if (matchInstruction(I, isEQ))
746 // Match succeeded, continue the loop
747 continue;
748 }
749
750 // One element of the sequence of || (or &&) could not be matched as a
751 // comparison against the same value as the others.
752 // We allow only one "Extra" case to be checked before the switch.
753 if (!Extra) {
754 Extra = V;
755 continue;
756 }
757 // Failed to parse a proper sequence, abort now
758 CompValue = nullptr;
759 break;
760 }
761 }
762};
763
764} // end anonymous namespace
765
767 MemorySSAUpdater *MSSAU = nullptr) {
768 Instruction *Cond = nullptr;
769 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
770 Cond = dyn_cast<Instruction>(SI->getCondition());
771 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
772 if (BI->isConditional())
773 Cond = dyn_cast<Instruction>(BI->getCondition());
774 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
775 Cond = dyn_cast<Instruction>(IBI->getAddress());
776 }
777
778 TI->eraseFromParent();
779 if (Cond)
781}
782
783/// Return true if the specified terminator checks
784/// to see if a value is equal to constant integer value.
785Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
786 Value *CV = nullptr;
787 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
788 // Do not permit merging of large switch instructions into their
789 // predecessors unless there is only one predecessor.
790 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
791 CV = SI->getCondition();
792 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
793 if (BI->isConditional() && BI->getCondition()->hasOneUse())
794 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
795 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
796 CV = ICI->getOperand(0);
797 }
798
799 // Unwrap any lossless ptrtoint cast.
800 if (CV) {
801 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
802 Value *Ptr = PTII->getPointerOperand();
803 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
804 CV = Ptr;
805 }
806 }
807 return CV;
808}
809
810/// Given a value comparison instruction,
811/// decode all of the 'cases' that it represents and return the 'default' block.
812BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
813 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
814 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
815 Cases.reserve(SI->getNumCases());
816 for (auto Case : SI->cases())
817 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
818 Case.getCaseSuccessor()));
819 return SI->getDefaultDest();
820 }
821
822 BranchInst *BI = cast<BranchInst>(TI);
823 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
824 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
825 Cases.push_back(ValueEqualityComparisonCase(
826 getConstantInt(ICI->getOperand(1), DL), Succ));
827 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
828}
829
830/// Given a vector of bb/value pairs, remove any entries
831/// in the list that match the specified block.
832static void
834 std::vector<ValueEqualityComparisonCase> &Cases) {
835 llvm::erase(Cases, BB);
836}
837
838/// Return true if there are any keys in C1 that exist in C2 as well.
839static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
840 std::vector<ValueEqualityComparisonCase> &C2) {
841 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
842
843 // Make V1 be smaller than V2.
844 if (V1->size() > V2->size())
845 std::swap(V1, V2);
846
847 if (V1->empty())
848 return false;
849 if (V1->size() == 1) {
850 // Just scan V2.
851 ConstantInt *TheVal = (*V1)[0].Value;
852 for (const ValueEqualityComparisonCase &VECC : *V2)
853 if (TheVal == VECC.Value)
854 return true;
855 }
856
857 // Otherwise, just sort both lists and compare element by element.
858 array_pod_sort(V1->begin(), V1->end());
859 array_pod_sort(V2->begin(), V2->end());
860 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
861 while (i1 != e1 && i2 != e2) {
862 if ((*V1)[i1].Value == (*V2)[i2].Value)
863 return true;
864 if ((*V1)[i1].Value < (*V2)[i2].Value)
865 ++i1;
866 else
867 ++i2;
868 }
869 return false;
870}
871
872// Set branch weights on SwitchInst. This sets the metadata if there is at
873// least one non-zero weight.
875 bool IsExpected) {
876 // Check that there is at least one non-zero weight. Otherwise, pass
877 // nullptr to setMetadata which will erase the existing metadata.
878 MDNode *N = nullptr;
879 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
880 N = MDBuilder(SI->getParent()->getContext())
881 .createBranchWeights(Weights, IsExpected);
882 SI->setMetadata(LLVMContext::MD_prof, N);
883}
884
885// Similar to the above, but for branch and select instructions that take
886// exactly 2 weights.
887static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
888 uint32_t FalseWeight, bool IsExpected) {
889 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
890 // Check that there is at least one non-zero weight. Otherwise, pass
891 // nullptr to setMetadata which will erase the existing metadata.
892 MDNode *N = nullptr;
893 if (TrueWeight || FalseWeight)
894 N = MDBuilder(I->getParent()->getContext())
895 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
896 I->setMetadata(LLVMContext::MD_prof, N);
897}
898
899/// If TI is known to be a terminator instruction and its block is known to
900/// only have a single predecessor block, check to see if that predecessor is
901/// also a value comparison with the same value, and if that comparison
902/// determines the outcome of this comparison. If so, simplify TI. This does a
903/// very limited form of jump threading.
///
/// \param TI the value-equality terminator to simplify.
/// \param Pred the single predecessor of TI's block.
/// \param Builder used to create the replacement unconditional branch.
/// \returns true if TI's successors were pruned or TI was replaced by an
/// unconditional branch, false if nothing was changed.
904bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
905 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
906 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
907 if (!PredVal)
908 return false; // Not a value comparison in predecessor.
909
910 Value *ThisVal = isValueEqualityComparison(TI);
911 assert(ThisVal && "This isn't a value comparison!!");
912 if (ThisVal != PredVal)
913 return false; // The two terminators compare different values.
914
915 // TODO: Preserve branch weight metadata, similarly to how
916 // foldValueComparisonIntoPredecessors preserves it.
917
918 // Find out information about when control will move from Pred to TI's block.
919 std::vector<ValueEqualityComparisonCase> PredCases;
920 BasicBlock *PredDef =
921 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
922 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
923
924 // Find information about how control leaves this block.
925 std::vector<ValueEqualityComparisonCase> ThisCases;
926 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
927 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
928
929 // If TI's block is the default block from Pred's comparison, potentially
930 // simplify TI based on this knowledge.
931 if (PredDef == TI->getParent()) {
932 // If we are here, we know that the value is none of those cases listed in
933 // PredCases. If there are any cases in ThisCases that are in PredCases, we
934 // can simplify TI.
935 if (!valuesOverlap(PredCases, ThisCases))
936 return false;
937
938 if (isa<BranchInst>(TI)) {
939 // Okay, one of the successors of this condbr is dead. Convert it to an
940 // uncond br.
941 assert(ThisCases.size() == 1 && "Branch can only have one case!");
942 // Insert the new branch.
943 Instruction *NI = Builder.CreateBr(ThisDef);
944 (void)NI;
945
946 // Remove PHI node entries for the dead edge.
// (PredDef equals TI's own block inside this branch of the outer `if`.)
947 ThisCases[0].Dest->removePredecessor(PredDef);
948
949 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
950 << "Through successor TI: " << *TI << "Leaving: " << *NI
951 << "\n");
952
// NOTE(review): listing line 953 is absent here; no visible statement in this
// branch removes the old terminator TI after the new branch was created.
954
955 if (DTU)
956 DTU->applyUpdates(
957 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
958
959 return true;
960 }
961
962 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
963 // Okay, TI has cases that are statically dead, prune them away.
// NOTE(review): listing line 964 is absent here; it presumably declares the
// DeadCases set that is filled and queried below.
965 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
966 DeadCases.insert(PredCases[i].Value);
967
968 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
969 << "Through successor TI: " << *TI);
970
// Per successor, the number of switch cases that still target it after
// pruning; only maintained when a DomTreeUpdater is present.
971 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
// The cases are visited from last to first (presumably so that removing a
// case does not disturb the cases not yet visited - TODO confirm).
972 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
973 --i;
974 auto *Successor = i->getCaseSuccessor();
975 if (DTU)
976 ++NumPerSuccessorCases[Successor];
977 if (DeadCases.count(i->getCaseValue())) {
978 Successor->removePredecessor(PredDef);
979 SI.removeCase(i);
980 if (DTU)
981 --NumPerSuccessorCases[Successor];
982 }
983 }
984
985 if (DTU) {
// A successor whose case count dropped to zero is reported to the
// DomTreeUpdater as a deleted edge.
986 std::vector<DominatorTree::UpdateType> Updates;
987 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
988 if (I.second == 0)
989 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
990 DTU->applyUpdates(Updates);
991 }
992
993 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
994 return true;
995 }
996
997 // Otherwise, TI's block must correspond to some matched value. Find out
998 // which value (or set of values) this is.
999 ConstantInt *TIV = nullptr;
1000 BasicBlock *TIBB = TI->getParent();
1001 for (const auto &[Value, Dest] : PredCases)
1002 if (Dest == TIBB) {
1003 if (TIV)
1004 return false; // Cannot handle multiple values coming to this block.
1005 TIV = Value;
1006 }
1007 assert(TIV && "No edge from pred to succ?");
1008
1009 // Okay, we found the one constant that our value can be if we get into TI's
1010 // BB. Find out which successor will unconditionally be branched to.
1011 BasicBlock *TheRealDest = nullptr;
1012 for (const auto &[Value, Dest] : ThisCases)
1013 if (Value == TIV) {
1014 TheRealDest = Dest;
1015 break;
1016 }
1017
1018 // If not handled by any explicit cases, it is handled by the default case.
1019 if (!TheRealDest)
1020 TheRealDest = ThisDef;
1021
1022 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1023
1024 // Remove PHI node entries for dead edges.
// CheckEdge lets exactly one edge to TheRealDest survive: the first time
// TheRealDest is seen it is skipped and CheckEdge is cleared, so any further
// duplicate edge to TheRealDest has its predecessor entry removed as well.
// Only successors other than TheRealDest are recorded in RemovedSuccs.
1025 BasicBlock *CheckEdge = TheRealDest;
1026 for (BasicBlock *Succ : successors(TIBB))
1027 if (Succ != CheckEdge) {
1028 if (Succ != TheRealDest)
1029 RemovedSuccs.insert(Succ);
1030 Succ->removePredecessor(TIBB);
1031 } else
1032 CheckEdge = nullptr;
1033
1034 // Insert the new branch.
1035 Instruction *NI = Builder.CreateBr(TheRealDest);
1036 (void)NI;
1037
1038 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1039 << "Through successor TI: " << *TI << "Leaving: " << *NI
1040 << "\n");
1041
// NOTE(review): listing lines 1042 and 1044 are absent here; no visible
// statement removes the old terminator TI, and the Updates container used
// below is not declared in the visible text.
1043 if (DTU) {
1045 Updates.reserve(RemovedSuccs.size());
1046 for (auto *RemovedSucc : RemovedSuccs)
1047 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1048 DTU->applyUpdates(Updates);
1049 }
1050 return true;
1051}
1052
1053namespace {
1054
1055/// This class implements a stable ordering of constant
1056/// integers that does not depend on their address. This is important for
1057/// applications that sort ConstantInt's to ensure uniqueness.
1058struct ConstantIntOrdering {
1059 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1060 return LHS->getValue().ult(RHS->getValue());
1061 }
1062};
1063
1064} // end anonymous namespace
1065
// Three-way comparison of the two ConstantInts pointed to by P1 and P2.
// Returns 0 when both are the same ConstantInt object, 1 when the first
// value is unsigned-less-than the second, and -1 otherwise.
// NOTE(review): the sign is inverted relative to the usual "negative means
// less" convention; if this is used as a qsort-style callback the result is
// a descending order - confirm against the callers.
// NOTE(review): the listing line carrying the function name and the first
// parameter (P1) is absent here.
1067 ConstantInt *const *P2) {
1068 const ConstantInt *LHS = *P1;
1069 const ConstantInt *RHS = *P2;
1070 if (LHS == RHS)
1071 return 0;
1072 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1073}
1074
1075/// Get Weights of a given terminator, the default weight is at the front
1076/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1077/// metadata.
/// TI must carry MD_prof metadata (asserted below).
/// NOTE(review): the listing line carrying the function name and the TI
/// parameter is absent here.
1079 SmallVectorImpl<uint64_t> &Weights) {
1080 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1081 assert(MD && "Invalid branch-weight metadata");
1082 extractFromBranchWeightMD64(MD, Weights);
1083
1084 // If TI is a conditional eq, the default case is the false case,
1085 // and the corresponding branch-weight data is at index 2. We swap the
1086 // default weight to be the first entry.
// In the extracted two-element vector that weight is the last entry, hence
// the front/back swap. For any other predicate the vector is left as is.
1087 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1088 assert(Weights.size() == 2);
1089 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1090 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1091 std::swap(Weights.front(), Weights.back());
1092 }
1093}
1094
1095/// Keep halving the weights until all can fit in uint32_t.
/// This is done with one right shift applied to every weight: the shift
/// amount is the number of significant bits the largest weight has beyond 32,
/// so the relative proportions are kept up to truncation.
/// The result of max_element is dereferenced unconditionally, so Weights is
/// assumed to be non-empty.
/// NOTE(review): the listing line carrying the signature is absent here.
1097 uint64_t Max = *llvm::max_element(Weights);
1098 if (Max > UINT_MAX) {
1099 unsigned Offset = 32 - llvm::countl_zero(Max);
1100 for (uint64_t &I : Weights)
1101 I >>= Offset;
1102 }
1103}
1104
// Clone every non-terminator instruction of BB ("bonus instructions") into
// PredBlock, right before PredBlock's terminator, and redirect the PHI uses
// that come in from PredBlock to the clones.
// VMap is used to remap the operands of each clone and is extended with an
// original -> clone entry for every cloned non-debug instruction.
// NOTE(review): the listing line carrying the function name is absent here.
1106 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1107 Instruction *PTI = PredBlock->getTerminator();
1108
1109 // If we have bonus instructions, clone them into the predecessor block.
1110 // Note that there may be multiple predecessor blocks, so we cannot move
1111 // bonus instructions to a predecessor block.
1112 for (Instruction &BonusInst : *BB) {
1113 if (BonusInst.isTerminator())
1114 continue;
1115
1116 Instruction *NewBonusInst = BonusInst.clone();
1117
1118 if (!isa<DbgInfoIntrinsic>(BonusInst) &&
1119 PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
1120 // Unless the instruction has the same !dbg location as the original
1121 // branch, drop it. When we fold the bonus instructions we want to make
1122 // sure we reset their debug locations in order to avoid stepping on
1123 // dead code caused by folding dead branches.
1124 NewBonusInst->setDebugLoc(DebugLoc());
1125 }
1126
// NOTE(review): the continuation line of this call (listing line 1128, the
// remaining arguments) is absent here.
1127 RemapInstruction(NewBonusInst, VMap,
1129
1130 // If we speculated an instruction, we need to drop any metadata that may
1131 // result in undefined behavior, as the metadata might have been valid
1132 // only given the branch precondition.
1133 // Similarly strip attributes on call parameters that may cause UB in
1134 // location the call is moved to.
1135 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1136
1137 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1138 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
// NOTE(review): the continuation line of this call (listing line 1140) is
// absent here as well.
1139 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1141
// Debug intrinsics are cloned but get no name, no VMap entry and no use
// rewriting.
1142 if (isa<DbgInfoIntrinsic>(BonusInst))
1143 continue;
1144
// The clone takes over the original name; the original is renamed with an
// ".old" suffix.
1145 NewBonusInst->takeName(&BonusInst);
1146 BonusInst.setName(NewBonusInst->getName() + ".old");
1147 VMap[&BonusInst] = NewBonusInst;
1148
1149 // Update (liveout) uses of bonus instructions,
1150 // now that the bonus instruction has been cloned into predecessor.
1151 // Note that we expect to be in a block-closed SSA form for this to work!
// make_early_inc_range is used because U.set() below edits the use list that
// is being walked.
1152 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1153 auto *UI = cast<Instruction>(U.getUser());
1154 auto *PN = dyn_cast<PHINode>(UI);
1155 if (!PN) {
1156 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1157 "If the user is not a PHI node, then it should be in the same "
1158 "block as, and come after, the original bonus instruction.");
1159 continue; // Keep using the original bonus instruction.
1160 }
1161 // Is this the block-closed SSA form PHI node?
1162 if (PN->getIncomingBlock(U) == BB)
1163 continue; // Great, keep using the original bonus instruction.
1164 // The only other alternative is a "use" when coming from
1165 // the predecessor block - here we should refer to the cloned bonus instr.
1166 assert(PN->getIncomingBlock(U) == PredBlock &&
1167 "Not in block-closed SSA form?");
1168 U.set(NewBonusInst);
1169 }
1170 }
1171}
1172
/// Fold the value-equality terminator TI of block BB into PTI, the
/// value-equality terminator of one of BB's predecessors: a single switch on
/// CV is built in the predecessor that branches directly to the destinations
/// TI would have chosen, and the Pred -> BB edge is reported as deleted.
/// Branch-weight metadata of both terminators is combined when present.
///
/// \param TI the terminator of BB being folded.
/// \param CV the compared value; taken by reference and replaced by a
/// ptrtoint of itself when it has pointer type.
/// \param PTI the predecessor's terminator that the new switch is inserted
/// before.
/// \param Builder used to create the ptrtoint and the new switch.
/// \returns true on the only return path visible here.
1173bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1174 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1175 BasicBlock *BB = TI->getParent();
1176 BasicBlock *Pred = PTI->getParent();
1177
// NOTE(review): listing line 1178 is absent here; it presumably declares the
// Updates list of DominatorTree updates used throughout this function.
1179
1180 // Figure out which 'cases' to copy from TI to PTI.
1181 std::vector<ValueEqualityComparisonCase> BBCases;
1182 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1183
1184 std::vector<ValueEqualityComparisonCase> PredCases;
1185 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1186
1187 // Based on whether the default edge from PTI goes to BB or not, fill in
1188 // PredCases and PredDefault with the new switch cases we would like to
1189 // build.
// NOTE(review): listing line 1190 is absent here; it presumably declares
// NewSuccessors, which counts per block how many new edges from Pred are
// being added.
1191
1192 // Update the branch weight metadata along the way
// NOTE(review): listing line 1193 is absent here; it presumably declares the
// Weights vector (index 0 = default weight, index i + 1 = PredCases[i]).
1194 bool PredHasWeights = hasBranchWeightMD(*PTI);
1195 bool SuccHasWeights = hasBranchWeightMD(*TI);
1196
1197 if (PredHasWeights) {
1198 getBranchWeights(PTI, Weights);
1199 // branch-weight metadata is inconsistent here.
1200 if (Weights.size() != 1 + PredCases.size())
1201 PredHasWeights = SuccHasWeights = false;
1202 } else if (SuccHasWeights)
1203 // If there are no predecessor weights but there are successor weights,
1204 // populate Weights with 1, which will later be scaled to the sum of
1205 // successor's weights
1206 Weights.assign(1 + PredCases.size(), 1);
1207
1208 SmallVector<uint64_t, 8> SuccWeights;
1209 if (SuccHasWeights) {
1210 getBranchWeights(TI, SuccWeights);
1211 // branch-weight metadata is inconsistent here.
1212 if (SuccWeights.size() != 1 + BBCases.size())
1213 PredHasWeights = SuccHasWeights = false;
1214 } else if (PredHasWeights)
// Mirror of the case above: only the predecessor has weights, so every
// successor edge gets weight 1.
1215 SuccWeights.assign(1 + BBCases.size(), 1);
1216
1217 if (PredDefault == BB) {
1218 // If this is the default destination from PTI, only the edges in TI
1219 // that don't occur in PTI, or that branch to BB will be activated.
1220 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1221 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1222 if (PredCases[i].Dest != BB)
1223 PTIHandled.insert(PredCases[i].Value);
1224 else {
1225 // The default destination is BB, we don't need explicit targets.
// Swap-with-last and pop removes entry i; i and e are decremented below so
// the element swapped into slot i is examined on the next iteration.
1226 std::swap(PredCases[i], PredCases.back());
1227
1228 if (PredHasWeights || SuccHasWeights) {
1229 // Increase weight for the default case.
1230 Weights[0] += Weights[i + 1];
1231 std::swap(Weights[i + 1], Weights.back());
1232 Weights.pop_back();
1233 }
1234
1235 PredCases.pop_back();
1236 --i;
1237 --e;
1238 }
1239
1240 // Reconstruct the new switch statement we will be building.
1241 if (PredDefault != BBDefault) {
1242 PredDefault->removePredecessor(Pred);
1243 if (DTU && PredDefault != BB)
1244 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1245 PredDefault = BBDefault;
1246 ++NewSuccessors[BBDefault];
1247 }
1248
// Entries of Weights below this index came from the predecessor; entries
// appended from here on come from TI's cases.
1249 unsigned CasesFromPred = Weights.size();
1250 uint64_t ValidTotalSuccWeight = 0;
1251 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1252 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1253 PredCases.push_back(BBCases[i]);
1254 ++NewSuccessors[BBCases[i].Dest];
1255 if (SuccHasWeights || PredHasWeights) {
1256 // The default weight is at index 0, so weight for the ith case
1257 // should be at index i+1. Scale the cases from successor by
1258 // PredDefaultWeight (Weights[0]).
1259 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1260 ValidTotalSuccWeight += SuccWeights[i + 1];
1261 }
1262 }
1263
1264 if (SuccHasWeights || PredHasWeights) {
1265 ValidTotalSuccWeight += SuccWeights[0];
1266 // Scale the cases from predecessor by ValidTotalSuccWeight.
1267 for (unsigned i = 1; i < CasesFromPred; ++i)
1268 Weights[i] *= ValidTotalSuccWeight;
1269 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1270 Weights[0] *= SuccWeights[0];
1271 }
1272 } else {
1273 // If this is not the default destination from PTI, only the edges
1274 // in TI that occur in PTI with a destination of BB will be
1275 // activated.
1276 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1277 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1278 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1279 if (PredCases[i].Dest == BB) {
1280 PTIHandled.insert(PredCases[i].Value);
1281
1282 if (PredHasWeights || SuccHasWeights) {
// Remember the weight of the removed case so it can be re-attached to the
// case that replaces it below.
1283 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1284 std::swap(Weights[i + 1], Weights.back());
1285 Weights.pop_back();
1286 }
1287
1288 std::swap(PredCases[i], PredCases.back());
1289 PredCases.pop_back();
1290 --i;
1291 --e;
1292 }
1293
1294 // Okay, now we know which constants were sent to BB from the
1295 // predecessor. Figure out where they will all go now.
1296 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1297 if (PTIHandled.count(BBCases[i].Value)) {
1298 // If this is one we are capable of getting...
1299 if (PredHasWeights || SuccHasWeights)
1300 Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1301 PredCases.push_back(BBCases[i]);
1302 ++NewSuccessors[BBCases[i].Dest];
1303 PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
1304 }
1305
1306 // If there are any constants vectored to BB that TI doesn't handle,
1307 // they must go to the default destination of TI.
1308 for (ConstantInt *I : PTIHandled) {
1309 if (PredHasWeights || SuccHasWeights)
1310 Weights.push_back(WeightsForHandled[I]);
1311 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1312 ++NewSuccessors[BBDefault];
1313 }
1314 }
1315
1316 // Okay, at this point, we know which new successor Pred will get. Make
1317 // sure we update the number of entries in the PHI nodes for these
1318 // successors.
1319 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1320 if (DTU) {
1321 SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
1322 Updates.reserve(Updates.size() + NewSuccessors.size());
1323 }
1324 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1325 NewSuccessors) {
// addPredecessorToBlock is called once per new edge to this successor.
1326 for (auto I : seq(NewSuccessor.second)) {
1327 (void)I;
1328 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1329 }
// An Insert update is only recorded for blocks that were not already
// successors of Pred.
1330 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1331 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1332 }
1333
1334 Builder.SetInsertPoint(PTI);
1335 // Convert pointer to int before we switch.
1336 if (CV->getType()->isPointerTy()) {
1337 CV =
1338 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1339 }
1340
1341 // Now that the successors are updated, create the new Switch instruction.
1342 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1343 NewSI->setDebugLoc(PTI->getDebugLoc());
1344 for (ValueEqualityComparisonCase &V : PredCases)
1345 NewSI->addCase(V.Value, V.Dest);
1346
1347 if (PredHasWeights || SuccHasWeights) {
1348 // Halve the weights if any of them cannot fit in an uint32_t
1349 fitWeights(Weights);
1350
1351 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1352
1353 setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
1354 }
1355
// NOTE(review): listing line 1356 is absent here; no visible statement
// removes the old predecessor terminator PTI.
1357
1358 // Okay, last check. If BB is still a successor of PTI, then we must
1359 // have an infinite loop case. If so, add an infinitely looping block
1360 // to handle the case to preserve the behavior of the code.
1361 BasicBlock *InfLoopBlock = nullptr;
1362 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1363 if (NewSI->getSuccessor(i) == BB) {
1364 if (!InfLoopBlock) {
1365 // Insert it at the end of the function, because it's either cold,
1366 // or it won't matter if it's hot. :)
1367 InfLoopBlock =
1368 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1369 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1370 if (DTU)
1371 Updates.push_back(
1372 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1373 }
1374 NewSI->setSuccessor(i, InfLoopBlock);
1375 }
1376
1377 if (DTU) {
1378 if (InfLoopBlock)
1379 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1380
1381 Updates.push_back({DominatorTree::Delete, Pred, BB});
1382
1383 DTU->applyUpdates(Updates);
1384 }
1385
1386 ++NumFoldValueComparisonIntoPredecessors;
1387 return true;
1388}
1389
1390/// The specified terminator is a value equality comparison instruction
1391/// (either a switch or a branch on "X == c").
1392/// See if any of the predecessors of the terminator block are value comparisons
1393/// on the same value. If so, and if safe to do so, fold them together.
///
/// \param TI the value-equality terminator whose block's predecessors are
/// examined.
/// \param Builder forwarded to performValueComparisonIntoPredecessorFolding.
/// \returns true if at least one predecessor was folded, except on the early
/// exit described in the review note inside the loop.
1394bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1395 IRBuilder<> &Builder) {
1396 BasicBlock *BB = TI->getParent();
1397 Value *CV = isValueEqualityComparison(TI); // CondVal
1398 assert(CV && "Not a comparison?");
1399
1400 bool Changed = false;
1401
// NOTE(review): listing line 1402 is absent here; it presumably declares the
// Preds worklist of BB's predecessors consumed by the loop below.
1403 while (!Preds.empty()) {
1404 BasicBlock *Pred = Preds.pop_back_val();
1405 Instruction *PTI = Pred->getTerminator();
1406
1407 // Don't try to fold into itself.
1408 if (Pred == BB)
1409 continue;
1410
1411 // See if the predecessor is a comparison with the same value.
1412 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1413 if (PCV != CV)
1414 continue;
1415
// NOTE(review): listing line 1416 is absent here; it presumably declares the
// FailBlocks container filled by safeToMergeTerminators.
// Each block in FailBlocks is split with respect to TI's block before the
// fold is attempted; if any split fails the whole function gives up.
// NOTE(review): that early exit returns false even when an earlier loop
// iteration already set Changed, or an earlier split in this loop already
// succeeded - confirm whether `return Changed` was intended.
1417 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1418 for (auto *Succ : FailBlocks) {
1419 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1420 return false;
1421 }
1422 }
1423
// The return value of the fold is ignored; Changed is set unconditionally.
1424 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1425 Changed = true;
1426 }
1427 return Changed;
1428}
1429
1430// If we would need to insert a select that uses the value of this invoke
1431// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1432// need to do this), we can't hoist the invoke, as there is nowhere to put the
1433// select in this case.
// Returns false as soon as a PHI in a successor of BB1 receives different
// values from BB1 and BB2 and one of those values is I1 (from BB1) or I2
// (from BB2); returns true otherwise.
// NOTE(review): the listing line carrying the function name and the BB1/BB2
// parameters is absent here.
1435 Instruction *I1, Instruction *I2) {
1436 for (BasicBlock *Succ : successors(BB1)) {
1437 for (const PHINode &PN : Succ->phis()) {
1438 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1439 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1440 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1441 return false;
1442 }
1443 }
1444 }
1445 return true;
1446}
1447
1448// Get interesting characteristics of instructions that
1449// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1450// instructions can be reordered across.
// The result is a bitmask built from SkipReadMem, SkipSideEffect and
// SkipImplicitControlFlow.
// NOTE(review): listing lines 1451-1455 and 1457 are absent here; they
// presumably hold the definition of those flag constants and this function's
// signature (taking the instruction I).
1456
1458 unsigned Flags = 0;
1459 if (I->mayReadFromMemory())
1460 Flags |= SkipReadMem;
1461 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1462 // inalloca) across stacksave/stackrestore boundaries.
1463 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1464 Flags |= SkipSideEffect;
// NOTE(review): listing line 1465, the condition guarding the next statement,
// is absent here; as shown the flag would be set unconditionally.
1466 Flags |= SkipImplicitControlFlow;
1467 return Flags;
1468}
1469
1470// Returns true if it is safe to reorder an instruction across preceding
1471// instructions in a basic block.
// Flags is the bitmask of Skip* properties accumulated from the instructions
// that I would be moved across.
1472static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1473 // Don't reorder a store over a load.
1474 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1475 return false;
1476
1477 // If we have seen an instruction with side effects, it's unsafe to reorder an
1478 // instruction which reads memory or itself has side effects.
1479 if ((Flags & SkipSideEffect) &&
1480 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1481 return false;
1482
1483 // Reordering across an instruction which does not necessarily transfer
1484 // control to the next instruction is speculation.
// NOTE(review): listing line 1485, the condition guarding the next return, is
// absent here; as shown the return would be unconditional.
1486 return false;
1487
1488 // Hoisting of llvm.deoptimize is only legal together with the next return
1489 // instruction, which this pass is not always able to do.
1490 if (auto *CB = dyn_cast<CallBase>(I))
1491 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1492 return false;
1493
1494 // It's also unsafe/illegal to hoist an instruction above its instruction
1495 // operands
// Only operands defined in I's own block are rejected here.
1496 BasicBlock *BB = I->getParent();
1497 for (Value *Op : I->operands()) {
1498 if (auto *J = dyn_cast<Instruction>(Op))
1499 if (J->getParent() == BB)
1500 return false;
1501 }
1502
1503 return true;
1504}
1505
1506static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1507
1508/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1509/// instructions \p I1 and \p I2 can and should be hoisted.
/// NOTE(review): the listing line carrying the function name and the I1/I2
/// parameters is absent here.
1511 const TargetTransformInfo &TTI) {
1512 // If we're going to hoist a call, make sure that the two instructions
1513 // we're commoning/hoisting are both marked with musttail, or neither of
1514 // them is marked as such. Otherwise, we might end up in a situation where
1515 // we hoist from a block where the terminator is a `ret` to a block where
1516 // the terminator is a `br`, and `musttail` calls expect to be followed by
1517 // a return.
1518 auto *C1 = dyn_cast<CallInst>(I1);
1519 auto *C2 = dyn_cast<CallInst>(I2);
1520 if (C1 && C2)
1521 if (C1->isMustTailCall() != C2->isMustTailCall())
1522 return false;
1523
// NOTE(review): listing line 1524, the condition guarding the next return
// (presumably the one that uses TTI), is absent here.
1525 return false;
1526
1527 // If any of the two call sites has nomerge or convergent attribute, stop
1528 // hoisting.
1529 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1530 if (CB1->cannotMerge() || CB1->isConvergent())
1531 return false;
1532 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1533 if (CB2->cannotMerge() || CB2->isConvergent())
1534 return false;
1535
1536 return true;
1537}
1538
1539/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1540/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisted in
1541/// hoistCommonCodeFromSuccessors. e.g. The input:
1542/// I1 DVRs: { x, z },
1543/// OtherInsts: { I2 DVRs: { x, y, z } }
1544/// would result in hoisting only DbgVariableRecord x.
/// Nothing is done if \p I1 or any instruction in \p OtherInsts has no debug
/// records attached.
/// NOTE(review): the listing line carrying the function name is absent here.
1546 Instruction *TI, Instruction *I1,
1547 SmallVectorImpl<Instruction *> &OtherInsts) {
1548 if (!I1->hasDbgRecords())
1549 return;
1550 using CurrentAndEndIt =
1551 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1552 // Vector of {Current, End} iterators.
// NOTE(review): listing line 1553 is absent here; it presumably declares the
// Itrs vector used below.
1554 Itrs.reserve(OtherInsts.size() + 1);
1555 // Helper lambdas for lock-step checks:
1556 // Return true if this Current == End.
1557 auto atEnd = [](const CurrentAndEndIt &Pair) {
1558 return Pair.first == Pair.second;
1559 };
1560 // Return true if all Current are identical.
// Every Current after the first is compared against the first one (Itrs[0]).
1561 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1562 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
// NOTE(review): listing line 1563, the header of the predicate lambda taking
// the iterator I, is absent here.
1564 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1565 });
1566 };
1567
1568 // Collect the iterators.
1569 Itrs.push_back(
1570 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1571 for (Instruction *Other : OtherInsts) {
1572 if (!Other->hasDbgRecords())
1573 return;
1574 Itrs.push_back(
1575 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1576 }
1577
1578 // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
1579 // the lock-step DbgRecord are identical, hoist all of them to TI.
1580 // This replicates the dbg.* intrinsic behaviour in
1581 // hoistCommonCodeFromSuccessors.
1582 while (none_of(Itrs, atEnd)) {
// The decision is taken once per step, before any iterator is advanced.
1583 bool HoistDVRs = allIdentical(Itrs);
1584 for (CurrentAndEndIt &Pair : Itrs) {
1585 // Increment Current iterator now as we may be about to move the
1586 // DbgRecord.
1587 DbgRecord &DR = *Pair.first++;
1588 if (HoistDVRs) {
1589 DR.removeFromParent();
1590 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1591 }
1592 }
1593 }
1594}
1595
// Returns true if I1 and I2 are identical (per isIdenticalToWhenDefined with
// attribute intersection enabled) or differ only in the order of their first
// two operands in a way that does not change the computed value:
//  - two compares whose predicates are swapped forms of each other and whose
//    operands are exchanged, or
//  - the same commutative operation with operands 0 and 1 exchanged and all
//    remaining operands equal.
// NOTE(review): the listing line carrying the function name and the I1
// parameter is absent here.
1597 const Instruction *I2) {
1598 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1599 return true;
1600
// When both instructions are compares, the result of this check is returned
// directly; the commutativity check below is not reached.
1601 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1602 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1603 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1604 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1605 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1606
1607 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1608 return I1->getOperand(0) == I2->getOperand(1) &&
1609 I1->getOperand(1) == I2->getOperand(0) &&
1610 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1611 }
1612
1613 return false;
1614}
1615
1616/// If the target supports conditional faulting,
1617/// we look for the following pattern:
1618/// \code
1619/// BB:
1620/// ...
1621/// %cond = icmp ult %x, %y
1622/// br i1 %cond, label %TrueBB, label %FalseBB
1623/// FalseBB:
1624/// store i32 1, ptr %q, align 4
1625/// ...
1626/// TrueBB:
1627/// %maskedloadstore = load i32, ptr %b, align 4
1628/// store i32 %maskedloadstore, ptr %p, align 4
1629/// ...
1630/// \endcode
1631///
1632/// and transform it into:
1633///
1634/// \code
1635/// BB:
1636/// ...
1637/// %cond = icmp ult %x, %y
1638/// %maskedloadstore = cload i32, ptr %b, %cond
1639/// cstore i32 %maskedloadstore, ptr %p, %cond
1640/// cstore i32 1, ptr %q, ~%cond
1641/// br i1 %cond, label %TrueBB, label %FalseBB
1642/// FalseBB:
1643/// ...
1644/// TrueBB:
1645/// ...
1646/// \endcode
1647///
1648/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1649/// e.g.
1650///
1651/// \code
1652/// %vcond = bitcast i1 %cond to <1 x i1>
1653/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1654/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1655/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1656/// call void @llvm.masked.store.v1i32.p0
1657/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1658/// %cond.not = xor i1 %cond, true
1659/// %vcond.not = bitcast i1 %cond.not to <1 x i1>
1660/// call void @llvm.masked.store.v1i32.p0
1661/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
1662/// \endcode
1663///
1664/// So we need to turn hoisted load/store into cload/cstore.
1665///
1666/// \param BI The branch instruction.
1667/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1668/// will be speculated.
1669/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
/// When \p Invert holds a value, one mask is used for all instructions (the
/// negated condition if the value is true) and the new instructions are
/// inserted at each original load/store; when it is empty, both a true and a
/// false mask are built before \p BI and the one matching each instruction's
/// parent block is used.
/// NOTE(review): the listing line carrying the function name is absent here.
1671 BranchInst *BI,
1672 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1673 std::optional<bool> Invert) {
1674 auto &Context = BI->getParent()->getContext();
1675 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1676 auto *Cond = BI->getOperand(0);
1677 // Construct the condition if needed.
1678 BasicBlock *BB = BI->getParent();
1679 IRBuilder<> Builder(
1680 Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI);
1681 Value *Mask = nullptr;
1682 Value *MaskFalse = nullptr;
1683 Value *MaskTrue = nullptr;
1684 if (Invert.has_value()) {
1685 Mask = Builder.CreateBitCast(
1686 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1687 VCondTy);
1688 } else {
1689 MaskFalse = Builder.CreateBitCast(
1690 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1691 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1692 }
// Strips any chain of bitcast instructions and returns the underlying value.
1693 auto PeekThroughBitcasts = [](Value *V) {
1694 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1695 V = BitCast->getOperand(0);
1696 return V;
1697 };
1698 for (auto *I : SpeculatedConditionalLoadsStores) {
// This per-instruction builder shadows the function-level Builder above.
1699 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1700 if (!Invert.has_value())
1701 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1702 // We currently assume conditional faulting load/store is supported for
1703 // scalar types only when creating new instructions. This can be easily
1704 // extended for vector types in the future.
1705 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1706 auto *Op0 = I->getOperand(0);
1707 CallInst *MaskedLoadStore = nullptr;
1708 if (auto *LI = dyn_cast<LoadInst>(I)) {
1709 // Handle Load.
1710 auto *Ty = I->getType();
1711 PHINode *PN = nullptr;
1712 Value *PassThru = nullptr;
// With Invert set, the first PHI user of the load (if any) supplies the
// pass-through value: the value that PHI receives from BB, seen through
// bitcasts and cast to a one-element vector. Otherwise PassThru stays null.
1713 if (Invert.has_value())
1714 for (User *U : I->users())
1715 if ((PN = dyn_cast<PHINode>(U))) {
1716 PassThru = Builder.CreateBitCast(
1717 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1718 FixedVectorType::get(Ty, 1));
1719 break;
1720 }
1721 MaskedLoadStore = Builder.CreateMaskedLoad(
1722 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1723 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
// The PHI's incoming value for BB is replaced by the masked load result.
1724 if (PN)
1725 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1726 I->replaceAllUsesWith(NewLoadStore);
1727 } else {
1728 // Handle Store.
1729 auto *StoredVal = Builder.CreateBitCast(
1730 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1731 MaskedLoadStore = Builder.CreateMaskedStore(
1732 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1733 }
1734 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1735 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1736 //
1737 // !nonnull, !align : Pointer types are not supported, no need to keep.
1738 // !range: Load type is changed from scalar to vector, but the metadata on
1739 // vector specifies a per-element range, so the semantics stay the
1740 // same. Keep it.
1741 // !annotation: Does not impact semantics. Keep it.
1742 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1743 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1744 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1745 // FIXME: DIAssignID is not supported for masked store yet.
1746 // (Verifier::visitDIAssignIDMetadata)
// NOTE(review): listing line 1747 is absent here.
1748 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1749 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1750 });
// Whatever metadata is still attached to I is copied to the masked
// intrinsic call before the original instruction is erased.
1751 MaskedLoadStore->copyMetadata(*I);
1752 I->eraseFromParent();
1753 }
1754}
1755
// Filter for loads and stores: anything that is not a LoadInst or StoreInst,
// and any load or store that is not simple, is rejected with false.
// NOTE(review): the listing line carrying the function name and the
// instruction parameter I is absent here, and so are listing lines 1771-1772,
// which presumably hold the final return statement (the check that uses TTI
// and the alignment limit described in the comment below).
1757 const TargetTransformInfo &TTI) {
1758 // Not handle volatile or atomic.
1759 if (auto *L = dyn_cast<LoadInst>(I)) {
1760 if (!L->isSimple())
1761 return false;
1762 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1763 if (!S->isSimple())
1764 return false;
1765 } else
1766 return false;
1767
1768 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1769 // That's why we have the alignment limitation.
1770 // FIXME: Update the prototype of the intrinsics?
1773}
1774
1775namespace {
1776
1777// LockstepReverseIterator - Iterates through instructions
1778// in a set of blocks in reverse order from the first non-terminator.
1779// For example (assume all blocks have size n):
1780// LockstepReverseIterator I([B1, B2, B3]);
1781// *I-- = [B1[n], B2[n], B3[n]];
1782// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
1783// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
1784// ...
1785class LockstepReverseIterator {
1788 bool Fail;
1789
1790public:
1791 LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
1792 reset();
1793 }
1794
1795 void reset() {
1796 Fail = false;
1797 Insts.clear();
1798 for (auto *BB : Blocks) {
1799 Instruction *Inst = BB->getTerminator();
1800 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1801 Inst = Inst->getPrevNode();
1802 if (!Inst) {
1803 // Block wasn't big enough.
1804 Fail = true;
1805 return;
1806 }
1807 Insts.push_back(Inst);
1808 }
1809 }
1810
1811 bool isValid() const { return !Fail; }
1812
1813 void operator--() {
1814 if (Fail)
1815 return;
1816 for (auto *&Inst : Insts) {
1817 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1818 Inst = Inst->getPrevNode();
1819 // Already at beginning of block.
1820 if (!Inst) {
1821 Fail = true;
1822 return;
1823 }
1824 }
1825 }
1826
1827 void operator++() {
1828 if (Fail)
1829 return;
1830 for (auto *&Inst : Insts) {
1831 for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1832 Inst = Inst->getNextNode();
1833 // Already at end of block.
1834 if (!Inst) {
1835 Fail = true;
1836 return;
1837 }
1838 }
1839 }
1840
1841 ArrayRef<Instruction *> operator*() const { return Insts; }
1842};
1843
1844} // end anonymous namespace
1845
1846/// Hoist any common code in the successor blocks up into the block. This
1847/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1848/// given, only perform hoisting in case all successor blocks contain matching
1849/// instructions only. In that case, all instructions can be hoisted and the
1850/// original branch will be replaced and selects for PHIs are added.
1851bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1852 bool AllInstsEqOnly) {
1853 // This does very trivial matching, with limited scanning, to find identical
1854 // instructions in the two blocks. In particular, we don't want to get into
1855 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1856 // such, we currently just scan for obviously identical instructions in an
1857 // identical order, possibly separated by the same number of non-identical
1858 // instructions.
1859 BasicBlock *BB = TI->getParent();
1860 unsigned int SuccSize = succ_size(BB);
1861 if (SuccSize < 2)
1862 return false;
1863
1864 // If either of the blocks has its address taken, then we can't do this fold,
1865 // because the code we'd hoist would no longer run when we jump into the block
1866 // by its address.
1867 for (auto *Succ : successors(BB))
1868 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1869 return false;
1870
1871 // The second of pair is a SkipFlags bitmask.
1872 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1873 SmallVector<SuccIterPair, 8> SuccIterPairs;
1874 for (auto *Succ : successors(BB)) {
1875 BasicBlock::iterator SuccItr = Succ->begin();
1876 if (isa<PHINode>(*SuccItr))
1877 return false;
1878 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1879 }
1880
1881 if (AllInstsEqOnly) {
1882 // Check if all instructions in the successor blocks match. This allows
1883 // hoisting all instructions and removing the blocks we are hoisting from,
1884 // so does not add any new instructions.
1886 // Check if sizes and terminators of all successors match.
1887 bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
1888 Instruction *Term0 = Succs[0]->getTerminator();
1889 Instruction *Term = Succ->getTerminator();
1890 return !Term->isSameOperationAs(Term0) ||
1891 !equal(Term->operands(), Term0->operands()) ||
1892 Succs[0]->size() != Succ->size();
1893 });
1894 if (!AllSame)
1895 return false;
1896 if (AllSame) {
1897 LockstepReverseIterator LRI(Succs);
1898 while (LRI.isValid()) {
1899 Instruction *I0 = (*LRI)[0];
1900 if (any_of(*LRI, [I0](Instruction *I) {
1901 return !areIdenticalUpToCommutativity(I0, I);
1902 })) {
1903 return false;
1904 }
1905 --LRI;
1906 }
1907 }
1908 // Now we know that all instructions in all successors can be hoisted. Let
1909 // the loop below handle the hoisting.
1910 }
1911
1912 // Count how many instructions were not hoisted so far. There's a limit on how
1913 // many instructions we skip, serving as a compilation time control as well as
1914 // preventing excessive increase of life ranges.
1915 unsigned NumSkipped = 0;
1916 // If we find an unreachable instruction at the beginning of a basic block, we
1917 // can still hoist instructions from the rest of the basic blocks.
1918 if (SuccIterPairs.size() > 2) {
1919 erase_if(SuccIterPairs,
1920 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1921 if (SuccIterPairs.size() < 2)
1922 return false;
1923 }
1924
1925 bool Changed = false;
1926
1927 for (;;) {
1928 auto *SuccIterPairBegin = SuccIterPairs.begin();
1929 auto &BB1ItrPair = *SuccIterPairBegin++;
1930 auto OtherSuccIterPairRange =
1931 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1932 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1933
1934 Instruction *I1 = &*BB1ItrPair.first;
1935
1936 // Skip debug info if it is not identical.
1937 bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
1938 Instruction *I2 = &*Iter;
1939 return I1->isIdenticalToWhenDefined(I2);
1940 });
1941 if (!AllDbgInstsAreIdentical) {
1942 while (isa<DbgInfoIntrinsic>(I1))
1943 I1 = &*++BB1ItrPair.first;
1944 for (auto &SuccIter : OtherSuccIterRange) {
1945 Instruction *I2 = &*SuccIter;
1946 while (isa<DbgInfoIntrinsic>(I2))
1947 I2 = &*++SuccIter;
1948 }
1949 }
1950
1951 bool AllInstsAreIdentical = true;
1952 bool HasTerminator = I1->isTerminator();
1953 for (auto &SuccIter : OtherSuccIterRange) {
1954 Instruction *I2 = &*SuccIter;
1955 HasTerminator |= I2->isTerminator();
1956 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1957 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1958 AllInstsAreIdentical = false;
1959 }
1960
1962 for (auto &SuccIter : OtherSuccIterRange)
1963 OtherInsts.push_back(&*SuccIter);
1964
1965 // If we are hoisting the terminator instruction, don't move one (making a
1966 // broken BB), instead clone it, and remove BI.
1967 if (HasTerminator) {
1968 // Even if BB, which contains only one unreachable instruction, is ignored
1969 // at the beginning of the loop, we can hoist the terminator instruction.
1970 // If any instructions remain in the block, we cannot hoist terminators.
1971 if (NumSkipped || !AllInstsAreIdentical) {
1972 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1973 return Changed;
1974 }
1975
1976 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1977 Changed;
1978 }
1979
1980 if (AllInstsAreIdentical) {
1981 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1982 AllInstsAreIdentical =
1983 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1984 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1985 Instruction *I2 = &*Pair.first;
1986 unsigned SkipFlagsBB2 = Pair.second;
1987 // Even if the instructions are identical, it may not
1988 // be safe to hoist them if we have skipped over
1989 // instructions with side effects or their operands
1990 // weren't hoisted.
1991 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1993 });
1994 }
1995
1996 if (AllInstsAreIdentical) {
1997 BB1ItrPair.first++;
1998 if (isa<DbgInfoIntrinsic>(I1)) {
1999 // The debug location is an integral part of a debug info intrinsic
2000 // and can't be separated from it or replaced. Instead of attempting
2001 // to merge locations, simply hoist both copies of the intrinsic.
2002 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2003 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2004 // and leave any that were not hoisted behind (by calling moveBefore
2005 // rather than moveBeforePreserving).
2006 I1->moveBefore(TI);
2007 for (auto &SuccIter : OtherSuccIterRange) {
2008 auto *I2 = &*SuccIter++;
2009 assert(isa<DbgInfoIntrinsic>(I2));
2010 I2->moveBefore(TI);
2011 }
2012 } else {
2013 // For a normal instruction, we just move one to right before the
2014 // branch, then replace all uses of the other with the first. Finally,
2015 // we remove the now redundant second instruction.
2016 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2017 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2018 // and leave any that were not hoisted behind (by calling moveBefore
2019 // rather than moveBeforePreserving).
2020 I1->moveBefore(TI);
2021 for (auto &SuccIter : OtherSuccIterRange) {
2022 Instruction *I2 = &*SuccIter++;
2023 assert(I2 != I1);
2024 if (!I2->use_empty())
2025 I2->replaceAllUsesWith(I1);
2026 I1->andIRFlags(I2);
2027 if (auto *CB = dyn_cast<CallBase>(I1)) {
2028 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2029 assert(Success && "We should not be trying to hoist callbases "
2030 "with non-intersectable attributes");
2031 // For NDEBUG Compile.
2032 (void)Success;
2033 }
2034
2035 combineMetadataForCSE(I1, I2, true);
2036 // I1 and I2 are being combined into a single instruction. Its debug
2037 // location is the merged locations of the original instructions.
2038 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2039 I2->eraseFromParent();
2040 }
2041 }
2042 if (!Changed)
2043 NumHoistCommonCode += SuccIterPairs.size();
2044 Changed = true;
2045 NumHoistCommonInstrs += SuccIterPairs.size();
2046 } else {
2047 if (NumSkipped >= HoistCommonSkipLimit) {
2048 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2049 return Changed;
2050 }
2051 // We are about to skip over a pair of non-identical instructions. Record
2052 // if any have characteristics that would prevent reordering instructions
2053 // across them.
2054 for (auto &SuccIterPair : SuccIterPairs) {
2055 Instruction *I = &*SuccIterPair.first++;
2056 SuccIterPair.second |= skippedInstrFlags(I);
2057 }
2058 ++NumSkipped;
2059 }
2060 }
2061}
2062
2063bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2064 Instruction *TI, Instruction *I1,
2065 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
2066
2067 auto *BI = dyn_cast<BranchInst>(TI);
2068
2069 bool Changed = false;
2070 BasicBlock *TIParent = TI->getParent();
2071 BasicBlock *BB1 = I1->getParent();
2072
2073 // Use only for an if statement.
2074 auto *I2 = *OtherSuccTIs.begin();
2075 auto *BB2 = I2->getParent();
2076 if (BI) {
2077 assert(OtherSuccTIs.size() == 1);
2078 assert(BI->getSuccessor(0) == I1->getParent());
2079 assert(BI->getSuccessor(1) == I2->getParent());
2080 }
2081
2082 // In the case of an if statement, we try to hoist an invoke.
2083 // FIXME: Can we define a safety predicate for CallBr?
2084 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2085 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2086 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2087 return false;
2088
2089 // TODO: callbr hoisting currently disabled pending further study.
2090 if (isa<CallBrInst>(I1))
2091 return false;
2092
2093 for (BasicBlock *Succ : successors(BB1)) {
2094 for (PHINode &PN : Succ->phis()) {
2095 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2096 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2097 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2098 if (BB1V == BB2V)
2099 continue;
2100
2101 // In the case of an if statement, check for
2102 // passingValueIsAlwaysUndefined here because we would rather eliminate
2103 // undefined control flow than convert it to a select.
2104 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2106 return false;
2107 }
2108 }
2109 }
2110
2111 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2112 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2113 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2114 // Clone the terminator and hoist it into the pred, without any debug info.
2115 Instruction *NT = I1->clone();
2116 NT->insertInto(TIParent, TI->getIterator());
2117 if (!NT->getType()->isVoidTy()) {
2118 I1->replaceAllUsesWith(NT);
2119 for (Instruction *OtherSuccTI : OtherSuccTIs)
2120 OtherSuccTI->replaceAllUsesWith(NT);
2121 NT->takeName(I1);
2122 }
2123 Changed = true;
2124 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2125
2126 // Ensure terminator gets a debug location, even an unknown one, in case
2127 // it involves inlinable calls.
2129 Locs.push_back(I1->getDebugLoc());
2130 for (auto *OtherSuccTI : OtherSuccTIs)
2131 Locs.push_back(OtherSuccTI->getDebugLoc());
2132 NT->setDebugLoc(DILocation::getMergedLocations(Locs));
2133
2134 // PHIs created below will adopt NT's merged DebugLoc.
2135 IRBuilder<NoFolder> Builder(NT);
2136
2137 // In the case of an if statement, hoisting one of the terminators from our
2138 // successor is a great thing. Unfortunately, the successors of the if/else
2139 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2140 // must agree for all PHI nodes, so we insert select instruction to compute
2141 // the final result.
2142 if (BI) {
2143 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2144 for (BasicBlock *Succ : successors(BB1)) {
2145 for (PHINode &PN : Succ->phis()) {
2146 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2147 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2148 if (BB1V == BB2V)
2149 continue;
2150
2151 // These values do not agree. Insert a select instruction before NT
2152 // that determines the right value.
2153 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2154 if (!SI) {
2155 // Propagate fast-math-flags from phi node to its replacement select.
2156 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2157 if (isa<FPMathOperator>(PN))
2158 Builder.setFastMathFlags(PN.getFastMathFlags());
2159
2160 SI = cast<SelectInst>(Builder.CreateSelect(
2161 BI->getCondition(), BB1V, BB2V,
2162 BB1V->getName() + "." + BB2V->getName(), BI));
2163 }
2164
2165 // Make the PHI node use the select for all incoming values for BB1/BB2
2166 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2167 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2168 PN.setIncomingValue(i, SI);
2169 }
2170 }
2171 }
2172
2174
2175 // Update any PHI nodes in our new successors.
2176 for (BasicBlock *Succ : successors(BB1)) {
2177 addPredecessorToBlock(Succ, TIParent, BB1);
2178 if (DTU)
2179 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2180 }
2181
2182 if (DTU)
2183 for (BasicBlock *Succ : successors(TI))
2184 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2185
2187 if (DTU)
2188 DTU->applyUpdates(Updates);
2189 return Changed;
2190}
2191
2192// Check lifetime markers.
2193static bool isLifeTimeMarker(const Instruction *I) {
2194 if (auto II = dyn_cast<IntrinsicInst>(I)) {
2195 switch (II->getIntrinsicID()) {
2196 default:
2197 break;
2198 case Intrinsic::lifetime_start:
2199 case Intrinsic::lifetime_end:
2200 return true;
2201 }
2202 }
2203 return false;
2204}
2205
2206// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2207// into variables.
2209 int OpIdx) {
2210 // Divide/Remainder by constant is typically much cheaper than by variable.
2211 if (I->isIntDivRem())
2212 return OpIdx != 1;
2213 return !isa<IntrinsicInst>(I);
2214}
2215
2216// All instructions in Insts belong to different blocks that all unconditionally
2217// branch to a common successor. Analyze each instruction and return true if it
2218// would be possible to sink them into their successor, creating one common
2219// instruction instead. For every value that would be required to be provided by
2220// PHI node (because an operand varies in each input block), add to PHIOperands.
2223 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2224 // Prune out obviously bad instructions to move. Each instruction must have
2225 // the same number of uses, and we check later that the uses are consistent.
2226 std::optional<unsigned> NumUses;
2227 for (auto *I : Insts) {
2228 // These instructions may change or break semantics if moved.
2229 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2230 I->getType()->isTokenTy())
2231 return false;
2232
2233 // Do not try to sink an instruction in an infinite loop - it can cause
2234 // this algorithm to infinite loop.
2235 if (I->getParent()->getSingleSuccessor() == I->getParent())
2236 return false;
2237
2238 // Conservatively return false if I is an inline-asm instruction. Sinking
2239 // and merging inline-asm instructions can potentially create arguments
2240 // that cannot satisfy the inline-asm constraints.
2241 // If the instruction has nomerge or convergent attribute, return false.
2242 if (const auto *C = dyn_cast<CallBase>(I))
2243 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2244 return false;
2245
2246 if (!NumUses)
2247 NumUses = I->getNumUses();
2248 else if (NumUses != I->getNumUses())
2249 return false;
2250 }
2251
2252 const Instruction *I0 = Insts.front();
2253 const auto I0MMRA = MMRAMetadata(*I0);
2254 for (auto *I : Insts) {
2255 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2256 return false;
2257
2258 // swifterror pointers can only be used by a load or store; sinking a load
2259 // or store would require introducing a select for the pointer operand,
2260 // which isn't allowed for swifterror pointers.
2261 if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
2262 return false;
2263 if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
2264 return false;
2265
2266 // Treat MMRAs conservatively. This pass can be quite aggressive and
2267 // could drop a lot of MMRAs otherwise.
2268 if (MMRAMetadata(*I) != I0MMRA)
2269 return false;
2270 }
2271
2272 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2273 // then the other phi operands must match the instructions from Insts. This
2274 // also has to hold true for any phi nodes that would be created as a result
2275 // of sinking. Both of these cases are represented by PhiOperands.
2276 for (const Use &U : I0->uses()) {
2277 auto It = PHIOperands.find(&U);
2278 if (It == PHIOperands.end())
2279 // There may be uses in other blocks when sinking into a loop header.
2280 return false;
2281 if (!equal(Insts, It->second))
2282 return false;
2283 }
2284
2285 // For calls to be sinkable, they must all be indirect, or have same callee.
2286 // I.e. if we have two direct calls to different callees, we don't want to
2287 // turn that into an indirect call. Likewise, if we have an indirect call,
2288 // and a direct call, we don't actually want to have a single indirect call.
2289 if (isa<CallBase>(I0)) {
2290 auto IsIndirectCall = [](const Instruction *I) {
2291 return cast<CallBase>(I)->isIndirectCall();
2292 };
2293 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2294 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2295 if (HaveIndirectCalls) {
2296 if (!AllCallsAreIndirect)
2297 return false;
2298 } else {
2299 // All callees must be identical.
2300 Value *Callee = nullptr;
2301 for (const Instruction *I : Insts) {
2302 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2303 if (!Callee)
2304 Callee = CurrCallee;
2305 else if (Callee != CurrCallee)
2306 return false;
2307 }
2308 }
2309 }
2310
2311 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2312 Value *Op = I0->getOperand(OI);
2313 if (Op->getType()->isTokenTy())
2314 // Don't touch any operand of token type.
2315 return false;
2316
2317 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2318 assert(I->getNumOperands() == I0->getNumOperands());
2319 return I->getOperand(OI) == I0->getOperand(OI);
2320 };
2321 if (!all_of(Insts, SameAsI0)) {
2322 // SROA can't speculate lifetime markers of selects/phis, and the
2323 // backend may handle such lifetimes incorrectly as well (#104776).
2324 // Don't sink lifetimes if it would introduce a phi on the pointer
2325 // argument.
2326 if (isLifeTimeMarker(I0) && OI == 1 &&
2327 any_of(Insts, [](const Instruction *I) {
2328 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2329 }))
2330 return false;
2331
2332 if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
2334 // We can't create a PHI from this GEP.
2335 return false;
2336 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2337 for (auto *I : Insts)
2338 Ops.push_back(I->getOperand(OI));
2339 }
2340 }
2341 return true;
2342}
2343
2344// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2345// instruction of every block in Blocks to their common successor, commoning
2346// into one instruction.
2348 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2349
2350 // canSinkInstructions returning true guarantees that every block has at
2351 // least one non-terminator instruction.
2353 for (auto *BB : Blocks) {
2354 Instruction *I = BB->getTerminator();
2355 do {
2356 I = I->getPrevNode();
2357 } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
2358 if (!isa<DbgInfoIntrinsic>(I))
2359 Insts.push_back(I);
2360 }
2361
2362 // We don't need to do any more checking here; canSinkInstructions should
2363 // have done it all for us.
2364 SmallVector<Value*, 4> NewOperands;
2365 Instruction *I0 = Insts.front();
2366 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2367 // This check is different to that in canSinkInstructions. There, we
2368 // cared about the global view once simplifycfg (and instcombine) have
2369 // completed - it takes into account PHIs that become trivially
2370 // simplifiable. However here we need a more local view; if an operand
2371 // differs we create a PHI and rely on instcombine to clean up the very
2372 // small mess we may make.
2373 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2374 return I->getOperand(O) != I0->getOperand(O);
2375 });
2376 if (!NeedPHI) {
2377 NewOperands.push_back(I0->getOperand(O));
2378 continue;
2379 }
2380
2381 // Create a new PHI in the successor block and populate it.
2382 auto *Op = I0->getOperand(O);
2383 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2384 auto *PN =
2385 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2386 PN->insertBefore(BBEnd->begin());
2387 for (auto *I : Insts)
2388 PN->addIncoming(I->getOperand(O), I->getParent());
2389 NewOperands.push_back(PN);
2390 }
2391
2392 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2393 // and move it to the start of the successor block.
2394 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2395 I0->getOperandUse(O).set(NewOperands[O]);
2396
2397 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2398
2399 // Update metadata and IR flags, and merge debug locations.
2400 for (auto *I : Insts)
2401 if (I != I0) {
2402 // The debug location for the "common" instruction is the merged locations
2403 // of all the commoned instructions. We start with the original location
2404 // of the "common" instruction and iteratively merge each location in the
2405 // loop below.
2406 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2407 // However, since an N-way merge for CallInst is rare, we use the simplified
2408 // API instead of the more complex API for N-way merges.
2409 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2410 combineMetadataForCSE(I0, I, true);
2411 I0->andIRFlags(I);
2412 if (auto *CB = dyn_cast<CallBase>(I0)) {
2413 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2414 assert(Success && "We should not be trying to sink callbases "
2415 "with non-intersectable attributes");
2416 // For NDEBUG Compile.
2417 (void)Success;
2418 }
2419 }
2420
2421 for (User *U : make_early_inc_range(I0->users())) {
2422 // canSinkInstructions checked that all instructions are only used by
2423 // phi nodes in a way that allows replacing the phi node with the common
2424 // instruction.
2425 auto *PN = cast<PHINode>(U);
2426 PN->replaceAllUsesWith(I0);
2427 PN->eraseFromParent();
2428 }
2429
2430 // Finally nuke all instructions apart from the common instruction.
2431 for (auto *I : Insts) {
2432 if (I == I0)
2433 continue;
2434 // The remaining uses are debug users, replace those with the common inst.
2435 // In most (all?) cases this just introduces a use-before-def.
2436 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2437 I->replaceAllUsesWith(I0);
2438 I->eraseFromParent();
2439 }
2440}
2441
2442/// Check whether BB's predecessors end with unconditional branches. If it is
2443/// true, sink any common code from the predecessors to BB.
2445 DomTreeUpdater *DTU) {
2446 // We support two situations:
2447 // (1) all incoming arcs are unconditional
2448 // (2) there are non-unconditional incoming arcs
2449 //
2450 // (2) is very common in switch defaults and
2451 // else-if patterns;
2452 //
2453 // if (a) f(1);
2454 // else if (b) f(2);
2455 //
2456 // produces:
2457 //
2458 // [if]
2459 // / \
2460 // [f(1)] [if]
2461 // | | \
2462 // | | |
2463 // | [f(2)]|
2464 // \ | /
2465 // [ end ]
2466 //
2467 // [end] has two unconditional predecessor arcs and one conditional. The
2468 // conditional refers to the implicit empty 'else' arc. This conditional
2469 // arc can also be caused by an empty default block in a switch.
2470 //
2471 // In this case, we attempt to sink code from all *unconditional* arcs.
2472 // If we can sink instructions from these arcs (determined during the scan
2473 // phase below) we insert a common successor for all unconditional arcs and
2474 // connect that to [end], to enable sinking:
2475 //
2476 // [if]
2477 // / \
2478 // [x(1)] [if]
2479 // | | \
2480 // | | \
2481 // | [x(2)] |
2482 // \ / |
2483 // [sink.split] |
2484 // \ /
2485 // [ end ]
2486 //
2487 SmallVector<BasicBlock*,4> UnconditionalPreds;
2488 bool HaveNonUnconditionalPredecessors = false;
2489 for (auto *PredBB : predecessors(BB)) {
2490 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2491 if (PredBr && PredBr->isUnconditional())
2492 UnconditionalPreds.push_back(PredBB);
2493 else
2494 HaveNonUnconditionalPredecessors = true;
2495 }
2496 if (UnconditionalPreds.size() < 2)
2497 return false;
2498
2499 // We take a two-step approach to tail sinking. First we scan from the end of
2500 // each block upwards in lockstep. If the n'th instruction from the end of each
2501 // block can be sunk, those instructions are added to ValuesToSink and we
2502 // carry on. If we can sink an instruction but need to PHI-merge some operands
2503 // (because they're not identical in each instruction) we add these to
2504 // PHIOperands.
2505 // We prepopulate PHIOperands with the phis that already exist in BB.
2507 for (PHINode &PN : BB->phis()) {
2509 for (const Use &U : PN.incoming_values())
2510 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2511 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2512 for (BasicBlock *Pred : UnconditionalPreds)
2513 Ops.push_back(*IncomingVals[Pred]);
2514 }
2515
2516 int ScanIdx = 0;
2517 SmallPtrSet<Value*,4> InstructionsToSink;
2518 LockstepReverseIterator LRI(UnconditionalPreds);
2519 while (LRI.isValid() &&
2520 canSinkInstructions(*LRI, PHIOperands)) {
2521 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2522 << "\n");
2523 InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2524 ++ScanIdx;
2525 --LRI;
2526 }
2527
2528 // If no instructions can be sunk, early-return.
2529 if (ScanIdx == 0)
2530 return false;
2531
2532 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2533
2534 if (!followedByDeoptOrUnreachable) {
2535 // Check whether this is the pointer operand of a load/store.
2536 auto IsMemOperand = [](Use &U) {
2537 auto *I = cast<Instruction>(U.getUser());
2538 if (isa<LoadInst>(I))
2539 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2540 if (isa<StoreInst>(I))
2541 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2542 return false;
2543 };
2544
2545 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2546 // actually sink before encountering instruction that is unprofitable to
2547 // sink?
2548 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2549 unsigned NumPHIInsts = 0;
2550 for (Use &U : (*LRI)[0]->operands()) {
2551 auto It = PHIOperands.find(&U);
2552 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2553 return InstructionsToSink.contains(V);
2554 })) {
2555 ++NumPHIInsts;
2556 // Do not separate a load/store from the gep producing the address.
2557 // The gep can likely be folded into the load/store as an addressing
2558 // mode. Additionally, a load of a gep is easier to analyze than a
2559 // load of a phi.
2560 if (IsMemOperand(U) &&
2561 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2562 return false;
2563 // FIXME: this check is overly optimistic. We may end up not sinking
2564 // said instruction, due to the very same profitability check.
2565 // See @creating_too_many_phis in sink-common-code.ll.
2566 }
2567 }
2568 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2569 return NumPHIInsts <= 1;
2570 };
2571
2572 // We've determined that we are going to sink last ScanIdx instructions,
2573 // and recorded them in InstructionsToSink. Now, some instructions may be
2574 // unprofitable to sink. But that determination depends on the instructions
2575 // that we are going to sink.
2576
2577 // First, forward scan: find the first instruction unprofitable to sink,
2578 // recording all the ones that are profitable to sink.
2579 // FIXME: would it be better, after we detect that not all are profitable,
2580 // to either record the profitable ones, or erase the unprofitable ones?
2581 // Maybe we need to choose (at runtime) the one that will touch least
2582 // instrs?
2583 LRI.reset();
2584 int Idx = 0;
2585 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2586 while (Idx < ScanIdx) {
2587 if (!ProfitableToSinkInstruction(LRI)) {
2588 // Too many PHIs would be created.
2589 LLVM_DEBUG(
2590 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2591 break;
2592 }
2593 InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2594 --LRI;
2595 ++Idx;
2596 }
2597
2598 // If no instructions can be sunk, early-return.
2599 if (Idx == 0)
2600 return false;
2601
2602 // Did we determine that (only) some instructions are unprofitable to sink?
2603 if (Idx < ScanIdx) {
2604 // Okay, some instructions are unprofitable.
2605 ScanIdx = Idx;
2606 InstructionsToSink = InstructionsProfitableToSink;
2607
2608 // But, that may make other instructions unprofitable, too.
2609 // So, do a backward scan, do any earlier instructions become
2610 // unprofitable?
2611 assert(
2612 !ProfitableToSinkInstruction(LRI) &&
2613 "We already know that the last instruction is unprofitable to sink");
2614 ++LRI;
2615 --Idx;
2616 while (Idx >= 0) {
2617 // If we detect that an instruction becomes unprofitable to sink,
2618 // all earlier instructions won't be sunk either,
2619 // so preemptively keep InstructionsProfitableToSink in sync.
2620 // FIXME: is this the most performant approach?
2621 for (auto *I : *LRI)
2622 InstructionsProfitableToSink.erase(I);
2623 if (!ProfitableToSinkInstruction(LRI)) {
2624 // Everything starting with this instruction won't be sunk.
2625 ScanIdx = Idx;
2626 InstructionsToSink = InstructionsProfitableToSink;
2627 }
2628 ++LRI;
2629 --Idx;
2630 }
2631 }
2632
2633 // If no instructions can be sunk, early-return.
2634 if (ScanIdx == 0)
2635 return false;
2636 }
2637
2638 bool Changed = false;
2639
2640 if (HaveNonUnconditionalPredecessors) {
2641 if (!followedByDeoptOrUnreachable) {
2642 // It is always legal to sink common instructions from unconditional
2643 // predecessors. However, if not all predecessors are unconditional,
2644 // this transformation might be pessimizing. So as a rule of thumb,
2645 // don't do it unless we'd sink at least one non-speculatable instruction.
2646 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2647 LRI.reset();
2648 int Idx = 0;
2649 bool Profitable = false;
2650 while (Idx < ScanIdx) {
2651 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2652 Profitable = true;
2653 break;
2654 }
2655 --LRI;
2656 ++Idx;
2657 }
2658 if (!Profitable)
2659 return false;
2660 }
2661
2662 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2663 // We have a conditional edge and we're going to sink some instructions.
2664 // Insert a new block postdominating all blocks we're going to sink from.
2665 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2666 // Edges couldn't be split.
2667 return false;
2668 Changed = true;
2669 }
2670
2671 // Now that we've analyzed all potential sinking candidates, perform the
2672 // actual sink. We iteratively sink the last non-terminator of the source
2673 // blocks into their common successor unless doing so would require too
2674 // many PHI instructions to be generated (currently only one PHI is allowed
2675 // per sunk instruction).
2676 //
2677 // We can use InstructionsToSink to discount values needing PHI-merging that will
2678 // actually be sunk in a later iteration. This allows us to be more
2679 // aggressive in what we sink. This does allow a false positive where we
2680 // sink presuming a later value will also be sunk, but stop half way through
2681 // and never actually sink it which means we produce more PHIs than intended.
2682 // This is unlikely in practice though.
2683 int SinkIdx = 0;
2684 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2685 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2686 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2687 << "\n");
2688
2689 // Because we've sunk every instruction in turn, the current instruction to
2690 // sink is always at index 0.
2691 LRI.reset();
2692
2693 sinkLastInstruction(UnconditionalPreds);
2694 NumSinkCommonInstrs++;
2695 Changed = true;
2696 }
2697 if (SinkIdx != 0)
2698 ++NumSinkCommonCode;
2699 return Changed;
2700}
2701
2702namespace {
2703
2704struct CompatibleSets {
2705 using SetTy = SmallVector<InvokeInst *, 2>;
2706
2708
2709 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2710
2711 SetTy &getCompatibleSet(InvokeInst *II);
2712
2713 void insert(InvokeInst *II);
2714};
2715
2716CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2717 // Perform a linear scan over all the existing sets, see if the new `invoke`
2718 // is compatible with any particular set. Since we know that all the `invokes`
2719 // within a set are compatible, only check the first `invoke` in each set.
2720 // WARNING: at worst, this has quadratic complexity.
2721 for (CompatibleSets::SetTy &Set : Sets) {
2722 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2723 return Set;
2724 }
2725
2726 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2727 return Sets.emplace_back();
2728}
2729
2730void CompatibleSets::insert(InvokeInst *II) {
2731 getCompatibleSet(II).emplace_back(II);
2732}
2733
2734bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2735 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2736
2737 // Can we theoretically merge these `invoke`s?
2738 auto IsIllegalToMerge = [](InvokeInst *II) {
2739 return II->cannotMerge() || II->isInlineAsm();
2740 };
2741 if (any_of(Invokes, IsIllegalToMerge))
2742 return false;
2743
2744 // Either both `invoke`s must be direct,
2745 // or both `invoke`s must be indirect.
2746 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2747 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2748 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2749 if (HaveIndirectCalls) {
2750 if (!AllCallsAreIndirect)
2751 return false;
2752 } else {
2753 // All callees must be identical.
2754 Value *Callee = nullptr;
2755 for (InvokeInst *II : Invokes) {
2756 Value *CurrCallee = II->getCalledOperand();
2757 assert(CurrCallee && "There is always a called operand.");
2758 if (!Callee)
2759 Callee = CurrCallee;
2760 else if (Callee != CurrCallee)
2761 return false;
2762 }
2763 }
2764
2765 // Either both `invoke`s must not have a normal destination,
2766 // or both `invoke`s must have a normal destination,
2767 auto HasNormalDest = [](InvokeInst *II) {
2768 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2769 };
2770 if (any_of(Invokes, HasNormalDest)) {
2771 // Do not merge `invoke` that does not have a normal destination with one
2772 // that does have a normal destination, even though doing so would be legal.
2773 if (!all_of(Invokes, HasNormalDest))
2774 return false;
2775
2776 // All normal destinations must be identical.
2777 BasicBlock *NormalBB = nullptr;
2778 for (InvokeInst *II : Invokes) {
2779 BasicBlock *CurrNormalBB = II->getNormalDest();
2780 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2781 if (!NormalBB)
2782 NormalBB = CurrNormalBB;
2783 else if (NormalBB != CurrNormalBB)
2784 return false;
2785 }
2786
2787 // In the normal destination, the incoming values for these two `invoke`s
2788 // must be compatible.
2789 SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
2791 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2792 &EquivalenceSet))
2793 return false;
2794 }
2795
2796#ifndef NDEBUG
2797 // All unwind destinations must be identical.
2798 // We know that because we have started from said unwind destination.
2799 BasicBlock *UnwindBB = nullptr;
2800 for (InvokeInst *II : Invokes) {
2801 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2802 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2803 if (!UnwindBB)
2804 UnwindBB = CurrUnwindBB;
2805 else
2806 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2807 }
2808#endif
2809
2810 // In the unwind destination, the incoming values for these two `invoke`s
2811 // must be compatible.
2813 Invokes.front()->getUnwindDest(),
2814 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2815 return false;
2816
2817 // Ignoring arguments, these `invoke`s must be identical,
2818 // including operand bundles.
2819 const InvokeInst *II0 = Invokes.front();
2820 for (auto *II : Invokes.drop_front())
2821 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2822 return false;
2823
2824 // Can we theoretically form the data operands for the merged `invoke`?
2825 auto IsIllegalToMergeArguments = [](auto Ops) {
2826 Use &U0 = std::get<0>(Ops);
2827 Use &U1 = std::get<1>(Ops);
2828 if (U0 == U1)
2829 return false;
2830 return U0->getType()->isTokenTy() ||
2831 !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
2832 U0.getOperandNo());
2833 };
2834 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2835 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2836 IsIllegalToMergeArguments))
2837 return false;
2838
2839 return true;
2840}
2841
2842} // namespace
2843
2844// Merge all invokes in the provided set, all of which are compatible
2845// as per the `CompatibleSets::shouldBelongToSameSet()`.
2847 DomTreeUpdater *DTU) {
2848 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2849
2851 if (DTU)
2852 Updates.reserve(2 + 3 * Invokes.size());
2853
2854 bool HasNormalDest =
2855 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2856
2857 // Clone one of the invokes into a new basic block.
2858 // Since they are all compatible, it doesn't matter which invoke is cloned.
2859 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2860 InvokeInst *II0 = Invokes.front();
2861 BasicBlock *II0BB = II0->getParent();
2862 BasicBlock *InsertBeforeBlock =
2863 II0->getParent()->getIterator()->getNextNode();
2864 Function *Func = II0BB->getParent();
2865 LLVMContext &Ctx = II0->getContext();
2866
2867 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2868 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2869
2870 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2871 // NOTE: all invokes have the same attributes, so no handling needed.
2872 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2873
2874 if (!HasNormalDest) {
2875 // This set does not have a normal destination,
2876 // so just form a new block with unreachable terminator.
2877 BasicBlock *MergedNormalDest = BasicBlock::Create(
2878 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2879 new UnreachableInst(Ctx, MergedNormalDest);
2880 MergedInvoke->setNormalDest(MergedNormalDest);
2881 }
2882
2883 // The unwind destination, however, remainds identical for all invokes here.
2884
2885 return MergedInvoke;
2886 }();
2887
2888 if (DTU) {
2889 // Predecessor blocks that contained these invokes will now branch to
2890 // the new block that contains the merged invoke, ...
2891 for (InvokeInst *II : Invokes)
2892 Updates.push_back(
2893 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2894
2895 // ... which has the new `unreachable` block as normal destination,
2896 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2897 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2898 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2899 SuccBBOfMergedInvoke});
2900
2901 // Since predecessor blocks now unconditionally branch to a new block,
2902 // they no longer branch to their original successors.
2903 for (InvokeInst *II : Invokes)
2904 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2905 Updates.push_back(
2906 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2907 }
2908
2909 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2910
2911 // Form the merged operands for the merged invoke.
2912 for (Use &U : MergedInvoke->operands()) {
2913 // Only PHI together the indirect callees and data operands.
2914 if (MergedInvoke->isCallee(&U)) {
2915 if (!IsIndirectCall)
2916 continue;
2917 } else if (!MergedInvoke->isDataOperand(&U))
2918 continue;
2919
2920 // Don't create trivial PHI's with all-identical incoming values.
2921 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2922 return II->getOperand(U.getOperandNo()) != U.get();
2923 });
2924 if (!NeedPHI)
2925 continue;
2926
2927 // Form a PHI out of all the data ops under this index.
2929 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2930 for (InvokeInst *II : Invokes)
2931 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2932
2933 U.set(PN);
2934 }
2935
2936 // We've ensured that each PHI node has compatible (identical) incoming values
2937 // when coming from each of the `invoke`s in the current merge set,
2938 // so update the PHI nodes accordingly.
2939 for (BasicBlock *Succ : successors(MergedInvoke))
2940 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2941 /*ExistPred=*/Invokes.front()->getParent());
2942
2943 // And finally, replace the original `invoke`s with an unconditional branch
2944 // to the block with the merged `invoke`. Also, give that merged `invoke`
2945 // the merged debugloc of all the original `invoke`s.
2946 DILocation *MergedDebugLoc = nullptr;
2947 for (InvokeInst *II : Invokes) {
2948 // Compute the debug location common to all the original `invoke`s.
2949 if (!MergedDebugLoc)
2950 MergedDebugLoc = II->getDebugLoc();
2951 else
2952 MergedDebugLoc =
2953 DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2954
2955 // And replace the old `invoke` with an unconditionally branch
2956 // to the block with the merged `invoke`.
2957 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2958 OrigSuccBB->removePredecessor(II->getParent());
2959 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2960 // The unconditional branch is part of the replacement for the original
2961 // invoke, so should use its DebugLoc.
2962 BI->setDebugLoc(II->getDebugLoc());
2963 bool Success = MergedInvoke->tryIntersectAttributes(II);
2964 assert(Success && "Merged invokes with incompatible attributes");
2965 // For NDEBUG Compile
2966 (void)Success;
2967 II->replaceAllUsesWith(MergedInvoke);
2968 II->eraseFromParent();
2969 ++NumInvokesMerged;
2970 }
2971 MergedInvoke->setDebugLoc(MergedDebugLoc);
2972 ++NumInvokeSetsFormed;
2973
2974 if (DTU)
2975 DTU->applyUpdates(Updates);
2976}
2977
2978/// If this block is a `landingpad` exception handling block, categorize all
2979/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2980/// being "mergeable" together, and then merge invokes in each set together.
2981///
2982/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2983/// [...] [...]
2984/// | |
2985/// [invoke0] [invoke1]
2986/// / \ / \
2987/// [cont0] [landingpad] [cont1]
2988/// to:
2989/// [...] [...]
2990/// \ /
2991/// [invoke]
2992/// / \
2993/// [cont] [landingpad]
2994///
2995/// But of course we can only do that if the invokes share the `landingpad`,
2996/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2997/// and the invoked functions are "compatible".
3000 return false;
3001
3002 bool Changed = false;
3003
3004 // FIXME: generalize to all exception handling blocks?
3005 if (!BB->isLandingPad())
3006 return Changed;
3007
3008 CompatibleSets Grouper;
3009
3010 // Record all the predecessors of this `landingpad`. As per verifier,
3011 // the only allowed predecessor is the unwind edge of an `invoke`.
3012 // We want to group "compatible" `invokes` into the same set to be merged.
3013 for (BasicBlock *PredBB : predecessors(BB))
3014 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
3015
3016 // And now, merge `invoke`s that were grouped togeter.
3017 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
3018 if (Invokes.size() < 2)
3019 continue;
3020 Changed = true;
3021 mergeCompatibleInvokesImpl(Invokes, DTU);
3022 }
3023
3024 return Changed;
3025}
3026
3027namespace {
3028/// Track ephemeral values, which should be ignored for cost-modelling
3029/// purposes. Requires walking instructions in reverse order.
3030class EphemeralValueTracker {
3032
3033 bool isEphemeral(const Instruction *I) {
3034 if (isa<AssumeInst>(I))
3035 return true;
3036 return !I->mayHaveSideEffects() && !I->isTerminator() &&
3037 all_of(I->users(), [&](const User *U) {
3038 return EphValues.count(cast<Instruction>(U));
3039 });
3040 }
3041
3042public:
3043 bool track(const Instruction *I) {
3044 if (isEphemeral(I)) {
3045 EphValues.insert(I);
3046 return true;
3047 }
3048 return false;
3049 }
3050
3051 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3052};
3053} // namespace
3054
3055/// Determine if we can hoist sink a sole store instruction out of a
3056/// conditional block.
3057///
3058/// We are looking for code like the following:
3059/// BrBB:
3060/// store i32 %add, i32* %arrayidx2
3061/// ... // No other stores or function calls (we could be calling a memory
3062/// ... // function).
3063/// %cmp = icmp ult %x, %y
3064/// br i1 %cmp, label %EndBB, label %ThenBB
3065/// ThenBB:
3066/// store i32 %add5, i32* %arrayidx2
3067/// br label EndBB
3068/// EndBB:
3069/// ...
3070/// We are going to transform this into:
3071/// BrBB:
3072/// store i32 %add, i32* %arrayidx2
3073/// ... //
3074/// %cmp = icmp ult %x, %y
3075/// %add.add5 = select i1 %cmp, i32 %add, %add5
3076/// store i32 %add.add5, i32* %arrayidx2
3077/// ...
3078///
3079/// \return The pointer to the value of the previous store if the store can be
3080/// hoisted into the predecessor block. 0 otherwise.
3082 BasicBlock *StoreBB, BasicBlock *EndBB) {
3083 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3084 if (!StoreToHoist)
3085 return nullptr;
3086
3087 // Volatile or atomic.
3088 if (!StoreToHoist->isSimple())
3089 return nullptr;
3090
3091 Value *StorePtr = StoreToHoist->getPointerOperand();
3092 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3093
3094 // Look for a store to the same pointer in BrBB.
3095 unsigned MaxNumInstToLookAt = 9;
3096 // Skip pseudo probe intrinsic calls which are not really killing any memory
3097 // accesses.
3098 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3099 if (!MaxNumInstToLookAt)
3100 break;
3101 --MaxNumInstToLookAt;
3102
3103 // Could be calling an instruction that affects memory like free().
3104 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3105 return nullptr;
3106
3107 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3108 // Found the previous store to same location and type. Make sure it is
3109 // simple, to avoid introducing a spurious non-atomic write after an
3110 // atomic write.
3111 if (SI->getPointerOperand() == StorePtr &&
3112 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3113 SI->getAlign() >= StoreToHoist->getAlign())
3114 // Found the previous store, return its value operand.
3115 return SI->getValueOperand();
3116 return nullptr; // Unknown store.
3117 }
3118
3119 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3120 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3121 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3122 Value *Obj = getUnderlyingObject(StorePtr);
3123 bool ExplicitlyDereferenceableOnly;
3124 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3125 !PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3126 /*StoreCaptures=*/true) &&
3127 (!ExplicitlyDereferenceableOnly ||
3128 isDereferenceablePointer(StorePtr, StoreTy,
3129 LI->getDataLayout()))) {
3130 // Found a previous load, return it.
3131 return LI;
3132 }
3133 }
3134 // The load didn't work out, but we may still find a store.
3135 }
3136 }
3137
3138 return nullptr;
3139}
3140
3141/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3142/// converted to selects.
3144 BasicBlock *EndBB,
3145 unsigned &SpeculatedInstructions,
3147 const TargetTransformInfo &TTI) {
3149 BB->getParent()->hasMinSize()
3152
3153 bool HaveRewritablePHIs = false;
3154 for (PHINode &PN : EndBB->phis()) {
3155 Value *OrigV = PN.getIncomingValueForBlock(BB);
3156 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3157
3158 // FIXME: Try to remove some of the duplication with
3159 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3160 if (ThenV == OrigV)
3161 continue;
3162
3163 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
3165
3166 // Don't convert to selects if we could remove undefined behavior instead.
3167 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3169 return false;
3170
3171 HaveRewritablePHIs = true;
3172 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3173 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3174 if (!OrigCE && !ThenCE)
3175 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3176
3177 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3178 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3179 InstructionCost MaxCost =
3181 if (OrigCost + ThenCost > MaxCost)
3182 return false;
3183
3184 // Account for the cost of an unfolded ConstantExpr which could end up
3185 // getting expanded into Instructions.
3186 // FIXME: This doesn't account for how many operations are combined in the
3187 // constant expression.
3188 ++SpeculatedInstructions;
3189 if (SpeculatedInstructions > 1)
3190 return false;
3191 }
3192
3193 return HaveRewritablePHIs;
3194}
3195
3197 std::optional<bool> Invert,
3198 const TargetTransformInfo &TTI) {
3199 // If the branch is non-unpredictable, and is predicted to *not* branch to
3200 // the `then` block, then avoid speculating it.
3201 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3202 return true;
3203
3204 uint64_t TWeight, FWeight;
3205 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3206 return true;
3207
3208 if (!Invert.has_value())
3209 return false;
3210
3211 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3212 BranchProbability BIEndProb =
3213 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3215 return BIEndProb < Likely;
3216}
3217
3218/// Speculate a conditional basic block flattening the CFG.
3219///
3220/// Note that this is a very risky transform currently. Speculating
3221/// instructions like this is most often not desirable. Instead, there is an MI
3222/// pass which can do it with full awareness of the resource constraints.
3223/// However, some cases are "obvious" and we should do directly. An example of
3224/// this is speculating a single, reasonably cheap instruction.
3225///
3226/// There is only one distinct advantage to flattening the CFG at the IR level:
3227/// it makes very common but simplistic optimizations such as are common in
3228/// instcombine and the DAG combiner more powerful by removing CFG edges and
3229/// modeling their effects with easier to reason about SSA value graphs.
3230///
3231///
3232/// An illustration of this transform is turning this IR:
3233/// \code
3234/// BB:
3235/// %cmp = icmp ult %x, %y
3236/// br i1 %cmp, label %EndBB, label %ThenBB
3237/// ThenBB:
3238/// %sub = sub %x, %y
3239/// br label BB2
3240/// EndBB:
3241/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3242/// ...
3243/// \endcode
3244///
3245/// Into this IR:
3246/// \code
3247/// BB:
3248/// %cmp = icmp ult %x, %y
3249/// %sub = sub %x, %y
3250/// %cond = select i1 %cmp, 0, %sub
3251/// ...
3252/// \endcode
3253///
3254/// \returns true if the conditional block is removed.
3255bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3256 BasicBlock *ThenBB) {
3257 if (!Options.SpeculateBlocks)
3258 return false;
3259
3260 // Be conservative for now. FP select instruction can often be expensive.
3261 Value *BrCond = BI->getCondition();
3262 if (isa<FCmpInst>(BrCond))
3263 return false;
3264
3265 BasicBlock *BB = BI->getParent();
3266 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3267 InstructionCost Budget =
3269
3270 // If ThenBB is actually on the false edge of the conditional branch, remember
3271 // to swap the select operands later.
3272 bool Invert = false;
3273 if (ThenBB != BI->getSuccessor(0)) {
3274 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3275 Invert = true;
3276 }
3277 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3278
3279 if (!isProfitableToSpeculate(BI, Invert, TTI))
3280 return false;
3281
3282 // Keep a count of how many times instructions are used within ThenBB when
3283 // they are candidates for sinking into ThenBB. Specifically:
3284 // - They are defined in BB, and
3285 // - They have no side effects, and
3286 // - All of their uses are in ThenBB.
3287 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3288
3289 SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
3290
3291 unsigned SpeculatedInstructions = 0;
3292 bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
3293 Options.HoistLoadsStoresWithCondFaulting;
3294 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3295 Value *SpeculatedStoreValue = nullptr;
3296 StoreInst *SpeculatedStore = nullptr;
3297 EphemeralValueTracker EphTracker;
3298 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3299 // Skip debug info.
3300 if (isa<DbgInfoIntrinsic>(I)) {
3301 SpeculatedDbgIntrinsics.push_back(&I);
3302 continue;
3303 }
3304
3305 // Skip pseudo probes. The consequence is we lose track of the branch
3306 // probability for ThenBB, which is fine since the optimization here takes
3307 // place regardless of the branch probability.
3308 if (isa<PseudoProbeInst>(I)) {
3309 // The probe should be deleted so that it will not be over-counted when
3310 // the samples collected on the non-conditional path are counted towards
3311 // the conditional path. We leave it for the counts inference algorithm to
3312 // figure out a proper count for an unknown probe.
3313 SpeculatedDbgIntrinsics.push_back(&I);
3314 continue;
3315 }
3316
3317 // Ignore ephemeral values, they will be dropped by the transform.
3318 if (EphTracker.track(&I))
3319 continue;
3320
3321 // Only speculatively execute a single instruction (not counting the
3322 // terminator) for now.
3323 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3325 SpeculatedConditionalLoadsStores.size() <
3327 // Not count load/store into cost if target supports conditional faulting
3328 // b/c it's cheap to speculate it.
3329 if (IsSafeCheapLoadStore)
3330 SpeculatedConditionalLoadsStores.push_back(&I);
3331 else
3332 ++SpeculatedInstructions;
3333
3334 if (SpeculatedInstructions > 1)
3335 return false;
3336
3337 // Don't hoist the instruction if it's unsafe or expensive.
3338 if (!IsSafeCheapLoadStore &&
3340 !(HoistCondStores && !SpeculatedStoreValue &&
3341 (SpeculatedStoreValue =
3342 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3343 return false;
3344 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3347 return false;
3348
3349 // Store the store speculation candidate.
3350 if (!SpeculatedStore && SpeculatedStoreValue)
3351 SpeculatedStore = cast<StoreInst>(&I);
3352
3353 // Do not hoist the instruction if any of its operands are defined but not
3354 // used in BB. The transformation will prevent the operand from
3355 // being sunk into the use block.
3356 for (Use &Op : I.operands()) {
3357 Instruction *OpI = dyn_cast<Instruction>(Op);
3358 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3359 continue; // Not a candidate for sinking.
3360
3361 ++SinkCandidateUseCounts[OpI];
3362 }
3363 }
3364
3365 // Consider any sink candidates which are only used in ThenBB as costs for
3366 // speculation. Note, while we iterate over a DenseMap here, we are summing
3367 // and so iteration order isn't significant.
3368 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3369 if (Inst->hasNUses(Count)) {
3370 ++SpeculatedInstructions;
3371 if (SpeculatedInstructions > 1)
3372 return false;
3373 }
3374
3375 // Check that we can insert the selects and that it's not too expensive to do
3376 // so.
3377 bool Convert =
3378 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3380 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3381 SpeculatedInstructions, Cost, TTI);
3382 if (!Convert || Cost > Budget)
3383 return false;
3384
3385 // If we get here, we can hoist the instruction and if-convert.
3386 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3387
3388 // Insert a select of the value of the speculated store.
3389 if (SpeculatedStoreValue) {
3390 IRBuilder<NoFolder> Builder(BI);
3391 Value *OrigV = SpeculatedStore->getValueOperand();
3392 Value *TrueV = SpeculatedStore->getValueOperand();
3393 Value *FalseV = SpeculatedStoreValue;
3394 if (Invert)
3395 std::swap(TrueV, FalseV);
3396 Value *S = Builder.CreateSelect(
3397 BrCond, TrueV, FalseV, "spec.store.select", BI);
3398 SpeculatedStore->setOperand(0, S);
3399 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3400 SpeculatedStore->getDebugLoc());
3401 // The value stored is still conditional, but the store itself is now
3402 // unconditonally executed, so we must be sure that any linked dbg.assign
3403 // intrinsics are tracking the new stored value (the result of the
3404 // select). If we don't, and the store were to be removed by another pass
3405 // (e.g. DSE), then we'd eventually end up emitting a location describing
3406 // the conditional value, unconditionally.
3407 //
3408 // === Before this transformation ===
3409 // pred:
3410 // store %one, %x.dest, !DIAssignID !1
3411 // dbg.assign %one, "x", ..., !1, ...
3412 // br %cond if.then
3413 //
3414 // if.then:
3415 // store %two, %x.dest, !DIAssignID !2
3416 // dbg.assign %two, "x", ..., !2, ...
3417 //
3418 // === After this transformation ===
3419 // pred:
3420 // store %one, %x.dest, !DIAssignID !1
3421 // dbg.assign %one, "x", ..., !1
3422 /// ...
3423 // %merge = select %cond, %two, %one
3424 // store %merge, %x.dest, !DIAssignID !2
3425 // dbg.assign %merge, "x", ..., !2
3426 auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3427 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3428 DbgAssign->replaceVariableLocationOp(OrigV, S);
3429 };
3430 for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
3431 for_each(at::getDVRAssignmentMarkers(SpeculatedStore), replaceVariable);
3432 }
3433
3434 // Metadata can be dependent on the condition we are hoisting above.
3435 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3436 // to avoid making it appear as if the condition is a constant, which would
3437 // be misleading while debugging.
3438 // Similarly strip attributes that maybe dependent on condition we are
3439 // hoisting above.
3440 for (auto &I : make_early_inc_range(*ThenBB)) {
3441 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3442 // Don't update the DILocation of dbg.assign intrinsics.
3443 if (!isa<DbgAssignIntrinsic>(&I))
3444 I.setDebugLoc(DebugLoc());
3445 }
3446 I.dropUBImplyingAttrsAndMetadata();
3447
3448 // Drop ephemeral values.
3449 if (EphTracker.contains(&I)) {
3450 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3451 I.eraseFromParent();
3452 }
3453 }
3454
3455 // Hoist the instructions.
3456 // In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached
3457 // to these instructions, in the same way that dbg.value intrinsics are
3458 // dropped at the end of this block.
3459 for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
3460 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3461 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3462 // equivalent).
3463 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3464 !DVR || !DVR->isDbgAssign())
3465 It.dropOneDbgRecord(&DR);
3466 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3467 std::prev(ThenBB->end()));
3468
3469 if (!SpeculatedConditionalLoadsStores.empty())
3470 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert);
3471
3472 // Insert selects and rewrite the PHI operands.
3473 IRBuilder<NoFolder> Builder(BI);
3474 for (PHINode &PN : EndBB->phis()) {
3475 unsigned OrigI = PN.getBasicBlockIndex(BB);
3476 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3477 Value *OrigV = PN.getIncomingValue(OrigI);
3478 Value *ThenV = PN.getIncomingValue(ThenI);
3479
3480 // Skip PHIs which are trivial.
3481 if (OrigV == ThenV)
3482 continue;
3483
3484 // Create a select whose true value is the speculatively executed value and
3485 // false value is the pre-existing value. Swap them if the branch
3486 // destinations were inverted.
3487 Value *TrueV = ThenV, *FalseV = OrigV;
3488 if (Invert)
3489 std::swap(TrueV, FalseV);
3490 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3491 PN.setIncomingValue(OrigI, V);
3492 PN.setIncomingValue(ThenI, V);
3493 }
3494
3495 // Remove speculated dbg intrinsics.
3496 // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
3497 // dbg value for the different flows and inserting it after the select.
3498 for (Instruction *I : SpeculatedDbgIntrinsics) {
3499 // We still want to know that an assignment took place so don't remove
3500 // dbg.assign intrinsics.
3501 if (!isa<DbgAssignIntrinsic>(I))
3502 I->eraseFromParent();
3503 }
3504
3505 ++NumSpeculations;
3506 return true;
3507}
3508
3509 /// Return true if we can thread a branch across this block.
///
/// The visible body rejects the block (returns false) when it contains a call
/// that cannot be duplicated or is convergent, when the running count of
/// instructions that are neither ephemeral nor PHIs passes MaxSmallBlockSize,
/// or when any instruction has a user that is a PHI node or that lives in a
/// block other than BB.
// NOTE(review): the declaration line (listing line 3510) is absent from this
// listing; BB is presumably the function's only parameter -- confirm against
// the original file.
3511 int Size = 0;
3512 EphemeralValueTracker EphTracker;
3513
3514 // Walk the block in reverse so that we can identify ephemeral values properly
3515 // (values only feeding assumes).
3516 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3517 // Can't fold blocks that contain noduplicate or convergent calls.
3518 if (CallInst *CI = dyn_cast<CallInst>(&I))
3519 if (CI->cannotDuplicate() || CI->isConvergent())
3520 return false;
3521
3522 // Ignore ephemeral values which are deleted during codegen.
3523 // We will delete Phis while threading, so Phis should not be accounted in
3524 // block's size.
3525 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
// Post-increment: the comparison sees the count before this instruction is
// added to it.
3526 if (Size++ > MaxSmallBlockSize)
3527 return false; // Don't clone large BB's.
3528 }
3529
3530 // We can only support instructions that do not define values that are
3531 // live outside of the current basic block.
3532 for (User *U : I.users()) {
3533 Instruction *UI = cast<Instruction>(U);
3534 if (UI->getParent() != BB || isa<PHINode>(UI))
3535 return false;
3536 }
3537
3538 // Looks ok, continue checking.
3539 }
3540
3541 return true;
3542 }
3543
// Return the constant that V is known to hold on the CFG edge From -> To, or
// nullptr when nothing is known. The value is known when From ends in a
// conditional branch on V whose two successors differ; the true constant is
// produced when To is successor 0.
// NOTE(review): listing lines 3544 (start of the signature) and 3557 (the
// other arm of the conditional expression, presumably the false constant) are
// absent from this listing -- confirm against the original file.
3545 BasicBlock *To) {
3546 // Don't look past the block defining the value, we might get the value from
3547 // a previous loop iteration.
3548 auto *I = dyn_cast<Instruction>(V);
3549 if (I && I->getParent() == To)
3550 return nullptr;
3551
3552 // We know the value if the From block branches on it.
3553 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3554 if (BI && BI->isConditional() && BI->getCondition() == V &&
3555 BI->getSuccessor(0) != BI->getSuccessor(1))
3556 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3558
// Nothing could be deduced about V on this edge.
3559 return nullptr;
3560 }
3561
3562 /// If we have a conditional branch on something for which we know the constant
3563 /// value in predecessors (e.g. a phi node in the current block), thread edges
3564 /// from the predecessor to their ultimate destination.
///
/// Returns true for the degenerate single-entry PHI case, false when no edge
/// was threaded, and std::nullopt after threading one group of predecessors so
/// that the caller repeats the query.
// NOTE(review): listing lines 3566, 3569, 3576, 3596 and 3704 are absent from
// this listing (they include the function name with its first parameters and
// the declarations of KnownValues and Updates) -- confirm against the
// original file.
3565 static std::optional<bool>
3567 const DataLayout &DL,
3568 AssumptionCache *AC) {
3570 BasicBlock *BB = BI->getParent();
3571 Value *Cond = BI->getCondition();
3572 PHINode *PN = dyn_cast<PHINode>(Cond);
3573 if (PN && PN->getParent() == BB) {
3574 // Degenerate case of a single entry PHI.
3575 if (PN->getNumIncomingValues() == 1) {
3577 return true;
3578 }
3579
// Record, per constant, the incoming blocks that feed that constant into the
// PHI.
3580 for (Use &U : PN->incoming_values())
3581 if (auto *CB = dyn_cast<ConstantInt>(U))
3582 KnownValues[CB].insert(PN->getIncomingBlock(U));
3583 } else {
// The condition is not a PHI in BB: ask each predecessor edge whether it
// pins the condition to a constant.
3584 for (BasicBlock *Pred : predecessors(BB)) {
3585 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3586 KnownValues[CB].insert(Pred);
3587 }
3588 }
3589
3590 if (KnownValues.empty())
3591 return false;
3592
3593 // Now we know that this block has multiple preds and two succs.
3594 // Check that the block is small enough and values defined in the block are
3595 // not used outside of it.
3597 return false;
3598
3599 for (const auto &Pair : KnownValues) {
3600 // Okay, we now know that all edges from PredBB should be revectored to
3601 // branch to RealDest.
3602 ConstantInt *CB = Pair.first;
3603 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
// A non-zero (true) constant selects successor 0, a zero constant successor 1.
3604 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3605
3606 if (RealDest == BB)
3607 continue; // Skip self loops.
3608
3609 // Skip if the predecessor's terminator is an indirect branch.
3610 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3611 return isa<IndirectBrInst>(PredBB->getTerminator());
3612 }))
3613 continue;
3614
3615 LLVM_DEBUG({
3616 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3617 << " has value " << *Pair.first << " in predecessors:\n";
3618 for (const BasicBlock *PredBB : Pair.second)
3619 dbgs() << " " << PredBB->getName() << "\n";
3620 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3621 });
3622
3623 // Split the predecessors we are threading into a new edge block. We'll
3624 // clone the instructions into this block, and then redirect it to RealDest.
3625 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3626
3627 // TODO: These just exist to reduce test diff, we can drop them if we like.
3628 EdgeBB->setName(RealDest->getName() + ".critedge");
3629 EdgeBB->moveBefore(RealDest);
3630
3631 // Update PHI nodes.
3632 addPredecessorToBlock(RealDest, EdgeBB, BB);
3633
3634 // BB may have instructions that are being threaded over. Clone these
3635 // instructions into EdgeBB. We know that there will be no uses of the
3636 // cloned instructions outside of EdgeBB.
3637 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3638 DenseMap<Value *, Value *> TranslateMap; // Track translated values.
// On this edge the branch condition is the known constant.
3639 TranslateMap[Cond] = CB;
3640
3641 // RemoveDIs: track instructions that we optimise away while folding, so
3642 // that we can copy DbgVariableRecords from them later.
3643 BasicBlock::iterator SrcDbgCursor = BB->begin();
3644 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
// PHIs are not cloned; they are replaced by their incoming value for EdgeBB.
3645 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3646 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3647 continue;
3648 }
3649 // Clone the instruction.
3650 Instruction *N = BBI->clone();
3651 // Insert the new instruction into its new home.
3652 N->insertInto(EdgeBB, InsertPt);
3653
3654 if (BBI->hasName())
3655 N->setName(BBI->getName() + ".c");
3656
3657 // Update operands due to translation.
3658 for (Use &Op : N->operands()) {
3659 DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
3660 if (PI != TranslateMap.end())
3661 Op = PI->second;
3662 }
3663
3664 // Check for trivial simplification.
3665 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3666 if (!BBI->use_empty())
3667 TranslateMap[&*BBI] = V;
// The clone is only deleted when it has no side effects; otherwise it stays
// in EdgeBB even though its value was simplified.
3668 if (!N->mayHaveSideEffects()) {
3669 N->eraseFromParent(); // Instruction folded away, don't need actual
3670 // inst
3671 N = nullptr;
3672 }
3673 } else {
3674 if (!BBI->use_empty())
3675 TranslateMap[&*BBI] = N;
3676 }
3677 if (N) {
3678 // Copy all debug-info attached to instructions from the last we
3679 // successfully clone, up to this instruction (they might have been
3680 // folded away).
3681 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3682 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3683 SrcDbgCursor = std::next(BBI);
3684 // Clone debug-info on this instruction too.
3685 N->cloneDebugInfoFrom(&*BBI);
3686
3687 // Register the new instruction with the assumption cache if necessary.
3688 if (auto *Assume = dyn_cast<AssumeInst>(N))
3689 if (AC)
3690 AC->registerAssumption(Assume);
3691 }
3692 }
3693
// Attach debug-info from any remaining source instructions, and from the
// branch itself, to the instruction at the insertion point.
3694 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3695 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3696 InsertPt->cloneDebugInfoFrom(BI);
3697
// Retarget EdgeBB from BB to RealDest.
3698 BB->removePredecessor(EdgeBB);
3699 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3700 EdgeBI->setSuccessor(0, RealDest);
3701 EdgeBI->setDebugLoc(BI->getDebugLoc());
3702
3703 if (DTU) {
3705 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3706 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3707 DTU->applyUpdates(Updates);
3708 }
3709
3710 // For simplicity, we created a separate basic block for the edge. Merge
3711 // it back into the predecessor if possible. This not only avoids
3712 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3713 // bypass the check for trivial cycles above.
3714 MergeBlockIntoPredecessor(EdgeBB, DTU);
3715
3716 // Signal repeat, simplifying any other constants.
3717 return std::nullopt;
3718 }
3719
// Every known constant was skipped (self loop or indirectbr predecessor).
3720 return false;
3721 }
3722
// Repeatedly invoke foldCondBranchOnValueKnownInPredecessorImpl on BI for as
// long as it reports std::nullopt, and return whether any invocation reported
// a change.
// NOTE(review): listing line 3723 (start of the signature, presumably naming
// the function and declaring BI) is absent from this listing -- confirm
// against the original file.
3724 DomTreeUpdater *DTU,
3725 const DataLayout &DL,
3726 AssumptionCache *AC) {
3727 std::optional<bool> Result;
3728 bool EverChanged = false;
3729 do {
3730 // Note that None means "we changed things, but recurse further."
3731 Result = foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
// std::nullopt counts as a change; otherwise use the returned flag.
3732 EverChanged |= Result == std::nullopt || *Result;
3733 } while (Result == std::nullopt);
3734 return EverChanged;
3735 }
3736
3737 /// Given a BB that starts with the specified two-entry PHI node,
3738 /// see if we can eliminate it.
///
/// Returns true when every PHI was simplified away or converted to a select,
/// false when the transform is rejected before any change is made, and
/// Changed (true only if some PHI was simplified) when it is abandoned midway.
// NOTE(review): listing lines 3739-3740 (the signature start), 3761-3762,
// 3781, 3815, 3817, 3859 and 3921 are absent from this listing (they include
// the declarations of IfBlocks, Likely, Cost and Updates) -- confirm against
// the original file.
3741 const DataLayout &DL,
3742 bool SpeculateUnpredictables) {
3743 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3744 // statement", which has a very simple dominance structure. Basically, we
3745 // are trying to find the condition that is being branched on, which
3746 // subsequently causes this merge to happen. We really want control
3747 // dependence information for this check, but simplifycfg can't keep it up
3748 // to date, and this catches most of the cases we care about anyway.
3749 BasicBlock *BB = PN->getParent();
3750
3751 BasicBlock *IfTrue, *IfFalse;
3752 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3753 if (!DomBI)
3754 return false;
3755 Value *IfCond = DomBI->getCondition();
3756 // Don't bother if the branch will be constant folded trivially.
3757 if (isa<ConstantInt>(IfCond))
3758 return false;
3759
3760 BasicBlock *DomBlock = DomBI->getParent();
// Collect the incoming blocks of PN that end in an unconditional branch.
3763 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3764 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3765 });
3766 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3767 "Will have either one or two blocks to speculate.");
3768
3769 // If the branch is non-unpredictable, see if we either predictably jump to
3770 // the merge bb (if we have only a single 'then' block), or if we predictably
3771 // jump to one specific 'then' block (if we have two of them).
3772 // It isn't beneficial to speculatively execute the code
3773 // from the block that we know is predictably not entered.
3774 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3775 if (!IsUnpredictable) {
3776 uint64_t TWeight, FWeight;
3777 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3778 (TWeight + FWeight) != 0) {
3779 BranchProbability BITrueProb =
3780 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3782 BranchProbability BIFalseProb = BITrueProb.getCompl();
3783 if (IfBlocks.size() == 1) {
3784 BranchProbability BIBBProb =
3785 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3786 if (BIBBProb >= Likely)
3787 return false;
3788 } else {
3789 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3790 return false;
3791 }
3792 }
3793 }
3794
3795 // Don't try to fold an unreachable block. For example, the phi node itself
3796 // can't be the candidate if-condition for a select that we want to form.
3797 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3798 if (IfCondPhiInst->getParent() == BB)
3799 return false;
3800
3801 // Okay, we found that we can merge this two-entry phi node into a select.
3802 // Doing so would require us to fold *all* two entry phi nodes in this block.
3803 // At some point this becomes non-profitable (particularly if the target
3804 // doesn't support cmov's). Only do this transformation if there are two or
3805 // fewer PHI nodes in this block.
// NOTE(review): NumPhis is tested before it is incremented, so the early
// return only fires on reaching a fourth PHI; blocks with three PHIs pass,
// which differs from the "two or fewer" wording above -- confirm intent.
3806 unsigned NumPhis = 0;
3807 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3808 if (NumPhis > 2)
3809 return false;
3810
3811 // Loop over the PHI's seeing if we can promote them all to select
3812 // instructions. While we are at it, keep track of the instructions
3813 // that need to be moved to the dominating block.
3814 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3816 InstructionCost Budget =
3818 if (SpeculateUnpredictables && IsUnpredictable)
3819 Budget += TTI.getBranchMispredictPenalty();
3820
3821 bool Changed = false;
3822 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
// Advance the iterator before the PHI may be erased below.
3823 PHINode *PN = cast<PHINode>(II++);
3824 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3825 PN->replaceAllUsesWith(V);
3826 PN->eraseFromParent();
3827 Changed = true;
3828 continue;
3829 }
3830
3831 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3832 AggressiveInsts, Cost, Budget, TTI, AC) ||
3833 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3834 AggressiveInsts, Cost, Budget, TTI, AC))
3835 return Changed;
3836 }
3837
3838 // If we folded the first phi, PN dangles at this point. Refresh it. If
3839 // we ran out of PHIs then we simplified them all.
3840 PN = dyn_cast<PHINode>(BB->begin());
3841 if (!PN)
3842 return true;
3843
3844 // Return true if at least one of these is a 'not', and another is either
3845 // a 'not' too, or a constant.
3846 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3847 if (!match(V0, m_Not(m_Value())))
3848 std::swap(V0, V1);
3849 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3850 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3851 };
3852
3853 // Don't fold i1 branches on PHIs which contain binary operators or
3854 // (possibly inverted) select form of or/ands, unless one of
3855 // the incoming values is a 'not' and another one is freely invertible.
3856 // These can often be turned into switches and other things.
3857 auto IsBinOpOrAnd = [](Value *V) {
3858 return match(
3860 };
3861 if (PN->getType()->isIntegerTy(1) &&
3862 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3863 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3864 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3865 PN->getIncomingValue(1)))
3866 return Changed;
3867
3868 // If all PHI nodes are promotable, check to make sure that all instructions
3869 // in the predecessor blocks can be promoted as well. If not, we won't be able
3870 // to get rid of the control flow, so it's not worth promoting to select
3871 // instructions.
3872 for (BasicBlock *IfBlock : IfBlocks)
3873 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3874 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3875 // This is not an aggressive instruction that we can promote.
3876 // Because of this, we won't be able to get rid of the control flow, so
3877 // the xform is not worth it.
3878 return Changed;
3879 }
3880
3881 // If either of the blocks has its address taken, we can't do this fold.
3882 if (any_of(IfBlocks,
3883 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3884 return Changed;
3885
3886 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3887 if (IsUnpredictable) dbgs() << " (unpredictable)";
3888 dbgs() << " T: " << IfTrue->getName()
3889 << " F: " << IfFalse->getName() << "\n");
3890
3891 // If we can still promote the PHI nodes after this gauntlet of tests,
3892 // do all of the PHI's now.
3893
3894 // Move all 'aggressive' instructions, which are defined in the
3895 // conditional parts of the if's up to the dominating block.
3896 for (BasicBlock *IfBlock : IfBlocks)
3897 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3898
3899 IRBuilder<NoFolder> Builder(DomBI);
3900 // Propagate fast-math-flags from phi nodes to replacement selects.
3901 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
3902 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3903 if (isa<FPMathOperator>(PN))
3904 Builder.setFastMathFlags(PN->getFastMathFlags());
3905
3906 // Change the PHI node into a select instruction.
3907 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3908 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3909
3910 Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
3911 PN->replaceAllUsesWith(Sel);
3912 Sel->takeName(PN);
3913 PN->eraseFromParent();
3914 }
3915
3916 // At this point, all IfBlocks are empty, so our if statement
3917 // has been flattened. Change DomBlock to jump directly to our new block to
3918 // avoid other simplifycfg's kicking in on the diamond.
3919 Builder.CreateBr(BB);
3920
// The dominator-tree updates are collected while DomBI still exists and are
// applied only after it has been erased.
3922 if (DTU) {
3923 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3924 for (auto *Successor : successors(DomBlock))
3925 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3926 }
3927
3928 DomBI->eraseFromParent();
3929 if (DTU)
3930 DTU->applyUpdates(Updates);
3931
3932 return true;
3933 }
3934
// Combine LHS and RHS with Opc, which must be And or Or. A plain binary
// operator is emitted when impliesPoison(RHS, LHS) holds; otherwise the
// builder's logical-and / logical-or form is used. Any other opcode reaches
// llvm_unreachable.
// NOTE(review): listing line 3935 (start of the signature, presumably naming
// the function and declaring Builder) is absent from this listing -- confirm
// against the original file.
3936 Instruction::BinaryOps Opc, Value *LHS,
3937 Value *RHS, const Twine &Name = "") {
3938 // Try to relax logical op to binary op.
3939 if (impliesPoison(RHS, LHS))
3940 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3941 if (Opc == Instruction::And)
3942 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3943 if (Opc == Instruction::Or)
3944 return Builder.CreateLogicalOr(LHS, RHS, Name);
3945 llvm_unreachable("Invalid logical opcode");
3946 }
3947
3948 /// Return true if either PBI or BI has branch weight available, and store
3949 /// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3950 /// not have branch weight, use 1:1 as its weight.
///
/// When neither branch has weights the function returns false and does not
/// assign the 1:1 defaults.
// NOTE(review): listing line 3951 (start of the signature, presumably naming
// the function and declaring PBI and BI) is absent from this listing --
// confirm against the original file.
3952 uint64_t &PredTrueWeight,
3953 uint64_t &PredFalseWeight,
3954 uint64_t &SuccTrueWeight,
3955 uint64_t &SuccFalseWeight) {
3956 bool PredHasWeights =
3957 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3958 bool SuccHasWeights =
3959 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3960 if (PredHasWeights || SuccHasWeights) {
// Fill in 1:1 for whichever branch lacks weights.
3961 if (!PredHasWeights)
3962 PredTrueWeight = PredFalseWeight = 1;
3963 if (!SuccHasWeights)
3964 SuccTrueWeight = SuccFalseWeight = 1;
3965 return true;
3966 } else {
3967 return false;
3968 }
3969 }
3970
3971 /// Determine if the two branches share a common destination and deduce a glue
3972 /// that joins the branches' conditions to arrive at the common destination if
3973 /// that would be profitable.
///
/// On success the tuple holds the shared successor, the opcode (Or or And)
/// used to combine the conditions, and a flag that is true in the two cases
/// where the shared block sits at different successor indices of PBI and BI.
/// std::nullopt is returned when no successor is shared or when the
/// probability check declines the fold.
// NOTE(review): listing lines 3975 (function name and first parameters), 3979
// (the condition of the second assert) and 3991 (presumably the assignment
// of Likely) are absent from this listing -- confirm against the original
// file.
3974 static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3976 const TargetTransformInfo *TTI) {
3977 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3978 "Both blocks must end with a conditional branches.");
3980 "PredBB must be a predecessor of BB.");
3981
3982 // We have the potential to fold the conditions together, but if the
3983 // predecessor branch is predictable, we may not want to merge them.
3984 uint64_t PTWeight, PFWeight;
3985 BranchProbability PBITrueProb, Likely;
3986 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3987 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3988 (PTWeight + PFWeight) != 0) {
3989 PBITrueProb =
3990 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3992 }
3993
// Only the first matching successor pairing is considered; if its probability
// check fails, the later pairings are not tried.
3994 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3995 // Speculate the 2nd condition unless the 1st is probably true.
3996 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3997 return {{BI->getSuccessor(0), Instruction::Or, false}};
3998 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3999 // Speculate the 2nd condition unless the 1st is probably false.
4000 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4001 return {{BI->getSuccessor(1), Instruction::And, false}};
4002 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4003 // Speculate the 2nd condition unless the 1st is probably true.
4004 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4005 return {{BI->getSuccessor(1), Instruction::And, true}};
4006 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4007 // Speculate the 2nd condition unless the 1st is probably false.
4008 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4009 return {{BI->getSuccessor(0), Instruction::Or, true}};
4010 }
4011 return std::nullopt;
4012 }
4013
// Fold the conditional branch BI into the conditional branch PBI of its
// predecessor: PBI's edge into BB is redirected to UniqueSucc, branch weights
// are merged (or dropped when neither branch has any), BI's loop metadata is
// copied to PBI, and the two conditions are combined with createLogicalOp.
// Always returns true.
// NOTE(review): listing lines 4014 (start of the signature), 4023 (presumably
// the declaration of Opc), 4026 (right-hand side of the std::tie
// assignment), 4034, 4103, 4110 and 4112 are absent from this listing --
// confirm against the original file.
4015 DomTreeUpdater *DTU,
4016 MemorySSAUpdater *MSSAU,
4017 const TargetTransformInfo *TTI) {
4018 BasicBlock *BB = BI->getParent();
4019 BasicBlock *PredBlock = PBI->getParent();
4020
4021 // Determine if the two branches share a common destination.
4022 BasicBlock *CommonSucc;
4024 bool InvertPredCond;
4025 std::tie(CommonSucc, Opc, InvertPredCond) =
4027
4028 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4029
4030 IRBuilder<> Builder(PBI);
4031 // The builder is used to create instructions to eliminate the branch in BB.
4032 // If BB's terminator has !annotation metadata, add it to the new
4033 // instructions.
4035 {LLVMContext::MD_annotation});
4036
4037 // If we need to invert the condition in the pred block to match, do so now.
4038 if (InvertPredCond) {
4039 InvertBranch(PBI, Builder);
4040 }
4041
// UniqueSucc is BI's successor at the same index at which PBI reaches BB.
4042 BasicBlock *UniqueSucc =
4043 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4044
4045 // Before cloning instructions, notify the successor basic block that it
4046 // is about to have a new predecessor. This will update PHI nodes,
4047 // which will allow us to update live-out uses of bonus instructions.
4048 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4049
4050 // Try to update branch weights.
4051 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4052 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4053 SuccTrueWeight, SuccFalseWeight)) {
4054 SmallVector<uint64_t, 8> NewWeights;
4055
4056 if (PBI->getSuccessor(0) == BB) {
4057 // PBI: br i1 %x, BB, FalseDest
4058 // BI: br i1 %y, UniqueSucc, FalseDest
4059 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4060 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
4061 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4062 // TrueWeight for PBI * FalseWeight for BI.
4063 // We assume that total weights of a BranchInst can fit into 32 bits.
4064 // Therefore, we will not have overflow using 64-bit arithmetic.
4065 NewWeights.push_back(PredFalseWeight *
4066 (SuccFalseWeight + SuccTrueWeight) +
4067 PredTrueWeight * SuccFalseWeight);
4068 } else {
4069 // PBI: br i1 %x, TrueDest, BB
4070 // BI: br i1 %y, TrueDest, UniqueSucc
4071 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4072 // FalseWeight for PBI * TrueWeight for BI.
4073 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4074 PredFalseWeight * SuccTrueWeight);
4075 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4076 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
4077 }
4078
4079 // Halve the weights if any of them cannot fit in an uint32_t
4080 fitWeights(NewWeights);
4081
4082 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
4083 setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
4084
4085 // TODO: If BB is reachable from all paths through PredBlock, then we
4086 // could replace PBI's branch probabilities with BI's.
4087 } else
// Neither branch carried weights: drop any profile metadata on PBI.
4088 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4089
4090 // Now, update the CFG.
// The boolean converts to the successor index: 0 when successor 0 is BB,
// otherwise 1.
4091 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4092
4093 if (DTU)
4094 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4095 {DominatorTree::Delete, PredBlock, BB}});
4096
4097 // If BI was a loop latch, it may have had associated loop metadata.
4098 // We need to copy it to the new latch, that is, PBI.
4099 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4100 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4101
4102 ValueToValueMapTy VMap; // maps original values to cloned values
4104
4105 Module *M = BB->getModule();
4106
4107 if (PredBlock->IsNewDbgInfoFormat) {
4108 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4109 for (DbgVariableRecord &DVR :
4111 RemapDbgRecord(M, &DVR, VMap,
4113 }
4114 }
4115
4116 // Now that the Cond was cloned into the predecessor basic block,
4117 // or/and the two conditions together.
4118 Value *BICond = VMap[BI->getCondition()];
4119 PBI->setCondition(
4120 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4121
4122 ++NumFoldBranchToCommonDest;
4123 return true;
4124 }
4125
4126/// Return if an instruction's type or any of its operands' types are a vector
4127/// type.
4128static bool isVectorOp(Instruction &I) {
4129 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4130 return U->getType()->isVectorTy();
4131 });
4132}
4133
4134 /// If this basic block is simple enough, and if a predecessor branches to us
4135 /// and one of our successors, fold the block into the predecessor and use
4136 /// logical operations to pick the right destination.
///
/// Returns false when BI is unconditional, when its condition does not have
/// the required shape, when no predecessor qualifies, or when the bonus
/// instructions are unsafe or exceed the threshold; otherwise returns the
/// result of performBranchToCommonDestFolding for the first recorded
/// predecessor.
// NOTE(review): listing lines 4137 (start of the signature), 4147-4149, 4164
// (presumably the declaration of Preds), 4176 (presumably the declaration
// of Opc), 4187 (presumably the initialisation of Cost), 4192, 4222 and
// 4229 are absent from this listing -- confirm against the original file.
4138 MemorySSAUpdater *MSSAU,
4139 const TargetTransformInfo *TTI,
4140 unsigned BonusInstThreshold) {
4141 // If this block ends with an unconditional branch,
4142 // let speculativelyExecuteBB() deal with it.
4143 if (!BI->isConditional())
4144 return false;
4145
4146 BasicBlock *BB = BI->getParent();
4150
4151 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4152
// The condition must be a compare, binary operator or select defined in BB
// with exactly one use.
4153 if (!Cond ||
4154 (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
4155 !isa<SelectInst>(Cond)) ||
4156 Cond->getParent() != BB || !Cond->hasOneUse())
4157 return false;
4158
4159 // Finally, don't infinitely unroll conditional loops.
4160 if (is_contained(successors(BB), BB))
4161 return false;
4162
4163 // With which predecessors will we want to deal with?
4165 for (BasicBlock *PredBlock : predecessors(BB)) {
4166 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4167
4168 // Check that we have two conditional branches. If there is a PHI node in
4169 // the common successor, verify that the same value flows in from both
4170 // blocks.
4171 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4172 continue;
4173
4174 // Determine if the two branches share a common destination.
4175 BasicBlock *CommonSucc;
4177 bool InvertPredCond;
4178 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4179 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4180 else
4181 continue;
4182
4183 // Check the cost of inserting the necessary logic before performing the
4184 // transformation.
4185 if (TTI) {
4186 Type *Ty = BI->getCondition()->getType();
// An extra xor is charged when the predecessor condition must be inverted and
// it is either multiply used or not a compare.
4188 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4189 !isa<CmpInst>(PBI->getCondition())))
4190 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4191
4193 continue;
4194 }
4195
4196 // Ok, we do want to deal with this predecessor. Record it.
4197 Preds.emplace_back(PredBlock);
4198 }
4199
4200 // If there aren't any predecessors into which we can fold,
4201 // don't bother checking the cost.
4202 if (Preds.empty())
4203 return false;
4204
4205 // Only allow this transformation if computing the condition doesn't involve
4206 // too many instructions and these involved instructions can be executed
4207 // unconditionally. We denote all involved instructions except the condition
4208 // as "bonus instructions", and only allow this transformation when the
4209 // number of the bonus instructions we'll need to create when cloning into
4210 // each predecessor does not exceed a certain threshold.
4211 unsigned NumBonusInsts = 0;
4212 bool SawVectorOp = false;
4213 const unsigned PredCount = Preds.size();
4214 for (Instruction &I : *BB) {
4215 // Don't check the branch condition comparison itself.
4216 if (&I == Cond)
4217 continue;
4218 // Ignore dbg intrinsics, and the terminator.
4219 if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
4220 continue;
4221 // I must be safe to execute unconditionally.
4223 return false;
4224 SawVectorOp |= isVectorOp(I);
4225
4226 // Account for the cost of duplicating this instruction into each
4227 // predecessor. Ignore free instructions.
4228 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4230 NumBonusInsts += PredCount;
4231
4232 // Early exits once we reach the limit.
// The in-loop limit always uses the vector multiplier; the exact limit is
// re-checked after the loop once SawVectorOp is final.
4233 if (NumBonusInsts >
4234 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4235 return false;
4236 }
4237
// A use is acceptable if it is a PHI incoming value for the edge from BB, or
// an instruction in BB that comes after I.
4238 auto IsBCSSAUse = [BB, &I](Use &U) {
4239 auto *UI = cast<Instruction>(U.getUser());
4240 if (auto *PN = dyn_cast<PHINode>(UI))
4241 return PN->getIncomingBlock(U) == BB;
4242 return UI->getParent() == BB && I.comesBefore(UI);
4243 };
4244
4245 // Does this instruction require rewriting of uses?
4246 if (!all_of(I.uses(), IsBCSSAUse))
4247 return false;
4248 }
4249 if (NumBonusInsts >
4250 BonusInstThreshold *
4251 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4252 return false;
4253
4254 // Ok, we have the budget. Perform the transformation.
// The loop body returns unconditionally, so only the first recorded
// predecessor is folded per call.
4255 for (BasicBlock *PredBlock : Preds) {
4256 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4257 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4258 }
4259 return false;
4260 }
4261
4262 // If there is only one store in BB1 and BB2, return it, otherwise return
4263 // nullptr.
// A null block is skipped, so either argument may be nullptr. nullptr is
// returned both when no store is found and when more than one is found.
// NOTE(review): listing line 4264 (the signature, presumably declaring BB1
// and BB2) is absent from this listing -- confirm against the original file.
4265 StoreInst *S = nullptr;
4266 for (auto *BB : {BB1, BB2}) {
4267 if (!BB)
4268 continue;
4269 for (auto &I : *BB)
4270 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4271 if (S)
4272 // Multiple stores seen.
4273 return nullptr;
4274 else
4275 S = SI;
4276 }
4277 }
4278 return S;
4279 }
4280
// Return a value through which V can be referenced in BB's single successor:
// an existing matching PHI there, V itself when AlternativeV is null and V is
// not an instruction defined in BB, or a newly created PHI named
// "simplifycfg.merge".
// NOTE(review): listing line 4281 (start of the signature, presumably
// declaring V and BB) is absent from this listing -- confirm against the
// original file.
4282 Value *AlternativeV = nullptr) {
4283 // PHI is going to be a PHI node that allows the value V that is defined in
4284 // BB to be referenced in BB's only successor.
4285 //
4286 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4287 // doesn't matter to us what the other operand is (it'll never get used). We
4288 // could just create a new PHI with an undef incoming value, but that could
4289 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4290 // other PHI. So here we directly look for some PHI in BB's successor with V
4291 // as an incoming operand. If we find one, we use it, else we create a new
4292 // one.
4293 //
4294 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4295 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4296 // where OtherBB is the single other predecessor of BB's only successor.
4297 PHINode *PHI = nullptr;
4298 BasicBlock *Succ = BB->getSingleSuccessor();
4299
4300 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4301 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4302 PHI = cast<PHINode>(I);
4303 if (!AlternativeV)
4304 break;
4305
4306 assert(Succ->hasNPredecessors(2));
4307 auto PredI = pred_begin(Succ);
4308 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4309 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4310 break;
// The other incoming value does not match; keep searching.
4311 PHI = nullptr;
4312 }
4313 if (PHI)
4314 return PHI;
4315
4316 // If V is not an instruction defined in BB, just return it.
4317 if (!AlternativeV &&
4318 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4319 return V;
4320
// No suitable PHI exists: create one at the start of Succ. Every predecessor
// other than BB receives AlternativeV, or poison when AlternativeV is null.
4321 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4322 PHI->insertBefore(Succ->begin());
4323 PHI->addIncoming(V, BB);
4324 for (BasicBlock *PredBB : predecessors(Succ))
4325 if (PredBB != BB)
4326 PHI->addIncoming(
4327 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4328 return PHI;
4329 }
4330
/// Merge the unique store in PTB/PFB with the unique store in QTB/QFB into one
/// predicated store to \p Address placed after the second diamond/triangle.
///
/// The new store writes a PHI-merged value (see QPHI below) and is guarded by
/// the OR of the two per-store predicates; \p InvertPCond / \p InvertQCond
/// negate the respective predicate once more before they are combined.
/// QTB may be null (it is null-checked below), and the profitability lambda
/// accepts a null block for any of the four blocks.
///
/// \returns true if the two stores were replaced by a merged store, false if
/// any legality or profitability check failed.
4332 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4333 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4334 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4335 // For every pointer, there must be exactly two stores, one coming from
4336 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4337 // store (to any address) in PTB,PFB or QTB,QFB.
4338 // FIXME: We could relax this restriction with a bit more work and performance
4339 // testing.
4340 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4341 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4342 if (!PStore || !QStore)
4343 return false;
4344
4345 // Now check the stores are compatible.
4346 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4347 PStore->getValueOperand()->getType() !=
4348 QStore->getValueOperand()->getType())
4349 return false;
4350
4351 // Check that sinking the store won't cause program behavior changes. Sinking
4352 // the store out of the Q blocks won't change any behavior as we're sinking
4353 // from a block to its unconditional successor. But we're moving a store from
4354 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4355 // So we need to check that there are no aliasing loads or stores in
4356 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4357 // operations between PStore and the end of its parent block.
4358 //
4359 // The ideal way to do this is to query AliasAnalysis, but we don't
4360 // preserve AA currently so that is dangerous. Be super safe and just
4361 // check there are no other memory operations at all.
4362 for (auto &I : *QFB->getSinglePredecessor())
4363 if (I.mayReadOrWriteMemory())
4364 return false;
4365 for (auto &I : *QFB)
4366 if (&I != QStore && I.mayReadOrWriteMemory())
4367 return false;
4368 if (QTB)
4369 for (auto &I : *QTB)
4370 if (&I != QStore && I.mayReadOrWriteMemory())
4371 return false;
     // Scan from PStore to the end of its block; PStore itself is exempt.
4372 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4373 I != E; ++I)
4374 if (&*I != PStore && I->mayReadOrWriteMemory())
4375 return false;
4376
4377 // If we're not in aggressive mode, we only optimize if we have some
4378 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4379 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
     // A missing (fallthrough) block costs nothing.
4380 if (!BB)
4381 return true;
4382 // Heuristic: if the block can be if-converted/phi-folded and the
4383 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4384 // thread this store.
4386 InstructionCost Budget =
4388 for (auto &I : BB->instructionsWithoutDebug(false)) {
4389 // Consider terminator instruction to be free.
4390 if (I.isTerminator())
4391 continue;
4392 // If this is one of the stores that we want to speculate out of this BB,
4393 // then don't count its cost, consider it to be free.
     // Use is_contained (a bool) rather than testing the iterator returned by
     // llvm::find: for an ArrayRef that iterator is a non-null pointer even
     // when S is absent, which would make every store look free.
4394 if (auto *S = dyn_cast<StoreInst>(&I))
4395 if (llvm::is_contained(FreeStores, S))
4396 continue;
4397 // Else, we have a white-list of instructions that we are okay speculating.
4398 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4399 return false; // Not in white-list - not worthwhile folding.
4400 // And finally, if this is a non-free instruction that we are okay
4401 // speculating, ensure that we consider the speculation budget.
4402 Cost +=
4404 if (Cost > Budget)
4405 return false; // Eagerly refuse to fold as soon as we're out of budget.
4406 }
4407 assert(Cost <= Budget &&
4408 "When we run out of budget we will eagerly return from within the "
4409 "per-instruction loop.");
4410 return true;
4411 };
4412
4413 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4415 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4416 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4417 return false;
4418
4419 // If PostBB has more than two predecessors, we need to split it so we can
4420 // sink the store.
4421 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4422 // We know that QFB's only successor is PostBB. And QFB has a single
4423 // predecessor. If QTB exists, then its only successor is also PostBB.
4424 // If QTB does not exist, then QFB's only predecessor has a conditional
4425 // branch to QFB and PostBB.
4426 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4427 BasicBlock *NewBB =
4428 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4429 if (!NewBB)
4430 return false;
     // From here on the merged store is placed in the freshly split block.
4431 PostBB = NewBB;
4432 }
4433
4434 // OK, we're going to sink the stores to PostBB. The store has to be
4435 // conditional though, so first create the predicate.
4436 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
4437 ->getCondition();
4438 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
4439 ->getCondition();
4440
4442 PStore->getParent());
4444 QStore->getParent(), PPHI);
4445
4446 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4447 IRBuilder<> QB(PostBB, PostBBFirst);
4448 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4449
     // A store living in the "true" block executes when the branch condition
     // holds; one living in the "false" block executes when it does not.
4450 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
4451 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
4452
4453 if (InvertPCond)
4454 PPred = QB.CreateNot(PPred);
4455 if (InvertQCond)
4456 QPred = QB.CreateNot(QPred);
     // The merged store must run if either original store would have run.
4457 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4458
4459 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4460 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4461 /*Unreachable=*/false,
4462 /*BranchWeights=*/nullptr, DTU);
4463
4464 QB.SetInsertPoint(T);
4465 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4466 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4467 // Choose the minimum alignment. If we could prove both stores execute, we
4468 // could use biggest one. In this case, though, we only know that one of the
4469 // stores executes. And we don't know it's safe to take the alignment from a
4470 // store that doesn't execute.
4471 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4472
     // The originals are now redundant with the merged, predicated store.
4473 QStore->eraseFromParent();
4474 PStore->eraseFromParent();
4475
4476 return true;
4477}
4478
/// Look for store addresses written in both the P diamond/triangle (headed by
/// PBI) and the Q diamond/triangle (headed by QBI), and try to merge each such
/// pair of conditional stores into one predicated store after the Q region.
///
/// \returns true if at least one pair of stores was merged.
4480 DomTreeUpdater *DTU, const DataLayout &DL,
4481 const TargetTransformInfo &TTI) {
4482 // The intention here is to find diamonds or triangles (see below) where each
4483 // conditional block contains a store to the same address. Both of these
4484 // stores are conditional, so they can't be unconditionally sunk. But it may
4485 // be profitable to speculatively sink the stores into one merged store at the
4486 // end, and predicate the merged store on the union of the two conditions of
4487 // PBI and QBI.
4488 //
4489 // This can reduce the number of stores executed if both of the conditions are
4490 // true, and can allow the blocks to become small enough to be if-converted.
4491 // This optimization will also chain, so that ladders of test-and-set
4492 // sequences can be if-converted away.
4493 //
4494 // We only deal with simple diamonds or triangles:
4495 //
4496 //     PBI       or      PBI        or a combination of the two
4497 //    /   \               | \
4498 //   PTB  PFB             |  PFB
4499 //    \   /               | /
4500 //     QBI                QBI
4501 //    /  \                | \
4502 //   QTB  QFB             |  QFB
4503 //    \  /                | /
4504 //    PostBB            PostBB
4505 //
4506 // We model triangles as a type of diamond with a nullptr "true" block.
4507 // Triangles are canonicalized so that the fallthrough edge is represented by
4508 // a true condition, as in the diagram above.
4509 BasicBlock *PTB = PBI->getSuccessor(0);
4510 BasicBlock *PFB = PBI->getSuccessor(1);
4511 BasicBlock *QTB = QBI->getSuccessor(0);
4512 BasicBlock *QFB = QBI->getSuccessor(1);
4513 BasicBlock *PostBB = QFB->getSingleSuccessor();
4514
4515 // Make sure we have a good guess for PostBB. If QTB's only successor is
4516 // QFB, then QFB is a better PostBB.
4517 if (QTB->getSingleSuccessor() == QFB)
4518 PostBB = QFB;
4519
4520 // If we couldn't find a good PostBB, stop.
4521 if (!PostBB)
4522 return false;
4523
4524 bool InvertPCond = false, InvertQCond = false;
4525 // Canonicalize fallthroughs to the true branches.
     // Swapping the successors flips the meaning of the branch condition, so
     // remember that the corresponding predicate must be inverted later.
4526 if (PFB == QBI->getParent()) {
4527 std::swap(PFB, PTB);
4528 InvertPCond = true;
4529 }
4530 if (QFB == PostBB) {
4531 std::swap(QFB, QTB);
4532 InvertQCond = true;
4533 }
4534
4535 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4536 // and QFB may not. Model fallthroughs as a nullptr block.
4537 if (PTB == QBI->getParent())
4538 PTB = nullptr;
4539 if (QTB == PostBB)
4540 QTB = nullptr;
4541
4542 // Legality bailouts. We must have at least the non-fallthrough blocks and
4543 // the post-dominating block, and the non-fallthroughs must only have one
4544 // predecessor.
4545 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4546 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4547 };
4548 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4549 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4550 return false;
4551 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4552 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4553 return false;
     // NOTE(review): presumably this requires QBI's block to be referenced only
     // by the two incoming P-side edges - confirm what hasNUses(2) counts here.
4554 if (!QBI->getParent()->hasNUses(2))
4555 return false;
4556
4557 // OK, this is a sequence of two diamonds or triangles.
4558 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4559 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4560 for (auto *BB : {PTB, PFB}) {
4561 if (!BB)
4562 continue;
4563 for (auto &I : *BB)
4564 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4565 PStoreAddresses.insert(SI->getPointerOperand());
4566 }
4567 for (auto *BB : {QTB, QFB}) {
4568 if (!BB)
4569 continue;
4570 for (auto &I : *BB)
4571 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4572 QStoreAddresses.insert(SI->getPointerOperand());
4573 }
4574
4575 set_intersect(PStoreAddresses, QStoreAddresses);
4576 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4577 // clear what it contains.
4578 auto &CommonAddresses = PStoreAddresses;
4579
     // Attempt the merge once per shared address; a failure for one address
     // does not prevent the remaining addresses from being tried.
4580 bool Changed = false;
4581 for (auto *Address : CommonAddresses)
4582 Changed |=
4583 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4584 InvertPCond, InvertQCond, DTU, DL, TTI);
4585 return Changed;
4586}
4587
4588/// If the previous block ended with a widenable branch, determine if reusing
4589/// the target block is profitable and legal. This will have the effect of
4590/// "widening" PBI, but doesn't require us to reason about hoisting safety.
///
/// On success one successor of BI is redirected to PBI's false successor and,
/// when \p DTU is non-null, the matching dominator-tree updates are applied.
/// \returns true if BI was rewritten, false otherwise.
4592 DomTreeUpdater *DTU) {
4593 // TODO: This can be generalized in two important ways:
4594 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4595 // values from the PBI edge.
4596 // 2) We can sink side effecting instructions into BI's fallthrough
4597 // successor provided they don't contribute to computation of
4598 // BI's condition.
4599 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4600 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
     // Only applies when PBI is a widenable branch whose true successor is BI's
     // block and that block has no other predecessor.
4601 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4602 !BI->getParent()->getSinglePredecessor())
4603 return false;
4604 if (!IfFalseBB->phis().empty())
4605 return false; // TODO
4606 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4607 // may undo the transform done here.
4608 // TODO: There might be a more fine-grained solution to this.
4609 if (!llvm::succ_empty(IfFalseBB))
4610 return false;
4611 // Use lambda to lazily compute expensive condition after cheap ones.
4612 auto NoSideEffects = [](BasicBlock &BB) {
4613 return llvm::none_of(BB, [](const Instruction &I) {
4614 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4615 });
4616 };
     // Case 1: BI's successor 1 has a terminating deoptimize call. Retarget
     // that edge to IfFalseBB.
4617 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4618 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4619 NoSideEffects(*BI->getParent())) {
4620 auto *OldSuccessor = BI->getSuccessor(1);
     // Tell the old successor it is losing this predecessor before rewiring.
4621 OldSuccessor->removePredecessor(BI->getParent());
4622 BI->setSuccessor(1, IfFalseBB);
4623 if (DTU)
4624 DTU->applyUpdates(
4625 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4626 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4627 return true;
4628 }
     // Case 2: the same transformation, applied to BI's successor 0.
4629 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4630 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4631 NoSideEffects(*BI->getParent())) {
4632 auto *OldSuccessor = BI->getSuccessor(0);
4633 OldSuccessor->removePredecessor(BI->getParent());
4634 BI->setSuccessor(0, IfFalseBB);
4635 if (DTU)
4636 DTU->applyUpdates(
4637 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4638 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4639 return true;
4640 }
4641 return false;
4642}
4643
4644/// If we have a conditional branch as a predecessor of another block,
4645/// this function tries to simplify it. We know
4646/// that PBI and BI are both conditional branches, and BI is in one of the
4647/// successor blocks of PBI - PBI branches to BI.
///
/// \returns true if the IR was changed, false otherwise.
4649 DomTreeUpdater *DTU,
4650 const DataLayout &DL,
4651 const TargetTransformInfo &TTI) {
4652 assert(PBI->isConditional() && BI->isConditional());
4653 BasicBlock *BB = BI->getParent();
4654
4655 // If this block ends with a branch instruction, and if there is a
4656 // predecessor that ends on a branch of the same condition, make
4657 // this conditional branch redundant.
4658 if (PBI->getCondition() == BI->getCondition() &&
4659 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4660 // Okay, the outcome of this conditional branch is statically
4661 // knowable. If this block had a single pred, handle specially, otherwise
4662 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4663 if (BB->getSinglePredecessor()) {
4664 // Turn this into a branch on constant.
     // BB was reached through PBI's true edge exactly when it is successor 0.
4665 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4666 BI->setCondition(
4667 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4668 return true; // Nuke the branch on constant.
4669 }
4670 }
4671
4672 // If the previous block ended with a widenable branch, determine if reusing
4673 // the target block is profitable and legal. This will have the effect of
4674 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4675 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4676 return true;
4677
4678 // If both branches are conditional and both contain stores to the same
4679 // address, remove the stores from the conditionals and create a conditional
4680 // merged store at the end.
4681 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4682 return true;
4683
4684 // If this is a conditional branch in an empty block, and if any
4685 // predecessors are a conditional branch to one of our destinations,
4686 // fold the conditions into logical ops and one cond br.
4687
4688 // Ignore dbg intrinsics.
4689 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4690 return false;
4691
     // Find a destination shared by both branches. PBIOp and BIOp are the
     // successor indices (0 = true edge, 1 = false edge) through which PBI and
     // BI respectively reach that common destination.
4692 int PBIOp, BIOp;
4693 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4694 PBIOp = 0;
4695 BIOp = 0;
4696 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4697 PBIOp = 0;
4698 BIOp = 1;
4699 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4700 PBIOp = 1;
4701 BIOp = 0;
4702 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4703 PBIOp = 1;
4704 BIOp = 1;
4705 } else {
4706 return false;
4707 }
4708
4709 // Check to make sure that the other destination of this branch
4710 // isn't BB itself. If so, this is an infinite loop that will
4711 // keep getting unwound.
4712 if (PBI->getSuccessor(PBIOp) == BB)
4713 return false;
4714
4715 // If predecessor's branch probability to BB is too low don't merge branches.
4716 SmallVector<uint32_t, 2> PredWeights;
4717 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4718 extractBranchWeights(*PBI, PredWeights) &&
4719 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4720
4722 PredWeights[PBIOp],
4723 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4724
4726 if (CommonDestProb >= Likely)
4727 return false;
4728 }
4729
4730 // Do not perform this transformation if it would require
4731 // insertion of a large number of select instructions. For targets
4732 // without predication/cmovs, this is a big pessimization.
4733
4734 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4735 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
     // NumPhis counts the PHIs already stepped over, so this bails out as soon
     // as a fourth PHI is found in CommonDest.
4736 unsigned NumPhis = 0;
4737 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4738 ++II, ++NumPhis) {
4739 if (NumPhis > 2) // Disable this xform.
4740 return false;
4741 }
4742
4743 // Finally, if everything is ok, fold the branches to logical ops.
4744 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4745
4746 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4747 << "AND: " << *BI->getParent());
4748
4750
4751 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4752 // branch in it, where one edge (OtherDest) goes back to itself but the other
4753 // exits. We don't *know* that the program avoids the infinite loop
4754 // (even though that seems likely). If we do this xform naively, we'll end up
4755 // recursively unpeeling the loop. Since we know that (after the xform is
4756 // done) that the block *is* infinite if reached, we just make it an obviously
4757 // infinite loop with no cond branch.
4758 if (OtherDest == BB) {
4759 // Insert it at the end of the function, because it's either code,
4760 // or it won't matter if it's hot. :)
4761 BasicBlock *InfLoopBlock =
4762 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4763 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4764 if (DTU)
4765 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4766 OtherDest = InfLoopBlock;
4767 }
4768
4769 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4770
4771 // BI may have other predecessors. Because of this, we leave
4772 // it alone, but modify PBI.
4773
4774 // Make sure we get to CommonDest on True&True directions.
     // A non-zero index means the common destination sits on the false edge, so
     // the condition is negated to make "true" mean "go to CommonDest".
4775 Value *PBICond = PBI->getCondition();
4776 IRBuilder<NoFolder> Builder(PBI);
4777 if (PBIOp)
4778 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4779
4780 Value *BICond = BI->getCondition();
4781 if (BIOp)
4782 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4783
4784 // Merge the conditions.
4785 Value *Cond =
4786 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4787
4788 // Modify PBI to branch on the new condition to the new dests.
4789 PBI->setCondition(Cond);
4790 PBI->setSuccessor(0, CommonDest);
4791 PBI->setSuccessor(1, OtherDest);
4792
4793 if (DTU) {
4794 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4795 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4796
4797 DTU->applyUpdates(Updates);
4798 }
4799
4800 // Update branch weight for PBI.
4801 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4802 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4803 bool HasWeights =
4804 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4805 SuccTrueWeight, SuccFalseWeight);
4806 if (HasWeights) {
4807 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4808 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4809 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4810 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4811 // The weight to CommonDest should be PredCommon * SuccTotal +
4812 // PredOther * SuccCommon.
4813 // The weight to OtherDest should be PredOther * SuccOther.
4814 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4815 PredOther * SuccCommon,
4816 PredOther * SuccOther};
4817 // Halve the weights if any of them cannot fit in an uint32_t
4818 fitWeights(NewWeights);
4819
4820 setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
4821 }
4822
4823 // OtherDest may have phi nodes. If so, add an entry from PBI's
4824 // block that are identical to the entries for BI's block.
4825 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4826
4827 // We know that the CommonDest already had an edge from PBI to
4828 // it. If it has PHIs though, the PHIs may have different
4829 // entries for BB and PBI's BB. If so, insert a select to make
4830 // them agree.
4831 for (PHINode &PN : CommonDest->phis()) {
4832 Value *BIV = PN.getIncomingValueForBlock(BB);
4833 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4834 Value *PBIV = PN.getIncomingValue(PBBIdx);
4835 if (BIV != PBIV) {
4836 // Insert a select in PBI to pick the right value.
4837 SelectInst *NV = cast<SelectInst>(
4838 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4839 PN.setIncomingValue(PBBIdx, NV);
4840 // Although the select has the same condition as PBI, the original branch
4841 // weights for PBI do not apply to the new select because the select's
4842 // 'logical' edges are incoming edges of the phi that is eliminated, not
4843 // the outgoing edges of PBI.
4844 if (HasWeights) {
4845 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4846 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4847 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4848 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4849 // The weight to PredCommonDest should be PredCommon * SuccTotal.
4850 // The weight to PredOtherDest should be PredOther * SuccCommon.
4851 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4852 PredOther * SuccCommon};
4853
4854 fitWeights(NewWeights);
4855
4856 setBranchWeights(NV, NewWeights[0], NewWeights[1],
4857 /*IsExpected=*/false);
4858 }
4859 }
4860 }
4861
4862 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4863 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4864
4865 // This basic block is probably dead. We know it has at least
4866 // one fewer predecessor.
4867 return true;
4868}
4869
4870// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4871// true or to FalseBB if Cond is false.
4872// Takes care of updating the successors and removing the old terminator.
4873// Also makes sure not to introduce new successors by assuming that edges to
4874// non-successor TrueBBs and FalseBBs aren't reachable.
// TrueWeight/FalseWeight are attached as branch weights to a newly created
// conditional branch only when the two values differ. Always returns true.
4875bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4876 Value *Cond, BasicBlock *TrueBB,
4877 BasicBlock *FalseBB,
4878 uint32_t TrueWeight,
4879 uint32_t FalseWeight) {
4880 auto *BB = OldTerm->getParent();
4881 // Remove any superfluous successor edges from the CFG.
4882 // First, figure out which successors to preserve.
4883 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4884 // successor.
4885 BasicBlock *KeepEdge1 = TrueBB;
4886 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4887
4888 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4889
4890 // Then remove the rest.
4891 for (BasicBlock *Succ : successors(OldTerm)) {
4892 // Make sure only to keep exactly one copy of each edge.
     // Clearing a KeepEdge marks that edge as found; a later duplicate of the
     // same successor then falls through to the removal branch below.
4893 if (Succ == KeepEdge1)
4894 KeepEdge1 = nullptr;
4895 else if (Succ == KeepEdge2)
4896 KeepEdge2 = nullptr;
4897 else {
4898 Succ->removePredecessor(BB,
4899 /*KeepOneInputPHIs=*/true);
4900
     // Only successors that lose their edge from BB entirely are recorded for
     // the dominator-tree update; extra copies of a kept edge are not.
4901 if (Succ != TrueBB && Succ != FalseBB)
4902 RemovedSuccessors.insert(Succ);
4903 }
4904 }
4905
4906 IRBuilder<> Builder(OldTerm);
4907 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4908
     // From here on, a KeepEdge that is still non-null was NOT found among
     // OldTerm's successors.
4909 // Insert an appropriate new terminator.
4910 if (!KeepEdge1 && !KeepEdge2) {
4911 if (TrueBB == FalseBB) {
4912 // We were only looking for one successor, and it was present.
4913 // Create an unconditional branch to it.
4914 Builder.CreateBr(TrueBB);
4915 } else {
4916 // We found both of the successors we were looking for.
4917 // Create a conditional branch sharing the condition of the select.
4918 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4919 if (TrueWeight != FalseWeight)
4920 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
4921 }
4922 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4923 // Neither of the selected blocks were successors, so this
4924 // terminator must be unreachable.
4925 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4926 } else {
4927 // One of the selected values was a successor, but the other wasn't.
4928 // Insert an unconditional branch to the one that was found;
4929 // the edge to the one that wasn't must be unreachable.
4930 if (!KeepEdge1) {
4931 // Only TrueBB was found.
4932 Builder.CreateBr(TrueBB);
4933 } else {
4934 // Only FalseBB was found.
4935 Builder.CreateBr(FalseBB);
4936 }
4937 }
4938
4940
     // Report every fully removed successor edge to the dominator-tree updater.
4941 if (DTU) {
4943 Updates.reserve(RemovedSuccessors.size());
4944 for (auto *RemovedSuccessor : RemovedSuccessors)
4945 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4946 DTU->applyUpdates(Updates);
4947 }
4948
4949 return true;
4950}
4951
4952// Replaces
4953// (switch (select cond, X, Y)) on constant X, Y
4954// with a branch - conditional if X and Y lead to distinct BBs,
4955// unconditional otherwise.
// Returns false without changing anything if either select arm is not a
// ConstantInt; otherwise returns the result of simplifyTerminatorOnSelect.
4956bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4957 SelectInst *Select) {
4958 // Check for constant integer values in the select.
4959 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4960 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4961 if (!TrueVal || !FalseVal)
4962 return false;
4963
4964 // Find the relevant condition and destinations.
     // NOTE(review): a value with no explicit case presumably resolves to the
     // switch's default destination here - confirm findCaseValue's behavior
     // when the value is not found.
4965 Value *Condition = Select->getCondition();
4966 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4967 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4968
4969 // Get weight for TrueBB and FalseBB.
     // Both weights stay 0 when the switch has no usable branch-weight metadata.
4970 uint32_t TrueWeight = 0, FalseWeight = 0;
4972 bool HasWeights = hasBranchWeightMD(*SI);
4973 if (HasWeights) {
4974 getBranchWeights(SI, Weights);
     // Only use the metadata when it has exactly one entry per case plus one
     // more (presumably for the default destination).
4975 if (Weights.size() == 1 + SI->getNumCases()) {
4976 TrueWeight =
4977 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4978 FalseWeight =
4979 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4980 }
4981 }
4982
4983 // Perform the actual simplification.
4984 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4985 FalseWeight);
4986}
4987
4988// Replaces
4989// (indirectbr (select cond, blockaddress(@fn, BlockA),
4990// blockaddress(@fn, BlockB)))
4991// with
4992// (br cond, BlockA, BlockB).
4993bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4994 SelectInst *SI) {
4995 // Check that both operands of the select are block addresses.
4996 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4997 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4998 if (!TBA || !FBA)
4999 return false;
5000
5001 // Extract the actual blocks.
5002 BasicBlock *TrueBB = TBA->getBasicBlock();
5003 BasicBlock *FalseBB = FBA->getBasicBlock();
5004
5005 // Perform the actual simplification.
5006 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
5007 0);
5008}
5009
5010/// This is called when we find an icmp instruction
5011/// (a seteq/setne with a constant) as the only instruction in a
5012/// block that ends with an uncond branch. We are looking for a very specific
5013/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5014/// this case, we merge the first two "or's of icmp" into a switch, but then the
5015/// default value goes to an uncond block with a seteq in it, we get something
5016/// like:
5017///
5018/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5019/// DEFAULT:
5020/// %tmp = icmp eq i8 %A, 92
5021/// br label %end
5022/// end:
5023/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5024///
5025/// We prefer to split the edge to 'end' so that there is a true/false entry to
5026/// the PHI, merging the third icmp into the switch.
///
/// \returns false if the pattern does not match. On a match returns true, or
/// the result of requestResimplify() on the two constant-folding paths.
5027bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5028 ICmpInst *ICI, IRBuilder<> &Builder) {
5029 BasicBlock *BB = ICI->getParent();
5030
5031 // If the block has any PHIs in it or the icmp has multiple uses, it is too
5032 // complex.
5033 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5034 return false;
5035
     // cast<> asserts here: the caller must guarantee that operand 1 of the
     // icmp is a ConstantInt.
5036 Value *V = ICI->getOperand(0);
5037 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5038
5039 // The pattern we're looking for is where our only predecessor is a switch on
5040 // 'V' and this block is the default case for the switch. In this case we can
5041 // fold the compared value into the switch to simplify things.
5042 BasicBlock *Pred = BB->getSinglePredecessor();
5043 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5044 return false;
5045
5046 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5047 if (SI->getCondition() != V)
5048 return false;
5049
5050 // If BB is reachable on a non-default case, then we simply know the value of
5051 // V in this block. Substitute it and constant fold the icmp instruction
5052 // away.
5053 if (SI->getDefaultDest() != BB) {
5054 ConstantInt *VVal = SI->findCaseDest(BB);
5055 assert(VVal && "Should have a unique destination value");
5056 ICI->setOperand(0, VVal);
5057
5058 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5059 ICI->replaceAllUsesWith(V);
5060 ICI->eraseFromParent();
5061 }
5062 // BB is now empty, so it is likely to simplify away.
5063 return requestResimplify();
5064 }
5065
5066 // Ok, the block is reachable from the default dest. If the constant we're
5067 // comparing exists in one of the other edges, then we can constant fold ICI
5068 // and zap it.
5069 if (SI->findCaseValue(Cst) != SI->case_default()) {
5070 Value *V;
5071 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5073 else
5075
5076 ICI->replaceAllUsesWith(V);
5077 ICI->eraseFromParent();
5078 // BB is now empty, so it is likely to simplify away.
5079 return requestResimplify();
5080 }
5081
5082 // The use of the icmp has to be in the 'end' block, by the only PHI node in
5083 // the block.
5084 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5085 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5086 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5087 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
5088 return false;
5089
5090 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5091 // true in the PHI.
     // The initial values below are the non-EQ assignment; the swap that
     // follows produces the EQ assignment described above.
5092 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5093 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5094
5095 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5096 std::swap(DefaultCst, NewCst);
5097
5098 // Replace ICI (which is used by the PHI for the default value) with true or
5099 // false depending on if it is EQ or NE.
5100 ICI->replaceAllUsesWith(DefaultCst);
5101 ICI->eraseFromParent();
5102
5104
5105 // Okay, the switch goes to this block on a default value. Add an edge from
5106 // the switch to the merge point on the compared value.
5107 BasicBlock *NewBB =
5108 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5109 {
     // If successor 0 (presumably the default edge) carries a weight, halve it
     // (rounding up) and give the same halved weight to the new case.
5111 auto W0 = SIW.getSuccessorWeight(0);
5113 if (W0) {
5114 NewW = ((uint64_t(*W0) + 1) >> 1);
5115 SIW.setSuccessorWeight(0, *NewW);
5116 }
5117 SIW.addCase(Cst, NewBB, NewW);
5118 if (DTU)
5119 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5120 }
5121
5122 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5123 Builder.SetInsertPoint(NewBB);
5124 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5125 Builder.CreateBr(SuccBlock);
5126 PHIUse->addIncoming(NewCst, NewBB);
5127 if (DTU) {
5128 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5129 DTU->applyUpdates(Updates);
5130 }
5131 return true;
5132}
5133
5134/// The specified branch is a conditional branch.
5135/// Check to see if it is branching on an or/and chain of icmp instructions, and
5136/// fold it into a switch instruction if so.
/// Returns true if the branch was converted into a switch, and false if the
/// condition did not qualify for the transformation.
5137bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5138 IRBuilder<> &Builder,
5139 const DataLayout &DL) {
5140 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
5141 if (!Cond)
5142 return false;
5143
5144 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5145 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5146 // 'setne's and'ed together, collect them.
5147
5148 // Try to gather values from a chain of and/or to be turned into a switch
5149 ConstantComparesGatherer ConstantCompare(Cond, DL);
5150 // Unpack the result
5151 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5152 Value *CompVal = ConstantCompare.CompValue;
5153 unsigned UsedICmps = ConstantCompare.UsedICmps;
5154 Value *ExtraCase = ConstantCompare.Extra;
5155
5156 // If the gatherer did not find a common compared value, fail.
5157 if (!CompVal)
5158 return false;
5159
5160 // Avoid turning single icmps into a switch.
5161 if (UsedICmps <= 1)
5162 return false;
5163
 // A logical-or condition means the "true" successor is taken when CompVal
 // equals one of the gathered constants; any other condition is handled as
 // the and'ed-inequalities form, where the successors swap roles (below).
5164 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
5165
5166 // There might be duplicate constants in the list, which the switch
5167 // instruction can't handle, remove them now.
5168 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5169 Values.erase(llvm::unique(Values), Values.end());
5170
5171 // If Extra was used, we require at least two switch values to do the
5172 // transformation. A switch with one value is just a conditional branch.
5173 if (ExtraCase && Values.size() < 2)
5174 return false;
5175
5176 // TODO: Preserve branch weight metadata, similarly to how
5177 // foldValueComparisonIntoPredecessors preserves it.
5178
5179 // Figure out which block is which destination.
 // EdgeBB is the target of every switch case; DefaultBB is the switch default.
5180 BasicBlock *DefaultBB = BI->getSuccessor(1);
5181 BasicBlock *EdgeBB = BI->getSuccessor(0);
5182 if (!TrueWhenEqual)
5183 std::swap(DefaultBB, EdgeBB);
5184
5185 BasicBlock *BB = BI->getParent();
5186
5187 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5188 << " cases into SWITCH. BB is:\n"
5189 << *BB);
5190
5192
5193 // If there are any extra values that couldn't be folded into the switch
5194 // then we evaluate them with an explicit branch first. Split the block
5195 // right before the condbr to handle it.
5196 if (ExtraCase) {
5197 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5198 /*MSSAU=*/nullptr, "switch.early.test");
5199
5200 // Remove the uncond branch added to the old block.
5201 Instruction *OldTI = BB->getTerminator();
5202 Builder.SetInsertPoint(OldTI);
5203
5204 // There can be an unintended UB if extra values are Poison. Before the
5205 // transformation, extra values may not be evaluated according to the
5206 // condition, and it will not raise UB. But after transformation, we are
5207 // evaluating extra values before checking the condition, and it will raise
5208 // UB. It can be solved by adding freeze instruction to extra values.
5209 AssumptionCache *AC = Options.AC;
5210
5211 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5212 ExtraCase = Builder.CreateFreeze(ExtraCase);
5213
 // The early test either goes straight to EdgeBB or falls into NewBB, which
 // will hold the switch.
5214 if (TrueWhenEqual)
5215 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
5216 else
5217 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5218
5219 OldTI->eraseFromParent();
5220
5221 if (DTU)
5222 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5223
5224 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5225 // for the edge we just added.
5226 addPredecessorToBlock(EdgeBB, BB, NewBB);
5227
5228 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5229 << "\nEXTRABB = " << *BB);
 // From here on, BB refers to the block that will contain the switch.
5230 BB = NewBB;
5231 }
5232
5233 Builder.SetInsertPoint(BI);
5234 // Convert pointer to int before we switch.
5235 if (CompVal->getType()->isPointerTy()) {
5236 CompVal = Builder.CreatePtrToInt(
5237 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5238 }
5239
5240 // Create the new switch instruction now.
5241 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5242
5243 // Add all of the 'cases' to the switch instruction.
5244 for (unsigned i = 0, e = Values.size(); i != e; ++i)
5245 New->addCase(Values[i], EdgeBB);
5246
5247 // We added edges from BB to EdgeBB. As such, if there were any
5248 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5249 // the number of edges added. The existing entry for BB is duplicated
 // Values.size() - 1 times, giving one entry per switch case.
5250 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5251 PHINode *PN = cast<PHINode>(BBI);
5252 Value *InVal = PN->getIncomingValueForBlock(BB);
5253 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5254 PN->addIncoming(InVal, BB);
5255 }
5256
5257 // Erase the old branch instruction.
5259 if (DTU)
5260 DTU->applyUpdates(Updates);
5261
5262 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5263 return true;
5264}
5265
/// Dispatch simplification of a resume instruction based on its operand.
/// A resume of a PHI node is handed to simplifyCommonResume; a resume of the
/// landingpad that is the first non-PHI instruction of its own block is handed
/// to simplifySingleResume. Any other shape returns false. Builder is not used
/// by this function.
5266bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5267 if (isa<PHINode>(RI->getValue()))
5268 return simplifyCommonResume(RI);
5269 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
5270 RI->getValue() == RI->getParent()->getFirstNonPHI())
5271 // The resume must unwind the exception that caused control to branch here.
5272 return simplifySingleResume(RI);
5273
5274 return false;
5275}
5276
5277// Check if cleanup block is empty.
// Returns true only if every instruction in the range R is a call to one of
// the dbg_declare, dbg_value, dbg_label or lifetime_end intrinsics; any other
// instruction makes the range count as non-empty.
// NOTE(review): the function signature (listing line 5278) is absent from this
// extract; R is presumably the instruction range parameter -- confirm.
5279 for (Instruction &I : R) {
5280 auto *II = dyn_cast<IntrinsicInst>(&I);
5281 if (!II)
5282 return false;
5283
5284 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5285 switch (IntrinsicID) {
5286 case Intrinsic::dbg_declare:
5287 case Intrinsic::dbg_value:
5288 case Intrinsic::dbg_label:
5289 case Intrinsic::lifetime_end:
 // Benign intrinsic: keep scanning.
5290 break;
5291 default:
5292 return false;
5293 }
5294 }
5295 return true;
5296}
5297
5298// Simplify resume that is shared by several landing pads (phi of landing pad).
// Returns true if at least one trivial unwind block was found and rewritten,
// false if nothing was changed.
5299bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5300 BasicBlock *BB = RI->getParent();
5301
5302 // Check that there are no other instructions except for debug and lifetime
5303 // intrinsics between the phi's and resume instruction.
5305 make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
5306 return false;
5307
5308 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5309 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5310
5311 // Check incoming blocks to see if any of them are trivial.
5312 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5313 Idx++) {
5314 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5315 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5316
5317 // If the block has other successors, we can not delete it because
5318 // it has other dependents.
5319 if (IncomingBB->getUniqueSuccessor() != BB)
5320 continue;
5321
 // LandingPad is null when the block's first non-PHI instruction is not a
 // landingpad; the comparison below then rejects the block.
5322 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
5323 // Not the landing pad that caused the control to branch here.
5324 if (IncomingValue != LandingPad)
5325 continue;
5326
5328 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5329 TrivialUnwindBlocks.insert(IncomingBB);
5330 }
5331
5332 // If no trivial unwind blocks, don't do any simplifications.
5333 if (TrivialUnwindBlocks.empty())
5334 return false;
5335
5336 // Turn all invokes that unwind here into calls.
5337 for (auto *TrivialBB : TrivialUnwindBlocks) {
5338 // Blocks that will be simplified should be removed from the phi node.
5339 // Note there could be multiple edges to the resume block, and we need
5340 // to remove them all.
5341 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5342 BB->removePredecessor(TrivialBB, true);
5343
5344 for (BasicBlock *Pred :
5346 removeUnwindEdge(Pred, DTU);
5347 ++NumInvokes;
5348 }
5349
5350 // In each SimplifyCFG run, only the current processed block can be erased.
5351 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5352 // of erasing TrivialBB, we only remove the branch to the common resume
5353 // block so that we can later erase the resume block since it has no
5354 // predecessors.
5355 TrivialBB->getTerminator()->eraseFromParent();
5356 new UnreachableInst(RI->getContext(), TrivialBB);
5357 if (DTU)
5358 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5359 }
5360
5361 // Delete the resume block if all its predecessors have been removed.
5362 if (pred_empty(BB))
5363 DeleteDeadBlock(BB, DTU);
5364
 // TrivialUnwindBlocks was checked to be non-empty above and is not modified
 // afterwards, so this evaluates to true.
5365 return !TrivialUnwindBlocks.empty();
5366}
5367
5368// Simplify resume that is only used by a single (non-phi) landing pad.
// Returns true if the landing pad block was deleted, false if the block
// contained instructions that prevent the simplification.
5369bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5370 BasicBlock *BB = RI->getParent();
5371 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
5372 assert(RI->getValue() == LPInst &&
5373 "Resume must unwind the exception that caused control to here");
5374
5375 // Check that there are no other instructions except for debug intrinsics.
 // The range examined runs from the instruction after the landingpad up to
 // (not including) the resume.
5377 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5378 return false;
5379
5380 // Turn all invokes that unwind here into calls and delete the basic block.
5382 removeUnwindEdge(Pred, DTU);
5383 ++NumInvokes;
5384 }
5385
5386 // The landingpad is now unreachable. Zap it.
5387 DeleteDeadBlock(BB, DTU);
5388 return true;
5389}
5390
// NOTE(review): the function signature (listing line 5391) is absent from this
// extract; the body uses a cleanupret `RI` and a dominator-tree updater `DTU`.
// Returns true if the cleanup pad block was removed, false if it was left
// untouched.
5392 // If this is a trivial cleanup pad that executes no instructions, it can be
5393 // eliminated. If the cleanup pad continues to the caller, any predecessor
5394 // that is an EH pad will be updated to continue to the caller and any
5395 // predecessor that terminates with an invoke instruction will have its invoke
5396 // instruction converted to a call instruction. If the cleanup pad being
5397 // simplified does not continue to the caller, each predecessor will be
5398 // updated to continue to the unwind destination of the cleanup pad being
5399 // simplified.
5400 BasicBlock *BB = RI->getParent();
5401 CleanupPadInst *CPInst = RI->getCleanupPad();
5402 if (CPInst->getParent() != BB)
5403 // This isn't an empty cleanup.
5404 return false;
5405
5406 // We cannot kill the pad if it has multiple uses. This typically arises
5407 // from unreachable basic blocks.
5408 if (!CPInst->hasOneUse())
5409 return false;
5410
5411 // Check that there are no other instructions except for benign intrinsics.
5413 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5414 return false;
5415
5416 // If the cleanup return we are simplifying unwinds to the caller, this will
5417 // set UnwindDest to nullptr.
5418 BasicBlock *UnwindDest = RI->getUnwindDest();
5419 Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
5420
5421 // We're about to remove BB from the control flow. Before we do, sink any
5422 // PHINodes into the unwind destination. Doing this before changing the
5423 // control flow avoids some potentially slow checks, since we can currently
5424 // be certain that UnwindDest and BB have no common predecessors (since they
5425 // are both EH pads).
5426 if (UnwindDest) {
5427 // First, go through the PHI nodes in UnwindDest and update any nodes that
5428 // reference the block we are removing
5429 for (PHINode &DestPN : UnwindDest->phis()) {
5430 int Idx = DestPN.getBasicBlockIndex(BB);
5431 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5432 assert(Idx != -1);
5433 // This PHI node has an incoming value that corresponds to a control
5434 // path through the cleanup pad we are removing. If the incoming
5435 // value is in the cleanup pad, it must be a PHINode (because we
5436 // verified above that the block is otherwise empty). Otherwise, the
5437 // value is either a constant or a value that dominates the cleanup
5438 // pad being removed.
5439 //
5440 // Because BB and UnwindDest are both EH pads, all of their
5441 // predecessors must unwind to these blocks, and since no instruction
5442 // can have multiple unwind destinations, there will be no overlap in
5443 // incoming blocks between SrcPN and DestPN.
5444 Value *SrcVal = DestPN.getIncomingValue(Idx);
5445 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5446
 // Only a PHI that lives in BB itself needs translating to the value it
 // takes on each incoming edge; anything else is forwarded unchanged.
5447 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5448 for (auto *Pred : predecessors(BB)) {
5449 Value *Incoming =
5450 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5451 DestPN.addIncoming(Incoming, Pred);
5452 }
5453 }
5454
5455 // Sink any remaining PHI nodes directly into UnwindDest.
5456 Instruction *InsertPt = DestEHPad;
5457 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5458 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5459 // If the PHI node has no uses or all of its uses are in this basic
5460 // block (meaning they are debug or lifetime intrinsics), just leave
5461 // it. It will be erased when we erase BB below.
5462 continue;
5463
5464 // Otherwise, sink this PHI node into UnwindDest.
5465 // Any predecessors to UnwindDest which are not already represented
5466 // must be back edges which inherit the value from the path through
5467 // BB. In this case, the PHI value must reference itself.
5468 for (auto *pred : predecessors(UnwindDest))
5469 if (pred != BB)
5470 PN.addIncoming(&PN, pred);
5471 PN.moveBefore(InsertPt);
5472 // Also, add a dummy incoming value for the original BB itself,
5473 // so that the PHI is well-formed until we drop said predecessor.
5474 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5475 }
5476 }
5477
5478 std::vector<DominatorTree::UpdateType> Updates;
5479
5480 // We use make_early_inc_range here because we will remove all predecessors.
5482 if (UnwindDest == nullptr) {
 // Pending updates are flushed before removeUnwindEdge is handed DTU.
5483 if (DTU) {
5484 DTU->applyUpdates(Updates);
5485 Updates.clear();
5486 }
5487 removeUnwindEdge(PredBB, DTU);
5488 ++NumInvokes;
5489 } else {
 // Retarget the predecessor's terminator from BB to UnwindDest.
5490 BB->removePredecessor(PredBB);
5491 Instruction *TI = PredBB->getTerminator();
5492 TI->replaceUsesOfWith(BB, UnwindDest);
5493 if (DTU) {
5494 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5495 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5496 }
5497 }
5498 }
5499
5500 if (DTU)
5501 DTU->applyUpdates(Updates);
5502
5503 DeleteDeadBlock(BB, DTU);
5504
5505 return true;
5506}
5507
5508// Try to merge two cleanuppads together.
// Returns true if the cleanupret RI was replaced by a branch into its unwind
// destination, false if the merge was not applicable.
// NOTE(review): the function signature (listing line 5509) is absent from this
// extract.
5510 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5511 // with.
5512 BasicBlock *UnwindDest = RI->getUnwindDest();
5513 if (!UnwindDest)
5514 return false;
5515
5516 // If this cleanupret isn't the only predecessor of this cleanuppad, it
5517 // wouldn't be safe to merge without code duplication.
5518 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5519 return false;
5520
5521 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5522 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5523 if (!SuccessorCleanupPad)
5524 return false;
5525
5526 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5527 // Replace any uses of the successor cleanuppad with the predecessor pad.
5528 // The only cleanuppad uses should be this cleanupret, its cleanupret and
5529 // funclet bundle operands.
5530 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5531 // Remove the old cleanuppad.
5532 SuccessorCleanupPad->eraseFromParent();
5533 // Now, we simply replace the cleanupret with a branch to the unwind
5534 // destination.
5535 BranchInst::Create(UnwindDest, RI->getParent());
5536 RI->eraseFromParent();
5537
5538 return true;
5539}
5540
/// Try to simplify a cleanupret: first by merging its cleanuppad with the one
/// it unwinds to (mergeCleanupPad), then by removing an empty cleanup
/// (removeEmptyCleanup). Returns true if either transformation applied.
5541bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5542 // It is possible to transiently have an undef cleanuppad operand because we
5543 // have deleted some, but not all, dead blocks.
5544 // Eventually, this block will be deleted.
5545 if (isa<UndefValue>(RI->getOperand(0)))
5546 return false;
5547
5548 if (mergeCleanupPad(RI))
5549 return true;
5550
5551 if (removeEmptyCleanup(RI, DTU))
5552 return true;
5553
5554 return false;
5555}
5556
5557// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Erases removable instructions that precede the unreachable and, when the
// unreachable is the first instruction of its block, rewrites predecessor
// terminators so they no longer target the block. Returns true if anything
// was changed.
5558bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5559 BasicBlock *BB = UI->getParent();
5560
5561 bool Changed = false;
5562
5563 // Ensure that any debug-info records that used to occur after the Unreachable
5564 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5565 // the block.
5567
5568 // Debug-info records on the unreachable inst itself should be deleted, as
5569 // below we delete everything past the final executable instruction.
5570 UI->dropDbgRecords();
5571
5572 // If there are any instructions immediately before the unreachable that can
5573 // be removed, do so.
5574 while (UI->getIterator() != BB->begin()) {
5576 --BBI;
5577
5579 break; // Can not drop any more instructions. We're done here.
5580 // Otherwise, this instruction can be freely erased,
5581 // even if it is not side-effect free.
5582
5583 // Note that deleting EH's here is in fact okay, although it involves a bit
5584 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5585 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5586 // and we can therefore guarantee this block will be erased.
5587
5588 // If we're deleting this, we're deleting any subsequent debug info, so
5589 // delete DbgRecords.
5590 BBI->dropDbgRecords();
5591
5592 // Delete this instruction (any uses are guaranteed to be dead)
5593 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5594 BBI->eraseFromParent();
5595 Changed = true;
5596 }
5597
5598 // If the unreachable instruction is the first in the block, take a gander
5599 // at all of the predecessors of this instruction, and simplify them.
5600 if (&BB->front() != UI)
5601 return Changed;
5602
5603 std::vector<DominatorTree::UpdateType> Updates;
5604
5606 for (BasicBlock *Predecessor : Preds) {
5607 Instruction *TI = Predecessor->getTerminator();
5608 IRBuilder<> Builder(TI);
5609 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5610 // We could either have a proper unconditional branch,
5611 // or a degenerate conditional branch with matching destinations.
5612 if (all_of(BI->successors(),
5613 [BB](auto *Successor) { return Successor == BB; })) {
5614 new UnreachableInst(TI->getContext(), TI->getIterator());
5615 TI->eraseFromParent();
5616 Changed = true;
5617 } else {
5618 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5619 Value* Cond = BI->getCondition();
5620 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5621 "The destinations are guaranteed to be different here.");
 // Record, as an assumption, the condition value under which the
 // branch does not go to BB, then branch unconditionally to the other
 // successor.
5622 CallInst *Assumption;
5623 if (BI->getSuccessor(0) == BB) {
5624 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5625 Builder.CreateBr(BI->getSuccessor(1));
5626 } else {
5627 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5628 Assumption = Builder.CreateAssumption(Cond);
5629 Builder.CreateBr(BI->getSuccessor(0));
5630 }
5631 if (Options.AC)
5632 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5633
5635 Changed = true;
5636 }
5637 if (DTU)
5638 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5639 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
 // Remove every non-default case that targets BB.
5641 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5642 if (i->getCaseSuccessor() != BB) {
5643 ++i;
5644 continue;
5645 }
5646 BB->removePredecessor(SU->getParent());
 // removeCase returns the iterator to continue from; the end iterator
 // is refreshed because the case list just changed.
5647 i = SU.removeCase(i);
5648 e = SU->case_end();
5649 Changed = true;
5650 }
5651 // Note that the default destination can't be removed!
5652 if (DTU && SI->getDefaultDest() != BB)
5653 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5654 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5655 if (II->getUnwindDest() == BB) {
 // Pending updates are flushed before removeUnwindEdge is handed DTU.
5656 if (DTU) {
5657 DTU->applyUpdates(Updates);
5658 Updates.clear();
5659 }
5660 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5661 if (!CI->doesNotThrow())
5662 CI->setDoesNotThrow();
5663 Changed = true;
5664 }
5665 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5666 if (CSI->getUnwindDest() == BB) {
5667 if (DTU) {
5668 DTU->applyUpdates(Updates);
5669 Updates.clear();
5670 }
5671 removeUnwindEdge(TI->getParent(), DTU);
5672 Changed = true;
5673 continue;
5674 }
5675
5676 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5677 E = CSI->handler_end();
5678 I != E; ++I) {
5679 if (*I == BB) {
 // NOTE(review): presumably removeHandler compacts the handler list,
 // which is why both I and E are stepped back -- confirm.
5680 CSI->removeHandler(I);
5681 --I;
5682 --E;
5683 Changed = true;
5684 }
5685 }
5686 if (DTU)
5687 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5688 if (CSI->getNumHandlers() == 0) {
5689 if (CSI->hasUnwindDest()) {
5690 // Redirect all predecessors of the block containing CatchSwitchInst
5691 // to instead branch to the CatchSwitchInst's unwind destination.
5692 if (DTU) {
5693 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5694 Updates.push_back({DominatorTree::Insert,
5695 PredecessorOfPredecessor,
5696 CSI->getUnwindDest()});
5697 Updates.push_back({DominatorTree::Delete,
5698 PredecessorOfPredecessor, Predecessor});
5699 }
5700 }
5701 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5702 } else {
5703 // Rewrite all preds to unwind to caller (or from invoke to call).
5704 if (DTU) {
5705 DTU->applyUpdates(Updates);
5706 Updates.clear();
5707 }
 // Snapshot the predecessors first: removeUnwindEdge changes the
 // predecessor list being walked.
5708 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5709 for (BasicBlock *EHPred : EHPreds)
5710 removeUnwindEdge(EHPred, DTU);
5711 }
5712 // The catchswitch is no longer reachable.
5713 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5714 CSI->eraseFromParent();
5715 Changed = true;
5716 }
5717 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
 // CRI is only read inside the assert; the cast to void keeps it "used"
 // when asserts are compiled out.
5718 (void)CRI;
5719 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5720 "Expected to always have an unwind to BB.");
5721 if (DTU)
5722 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5723 new UnreachableInst(TI->getContext(), TI->getIterator());
5724 TI->eraseFromParent();
5725 Changed = true;
5726 }
5727 }
5728
5729 if (DTU)
5730 DTU->applyUpdates(Updates);
5731
5732 // If this block is now dead, remove it.
5733 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5734 DeleteDeadBlock(BB, DTU);
5735 return true;
5736 }
5737
5738 return Changed;
5739}
5740
// Returns true if each element of Cases is exactly one greater than the
// element after it, i.e. the values form a run that descends by one.
// NOTE(review): the signature (listing line 5741) and listing line 5744 are
// absent from this extract; line 5744 presumably sorts Cases -- confirm.
5742 assert(Cases.size() >= 1);
5743
5745 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
5746 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5747 return false;
5748 }
5749 return true;
5750}
5751
// Replaces the default destination of Switch with a freshly created block
// that contains only an unreachable instruction. When RemoveOrigDefaultBlock
// is true, the switch's block is also removed as a predecessor of the original
// default block. The dominator tree is updated through DTU when it is non-null.
// NOTE(review): the first line of the signature (listing line 5752) is absent
// from this extract.
5753 DomTreeUpdater *DTU,
5754 bool RemoveOrigDefaultBlock = true) {
5755 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5756 auto *BB = Switch->getParent();
5757 auto *OrigDefaultBlock = Switch->getDefaultDest();
5758 if (RemoveOrigDefaultBlock)
5759 OrigDefaultBlock->removePredecessor(BB);
5760 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5761 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5762 OrigDefaultBlock);
5763 new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5764 Switch->setDefaultDest(&*NewDefaultBlock);
5765 if (DTU) {
5767 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
 // Only delete the BB -> OrigDefaultBlock edge if no remaining case of the
 // switch still targets the original default block.
5768 if (RemoveOrigDefaultBlock &&
5769 !is_contained(successors(BB), OrigDefaultBlock))
5770 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5771 DTU->applyUpdates(Updates);
5772 }
5773}
5774
5775/// Turn a switch into an integer range comparison and branch.
5776/// Switches with more than 2 destinations are ignored.
5777/// Switches with 1 destination are also ignored.
/// Returns true if the switch was replaced by a compare and conditional
/// branch, false if the switch was left untouched.
5778bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5779 IRBuilder<> &Builder) {
5780 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5781
 // A default block whose first real instruction is `unreachable` is treated
 // as "no default".
5782 bool HasDefault =
5783 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5784
5785 auto *BB = SI->getParent();
5786
5787 // Partition the cases into two sets with different destinations.
 // With a live default, DestA is the default destination; otherwise DestA is
 // the destination of the first case seen.
5788 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5789 BasicBlock *DestB = nullptr;
5792
5793 for (auto Case : SI->cases()) {
5794 BasicBlock *Dest = Case.getCaseSuccessor();
5795 if (!DestA)
5796 DestA = Dest;
5797 if (Dest == DestA) {
5798 CasesA.push_back(Case.getCaseValue());
5799 continue;
5800 }
5801 if (!DestB)
5802 DestB = Dest;
5803 if (Dest == DestB) {
5804 CasesB.push_back(Case.getCaseValue());
5805 continue;
5806 }
5807 return false; // More than two destinations.
5808 }
5809 if (!DestB)
5810 return false; // All destinations are the same and the default is unreachable
5811
5812 assert(DestA && DestB &&
5813 "Single-destination switch should have been folded.");
5814 assert(DestA != DestB);
5815 assert(DestB != SI->getDefaultDest());
5816 assert(!CasesB.empty() && "There must be non-default cases.");
5817 assert(!CasesA.empty() || HasDefault);
5818
5819 // Figure out if one of the sets of cases form a contiguous range.
5820 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5821 BasicBlock *ContiguousDest = nullptr;
5822 BasicBlock *OtherDest = nullptr;
5823 if (!CasesA.empty() && casesAreContiguous(CasesA)) {
5824 ContiguousCases = &CasesA;
5825 ContiguousDest = DestA;
5826 OtherDest = DestB;
5827 } else if (casesAreContiguous(CasesB)) {
5828 ContiguousCases = &CasesB;
5829 ContiguousDest = DestB;
5830 OtherDest = DestA;
5831 } else
5832 return false;
5833
5834 // Start building the compare and branch.
5835
 // casesAreContiguous only accepts a list that descends by one, so back() is
 // the smallest case value; adding its negation rebases the range to zero.
5836 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
5837 Constant *NumCases =
5838 ConstantInt::get(Offset->getType(), ContiguousCases->size());
5839
5840 Value *Sub = SI->getCondition();
5841 if (!Offset->isNullValue())
5842 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5843
5844 Value *Cmp;
5845 // If NumCases overflowed, then all possible values jump to the successor.
5846 if (NumCases->isNullValue() && !ContiguousCases->empty())
5847 Cmp = ConstantInt::getTrue(SI->getContext());
5848 else
5849 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5850 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
5851
5852 // Update weight for the newly-created conditional branch.
5853 if (hasBranchWeightMD(*SI)) {
5855 getBranchWeights(SI, Weights);
5856 if (Weights.size() == 1 + SI->getNumCases()) {
 // Sum the per-successor weights into the two outcomes of the new branch.
5857 uint64_t TrueWeight = 0;
5858 uint64_t FalseWeight = 0;
5859 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5860 if (SI->getSuccessor(I) == ContiguousDest)
5861 TrueWeight += Weights[I];
5862 else
5863 FalseWeight += Weights[I];
5864 }
 // Halve both weights together until each fits in 32 bits.
5865 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5866 TrueWeight /= 2;
5867 FalseWeight /= 2;
5868 }
5869 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
5870 }
5871 }
5872
5873 // Prune obsolete incoming values off the successors' PHI nodes.
 // The new branch has a single edge to each destination, so all but one of
 // the incoming entries from the switch block are removed from each PHI.
5874 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
5875 unsigned PreviousEdges = ContiguousCases->size();
5876 if (ContiguousDest == SI->getDefaultDest())
5877 ++PreviousEdges;
5878 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5879 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5880 }
5881 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5882 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5883 if (OtherDest == SI->getDefaultDest())
5884 ++PreviousEdges;
5885 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5886 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5887 }
5888
5889 // Clean up the default block - it may have phis or other instructions before
5890 // the unreachable terminator.
5891 if (!HasDefault)
5893
 // Capture the default destination before the switch is erased; it is only
 // used below when the default is unreachable.
5894 auto *UnreachableDefault = SI->getDefaultDest();
5895
5896 // Drop the switch.
5897 SI->eraseFromParent();
5898
5899 if (!HasDefault && DTU)
5900 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5901
5902 return true;
5903}
5904
5905/// Compute masked bits for the condition of a switch
5906/// and use it to remove dead cases.
/// Returns true if the switch was modified (dead cases removed, or the default
/// destination made unreachable), false otherwise.
/// NOTE(review): the first line of the signature (listing line 5907) is absent
/// from this extract.
5908 AssumptionCache *AC,
5909 const DataLayout &DL) {
5910 Value *Cond = SI->getCondition();
5911 KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
5912
5913 // We can also eliminate cases by determining that their values are outside of
5914 // the limited range of the condition based on how many significant (non-sign)
5915 // bits are in the condition value.
5916 unsigned MaxSignificantBitsInCond =
5917 ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);
5918
5919 // Gather dead cases.
 // When DTU is set, NumPerSuccessorCases counts the live cases per successor
 // so that edges whose count drops to zero can be deleted from the dominator
 // tree afterwards.
5921 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5922 SmallVector<BasicBlock *, 8> UniqueSuccessors;
5923 for (const auto &Case : SI->cases()) {
5924 auto *Successor = Case.getCaseSuccessor();
5925 if (DTU) {
5926 if (!NumPerSuccessorCases.count(Successor))
5927 UniqueSuccessors.push_back(Successor);
5928 ++NumPerSuccessorCases[Successor];
5929 }
5930 const APInt &CaseVal = Case.getCaseValue()->getValue();
 // A case is dead if it sets a bit known to be zero, clears a bit known to
 // be one, or needs more significant bits than the condition can have.
5931 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
5932 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5933 DeadCases.push_back(Case.getCaseValue());
5934 if (DTU)
5935 --NumPerSuccessorCases[Successor];
5936 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5937 << " is dead.\n");
5938 }
5939 }
5940
5941 // If we can prove that the cases must cover all possible values, the
5942 // default destination becomes dead and we can remove it. If we know some
5943 // of the bits in the value, we can use that to more precisely compute the
5944 // number of possible unique case values.
5945 bool HasDefault =
5946 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5947 const unsigned NumUnknownBits =
5948 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5949 assert(NumUnknownBits <= Known.getBitWidth());
5950 if (HasDefault && DeadCases.empty() &&
5951 NumUnknownBits < 64 /* avoid overflow */) {
5952 uint64_t AllNumCases = 1ULL << NumUnknownBits;
5953 if (SI->getNumCases() == AllNumCases) {
5955 return true;
5956 }
5957 // When only one case value is missing, replace default with that case.
5958 // Eliminating the default branch will provide more opportunities for
5959 // optimization, such as lookup tables.
5960 if (SI->getNumCases() == AllNumCases - 1) {
5961 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
5962 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
5963 if (CondTy->getIntegerBitWidth() > 64 ||
5964 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
5965 return false;
5966
 // With more than one unknown bit, the XOR of all 2^NumUnknownBits
 // possible values is zero (every bit is set an even number of times),
 // so the XOR of all present case values is the one missing value.
5967 uint64_t MissingCaseVal = 0;
5968 for (const auto &Case : SI->cases())
5969 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
5970 auto *MissingCase =
5971 cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
 // The new case goes to the old default destination and inherits the
 // default's weight; the now-unreachable default gets weight zero.
5973 SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
5974 createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
5975 SIW.setSuccessorWeight(0, 0);
5976 return true;
5977 }
5978 }
5979
5980 if (DeadCases.empty())
5981 return false;
5982
5984 for (ConstantInt *DeadCase : DeadCases) {
5985 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
5986 assert(CaseI != SI->case_default() &&
5987 "Case was not found. Probably mistake in DeadCases forming.");
5988 // Prune unused values from PHI nodes.
5989 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
5990 SIW.removeCase(CaseI);
5991 }
5992
5993 if (DTU) {
 // Delete the edge to every successor that has no live case left.
5994 std::vector<DominatorTree::UpdateType> Updates;
5995 for (auto *Successor : UniqueSuccessors)
5996 if (NumPerSuccessorCases[Successor] == 0)
5997 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
5998 DTU->applyUpdates(Updates);
5999 }
6000
6001 return true;
6002}
6003
6004/// If BB would be eligible for simplification by
6005/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6006/// by an unconditional branch), look at the phi node for BB in the successor
6007/// block and see if the incoming value is equal to CaseValue. If so, return
6008/// the phi node, and set PhiIndex to BB's index in the phi node.
/// Returns nullptr (leaving PhiIndex untouched) if BB is not eligible or no
/// phi in the successor has CaseValue as its incoming value from BB. Only the
/// first matching phi is returned.
/// NOTE(review): the first line of the signature (listing line 6009) is absent
/// from this extract.
6010 BasicBlock *BB, int *PhiIndex) {
6011 if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
6012 return nullptr; // BB must be empty to be a candidate for simplification.
6013 if (!BB->getSinglePredecessor())
6014 return nullptr; // BB must be dominated by the switch.
6015
6016 BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
6017 if (!Branch || !Branch->isUnconditional())
6018 return nullptr; // Terminator must be unconditional branch.
6019
6020 BasicBlock *Succ = Branch->getSuccessor(0);
6021
6022 for (PHINode &PHI : Succ->phis()) {
6023 int Idx = PHI.getBasicBlockIndex(BB);
6024 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6025
6026 Value *InValue = PHI.getIncomingValue(Idx);
6027 if (InValue != CaseValue)
6028 continue;
6029
6030 *PhiIndex = Idx;
6031 return &PHI;
6032 }
6033
6034 return nullptr;
6035}
6036
6037/// Try to forward the condition of a switch instruction to a phi node
6038/// dominated by the switch, if that would mean that some of the destination
6039/// blocks of the switch can be folded away. Return true if a change is made.
///
/// Two rewrites are performed: phis in a direct case successor that receive the
/// case constant from the switch block, and phis reached through an empty
/// forwarding block (found by findPHIForConditionForwarding).
// NOTE(review): the signature line (listing line 6040) is absent from this
// listing; the body uses a switch instruction named SI -- confirm its
// declaration against the upstream file.
6041 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6042
 // Maps each candidate phi to the incoming-value indexes that could be
 // replaced by the switch condition.
6043 ForwardingNodesMap ForwardingNodes;
6044 BasicBlock *SwitchBlock = SI->getParent();
6045 bool Changed = false;
6046 for (const auto &Case : SI->cases()) {
6047 ConstantInt *CaseValue = Case.getCaseValue();
6048 BasicBlock *CaseDest = Case.getCaseSuccessor();
6049
6050 // Replace phi operands in successor blocks that are using the constant case
6051 // value rather than the switch condition variable:
6052 // switchbb:
6053 // switch i32 %x, label %default [
6054 // i32 17, label %succ
6055 // ...
6056 // succ:
6057 // %r = phi i32 ... [ 17, %switchbb ] ...
6058 // -->
6059 // %r = phi i32 ... [ %x, %switchbb ] ...
6060
6061 for (PHINode &Phi : CaseDest->phis()) {
6062 // This only works if there is exactly 1 incoming edge from the switch to
6063 // a phi. If there is >1, that means multiple cases of the switch map to 1
6064 // value in the phi, and that phi value is not the switch condition. Thus,
6065 // this transform would not make sense (the phi would be invalid because
6066 // a phi can't have different incoming values from the same block).
6067 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6068 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6069 count(Phi.blocks(), SwitchBlock) == 1) {
6070 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6071 Changed = true;
6072 }
6073 }
6074
6075 // Collect phi nodes that are indirectly using this switch's case constants.
6076 int PhiIdx;
6077 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6078 ForwardingNodes[Phi].push_back(PhiIdx);
6079 }
6080
6081 for (auto &ForwardingNode : ForwardingNodes) {
6082 PHINode *Phi = ForwardingNode.first;
6083 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6084 // Check if it helps to fold PHI.
 // A single candidate index is only rewritten when the phi already has the
 // switch condition as one of its incoming values; two or more candidates
 // are always rewritten.
6085 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6086 continue;
6087
6088 for (int Index : Indexes)
6089 Phi->setIncomingValue(Index, SI->getCondition());
6090 Changed = true;
6091 }
6092
6093 return Changed;
6094}
6095
6096/// Return true if the backend will be able to handle
6097/// initializing an array of constants like C.
///
/// Thread-dependent and DLL-import-dependent constants are rejected, as is any
/// constant that is not one of the kinds tested below.
// NOTE(review): the signature line (listing line 6098) is absent from this
// listing; the recursive call below passes (Constant, TTI) -- confirm the
// exact parameter types upstream.
6099 if (C->isThreadDependent())
6100 return false;
6101 if (C->isDLLImportDependent())
6102 return false;
6103
 // Accept only FP/integer constants, null pointers, globals, undef values
 // and constant expressions.
6104 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6105 !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
6106 !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
6107 return false;
6108
6109 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
6110 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6111 // materializing the array of constants.
 // The expression is accepted only if stripping in-bounds constant offsets
 // yields a different constant that is itself valid (checked recursively).
6112 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6113 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6114 return false;
6115 }
6116
 // NOTE(review): listing line 6117 is absent here; it presumably holds the
 // condition guarding the following `return false` -- confirm upstream.
6118 return false;
6119
6120 return true;
6121}
6122
6123/// If V is a Constant, return it. Otherwise, try to look up
6124/// its constant value in ConstantPool, returning 0 if it's not there.
///
/// "0" here means a null pointer: the result of ConstantPool.lookup(V) is
/// returned as-is when V is not itself a Constant.
// NOTE(review): listing lines 6126-6127 (function name and parameter list) are
// absent from this listing -- confirm the declaration upstream.
6125static Constant *
6128 if (Constant *C = dyn_cast<Constant>(V))
6129 return C;
6130 return ConstantPool.lookup(V);
6131}
6132
6133/// Try to fold instruction I into a constant. This works for
6134/// simple instructions such as binary operations where both operands are
6135/// constant or can be replaced by constants from the ConstantPool. Returns the
6136/// resulting constant on success, 0 otherwise.
///
/// Selects are handled specially: only the arm chosen by the (constant)
/// condition has to be resolvable to a constant.
// NOTE(review): listing lines 6138-6139 (function name and parameter list) are
// absent from this listing -- confirm the declaration upstream.
6137static Constant *
6140 if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
6141 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6142 if (!A)
6143 return nullptr;
6144 if (A->isAllOnesValue())
6145 return lookupConstant(Select->getTrueValue(), ConstantPool);
6146 if (A->isNullValue())
6147 return lookupConstant(Select->getFalseValue(), ConstantPool);
 // The condition is a constant that is neither all-ones nor null; give up.
6148 return nullptr;
6149 }
6150
 // NOTE(review): listing line 6151 is absent here; it presumably declares the
 // COps container filled below -- confirm upstream.
 // Every operand must resolve to a constant, otherwise folding fails.
6152 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6153 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6154 COps.push_back(A);
6155 else
6156 return nullptr;
6157 }
6158
6159 return ConstantFoldInstOperands(I, COps, DL);
6160}
6161
6162/// Try to determine the resulting constant values in phi nodes
6163/// at the common destination basic block, *CommonDest, for one of the case
6164/// destinations CaseDest corresponding to value CaseVal (0 for the default
6165/// case), of a switch instruction SI.
///
/// On success the (phi, constant) pairs found are appended to Res. If
/// *CommonDest is null on entry it is set to the block reached from CaseDest;
/// otherwise the reached block must equal *CommonDest. Returns true only if
/// at least one pair ends up in Res.
// NOTE(review): the first signature line (listing line 6167) is absent from
// this listing; SI, CaseVal and CaseDest are presumably declared there --
// confirm upstream.
6166static bool
6168 BasicBlock **CommonDest,
6169 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6170 const DataLayout &DL, const TargetTransformInfo &TTI) {
6171 // The block from which we enter the common destination.
6172 BasicBlock *Pred = SI->getParent();
6173
6174 // If CaseDest is empty except for some side-effect free instructions through
6175 // which we can constant-propagate the CaseVal, continue to its successor.
 // NOTE(review): listing line 6176 is absent here; it presumably declares
 // ConstantPool, which is seeded with (switch condition -> CaseVal) below.
6177 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6178 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6179 if (I.isTerminator()) {
6180 // If the terminator is a simple branch, continue to the next block.
6181 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6182 return false;
 // Step through: the phi edge we care about now comes from the old
 // CaseDest, and the destination becomes its single successor.
6183 Pred = CaseDest;
6184 CaseDest = I.getSuccessor(0);
6185 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6186 // Instruction is side-effect free and constant.
6187
6188 // If the instruction has uses outside this block or a phi node slot for
6189 // the block, it is not safe to bypass the instruction since it would then
6190 // no longer dominate all its uses.
6191 for (auto &Use : I.uses()) {
6192 User *User = Use.getUser();
 // Note: the inner `I` below shadows the loop variable and names the
 // using instruction, not the instruction being folded.
6193 if (Instruction *I = dyn_cast<Instruction>(User))
6194 if (I->getParent() == CaseDest)
6195 continue;
6196 if (PHINode *Phi = dyn_cast<PHINode>(User))
6197 if (Phi->getIncomingBlock(Use) == CaseDest)
6198 continue;
6199 return false;
6200 }
6201
6202 ConstantPool.insert(std::make_pair(&I, C));
6203 } else {
 // First instruction that cannot be folded: stop scanning this block.
6204 break;
6205 }
6206 }
6207
6208 // If we did not have a CommonDest before, use the current one.
6209 if (!*CommonDest)
6210 *CommonDest = CaseDest;
6211 // If the destination isn't the common one, abort.
6212 if (CaseDest != *CommonDest)
6213 return false;
6214
6215 // Get the values for this case from phi nodes in the destination block.
6216 for (PHINode &PHI : (*CommonDest)->phis()) {
6217 int Idx = PHI.getBasicBlockIndex(Pred);
 // Phis with no entry for Pred are skipped rather than treated as failure.
6218 if (Idx == -1)
6219 continue;
6220
6221 Constant *ConstVal =
6222 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6223 if (!ConstVal)
6224 return false;
6225
6226 // Be conservative about which kinds of constants we support.
6227 if (!validLookupTableConstant(ConstVal, TTI))
6228 return false;
6229
6230 Res.push_back(std::make_pair(&PHI, ConstVal));
6231 }
6232
6233 return Res.size() > 0;
6234}
6235
6236// Helper function used to add CaseVal to the list of cases that generate
6237// Result. Returns the updated number of cases that generate this result.
6238static size_t mapCaseToResult(ConstantInt *CaseVal,
6239 SwitchCaseResultVectorTy &UniqueResults,
6240 Constant *Result) {
6241 for (auto &I : UniqueResults) {
6242 if (I.first == Result) {
6243 I.second.push_back(CaseVal);
6244 return I.second.size();
6245 }
6246 }
6247 UniqueResults.push_back(
6248 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6249 return 1;
6250}
6251
6252// Helper function that initializes a map containing
6253// results for the PHI node of the common destination block for a switch
6254// instruction. Returns false if multiple PHI nodes have been found or if
6255// there is not a common destination block for the switch.
//
// It also returns false when one result is shared by more than
// MaxSwitchCasesPerResult cases, when there are more than MaxUniqueResults
// distinct results, or when no default result exists and the default
// destination does not start with an unreachable instruction.
// On success DefaultResult holds the default's constant, or null.
// NOTE(review): the first signature line (listing line 6256) is absent from
// this listing; SI and PHI are presumably declared there -- confirm upstream.
6257 BasicBlock *&CommonDest,
6258 SwitchCaseResultVectorTy &UniqueResults,
6259 Constant *&DefaultResult,
6260 const DataLayout &DL,
6261 const TargetTransformInfo &TTI,
6262 uintptr_t MaxUniqueResults) {
6263 for (const auto &I : SI->cases()) {
6264 ConstantInt *CaseVal = I.getCaseValue();
6265
6266 // Resulting value at phi nodes for this case value.
6267 SwitchCaseResultsTy Results;
6268 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6269 DL, TTI))
6270 return false;
6271
6272 // Only one value per case is permitted.
6273 if (Results.size() > 1)
6274 return false;
6275
6276 // Add the case->result mapping to UniqueResults.
 // Results is non-empty here because getCaseResults returned true.
6277 const size_t NumCasesForResult =
6278 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6279
6280 // Early out if there are too many cases for this result.
6281 if (NumCasesForResult > MaxSwitchCasesPerResult)
6282 return false;
6283
6284 // Early out if there are too many unique results.
6285 if (UniqueResults.size() > MaxUniqueResults)
6286 return false;
6287
6288 // Check the PHI consistency.
 // Every case must feed the same phi; the first case fixes which one.
6289 if (!PHI)
6290 PHI = Results[0].first;
6291 else if (PHI != Results[0].first)
6292 return false;
6293 }
6294 // Find the default result value.
 // NOTE(review): listing line 6295 is absent here; it presumably declares
 // DefaultResults -- confirm upstream.
6296 BasicBlock *DefaultDest = SI->getDefaultDest();
 // The return value is deliberately ignored; only the number of collected
 // results is inspected below.
6297 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6298 DL, TTI);
6299 // If the default value is not found abort unless the default destination
6300 // is unreachable.
6301 DefaultResult =
6302 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6303 if ((!DefaultResult &&
6304 !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
6305 return false;
6306
6307 return true;
6308}
6309
6310// Helper function that checks if it is possible to transform a switch with only
6311// two cases (or two cases + default) that produces a result into a select.
6312// TODO: Handle switches with more than 2 cases that map to the same result.
6313static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6314 Constant *DefaultResult, Value *Condition,
6315 IRBuilder<> &Builder) {
6316 // If we are selecting between only two cases transform into a simple
6317 // select or a two-way select if default is possible.
6318 // Example:
6319 // switch (a) { %0 = icmp eq i32 %a, 10
6320 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6321 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6322 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6323 // }
6324 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6325 ResultVector[1].second.size() == 1) {
6326 ConstantInt *FirstCase = ResultVector[0].second[0];
6327 ConstantInt *SecondCase = ResultVector[1].second[0];
6328 Value *SelectValue = ResultVector[1].first;
6329 if (DefaultResult) {
6330 Value *ValueCompare =
6331 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6332 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6333 DefaultResult, "switch.select");
6334 }
6335 Value *ValueCompare =
6336 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6337 return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6338 SelectValue, "switch.select");
6339 }
6340
6341 // Handle the degenerate case where two cases have the same result value.
6342 if (ResultVector.size() == 1 && DefaultResult) {
6343 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6344 unsigned CaseCount = CaseValues.size();
6345 // n bits group cases map to the same result:
6346 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6347 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6348 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6349 if (isPowerOf2_32(CaseCount)) {
6350 ConstantInt *MinCaseVal = CaseValues[0];
6351 // Find mininal value.
6352 for (auto *Case : CaseValues)
6353 if (Case->getValue().slt(MinCaseVal->getValue()))
6354 MinCaseVal = Case;
6355
6356 // Mark the bits case number touched.
6357 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6358 for (auto *Case : CaseValues)
6359 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6360
6361 // Check if cases with the same result can cover all number
6362 // in touched bits.
6363 if (BitMask.popcount() == Log2_32(CaseCount)) {
6364 if (!MinCaseVal->isNullValue())
6365 Condition = Builder.CreateSub(Condition, MinCaseVal);
6366 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6367 Value *Cmp = Builder.CreateICmpEQ(
6368 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6369 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6370 }
6371 }
6372
6373 // Handle the degenerate case where two cases have the same value.
6374 if (CaseValues.size() == 2) {
6375 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6376 "switch.selectcmp.case1");
6377 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6378 "switch.selectcmp.case2");
6379 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6380 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6381 }
6382 }
6383
6384 return nullptr;
6385}
6386
6387// Helper function to cleanup a switch instruction that has been converted into
6388// a select, fixing up PHI nodes and basic blocks.
//
// The switch's block gets an unconditional branch to PHI's block, PHI receives
// SelectValue as its only incoming value from that block, every other switch
// successor loses the switch block as predecessor, and the switch is erased.
// Dominator-tree updates are collected and applied only when DTU is non-null.
// NOTE(review): the first signature line (listing line 6389) is absent from
// this listing; SI and PHI are presumably declared there -- confirm upstream.
6390 Value *SelectValue,
6391 IRBuilder<> &Builder,
6392 DomTreeUpdater *DTU) {
6393 std::vector<DominatorTree::UpdateType> Updates;
6394
6395 BasicBlock *SelectBB = SI->getParent();
6396 BasicBlock *DestBB = PHI->getParent();
6397
 // The new branch adds a SelectBB -> DestBB edge only if none existed yet.
6398 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6399 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6400 Builder.CreateBr(DestBB);
6401
6402 // Remove the switch.
6403
 // Drop every incoming entry from SelectBB, then add back a single one
 // carrying the select result.
6404 PHI->removeIncomingValueIf(
6405 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6406 PHI->addIncoming(SelectValue, SelectBB);
6407
 // A successor may appear several times in the switch; removePredecessor is
 // called once per occurrence, but the tree edge is deleted only once.
6408 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6409 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6410 BasicBlock *Succ = SI->getSuccessor(i);
6411
6412 if (Succ == DestBB)
6413 continue;
6414 Succ->removePredecessor(SelectBB);
6415 if (DTU && RemovedSuccessors.insert(Succ).second)
6416 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6417 }
6418 SI->eraseFromParent();
6419 if (DTU)
6420 DTU->applyUpdates(Updates);
6421}
6422
6423/// If a switch is only used to initialize one or more phi nodes in a common
6424/// successor block with only two different constant values, try to replace the
6425/// switch with a select. Returns true if the fold was made.
6426static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6427 DomTreeUpdater *DTU, const DataLayout &DL,
6428 const TargetTransformInfo &TTI) {
6429 Value *const Cond = SI->getCondition();
6430 PHINode *PHI = nullptr;
6431 BasicBlock *CommonDest = nullptr;
6432 Constant *DefaultResult;
6433 SwitchCaseResultVectorTy UniqueResults;
6434 // Collect all the cases that will deliver the same value from the switch.
6435 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6436 DL, TTI, /*MaxUniqueResults*/ 2))
6437 return false;
6438
6439 assert(PHI != nullptr && "PHI for value select not found");
6440 Builder.SetInsertPoint(SI);
6441 Value *SelectValue =
6442 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6443 if (!SelectValue)
6444 return false;
6445
6446 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6447 return true;
6448}
6449
6450namespace {
6451
6452/// This class represents a lookup table that can be used to replace a switch.
///
/// The constructor analyses the table contents and selects one of the
/// representations enumerated by Kind; buildLookup then emits the IR matching
/// that representation.
6453class SwitchLookupTable {
6454public:
6455 /// Create a lookup table to use as a switch replacement with the contents
6456 /// of Values, using DefaultValue to fill any holes in the table.
6457 SwitchLookupTable(
6458 Module &M, uint64_t TableSize, ConstantInt *Offset,
6459 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6460 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
6461
6462 /// Build instructions with Builder to retrieve the value at
6463 /// the position given by Index in the lookup table.
6464 Value *buildLookup(Value *Index, IRBuilder<> &Builder);
6465
6466 /// Return true if a table with TableSize elements of
6467 /// type ElementType would fit in a target-legal register.
6468 static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6469 Type *ElementType);
6470
6471private:
6472 // Depending on the contents of the table, it can be represented in
6473 // different ways.
6474 enum {
6475 // For tables where each element contains the same value, we just have to
6476 // store that single value and return it for each lookup.
6477 SingleValueKind,
6478
6479 // For tables where there is a linear relationship between table index
6480 // and values. We calculate the result with a simple multiplication
6481 // and addition instead of a table lookup.
6482 LinearMapKind,
6483
6484 // For small tables with integer elements, we can pack them into a bitmap
6485 // that fits into a target-legal register. Values are retrieved by
6486 // shift and mask operations.
6487 BitMapKind,
6488
6489 // The table is stored as an array of values. Values are retrieved by load
6490 // instructions from the table.
6491 ArrayKind
6492 } Kind;
6493
6494 // For SingleValueKind, this is the single value.
6495 Constant *SingleValue = nullptr;
6496
6497 // For BitMapKind, this is the bitmap.
 // BitMapElementTy is the type of one packed element; buildLookup truncates
 // the shifted bitmap to it.
6498 ConstantInt *BitMap = nullptr;
6499 IntegerType *BitMapElementTy = nullptr;
6500
6501 // For LinearMapKind, these are the constants used to derive the value.
 // The value is Index * LinearMultiplier + LinearOffset. LinearMapValWrapped
 // is set by the constructor when the mapping is non-monotonic or the
 // multiplication may overflow in the signed sense; buildLookup attaches nsw
 // only when it is false.
6502 ConstantInt *LinearOffset = nullptr;
6503 ConstantInt *LinearMultiplier = nullptr;
6504 bool LinearMapValWrapped = false;
6505
6506 // For ArrayKind, this is the array.
6507 GlobalVariable *Array = nullptr;
6508};
6509
6510} // end anonymous namespace
6511
/// Fills a table from the given case results and selects its representation,
/// trying in order: single value, linear map, bitmap, and finally a constant
/// array stored in a new global variable.
///
/// \param M            Module that supplies the LLVMContext and receives the
///                     global variable when an array is needed.
/// \param TableSize    Number of slots; must be at least Values.size().
/// \param Offset       Subtracted from each case value to get its slot index.
/// \param Values       (case value, result) pairs; must not be empty.
/// \param DefaultValue Fills the slots no case maps to; required whenever
///                     Values.size() < TableSize.
/// \param DL           Used for the register-fit check and array alignment.
/// \param FuncName     Appended to "switch.table." to name the array global.
6512SwitchLookupTable::SwitchLookupTable(
6513 Module &M, uint64_t TableSize, ConstantInt *Offset,
6514 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6515 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6516 assert(Values.size() && "Can't build lookup table without values!");
6517 assert(TableSize >= Values.size() && "Can't fit values in table!");
6518
6519 // If all values in the table are equal, this is that value.
6520 SingleValue = Values.begin()->second;
6521
6522 Type *ValueType = Values.begin()->second->getType();
6523
6524 // Build up the table contents.
 // Slots start out null; a null slot after this loop is a hole.
6525 SmallVector<Constant *, 64> TableContents(TableSize);
6526 for (size_t I = 0, E = Values.size(); I != E; ++I) {
6527 ConstantInt *CaseVal = Values[I].first;
6528 Constant *CaseRes = Values[I].second;
6529 assert(CaseRes->getType() == ValueType);
6530
6531 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6532 TableContents[Idx] = CaseRes;
6533
 // Poison results never disqualify the single-value candidate. A poison
 // candidate is replaced by the first non-poison result; any other
 // mismatch clears the candidate.
6534 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6535 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6536 }
6537
6538 // Fill in any holes in the table with the default result.
6539 if (Values.size() < TableSize) {
6540 assert(DefaultValue &&
6541 "Need a default value to fill the lookup table holes.");
6542 assert(DefaultValue->getType() == ValueType);
6543 for (uint64_t I = 0; I < TableSize; ++I) {
6544 if (!TableContents[I])
6545 TableContents[I] = DefaultValue;
6546 }
6547
6548 // If the default value is poison, all the holes are poison.
6549 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6550
 // A non-poison default that differs from the candidate rules out the
 // single-value representation.
6551 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6552 SingleValue = nullptr;
6553 }
6554
6555 // If each element in the table contains the same value, we only need to store
6556 // that single value.
6557 if (SingleValue) {
6558 Kind = SingleValueKind;
6559 return;
6560 }
6561
6562 // Check if we can derive the value with a linear transformation from the
6563 // table index.
6564 if (isa<IntegerType>(ValueType)) {
6565 bool LinearMappingPossible = true;
6566 APInt PrevVal;
6567 APInt DistToPrev;
6568 // When linear map is monotonic and signed overflow doesn't happen on
6569 // maximum index, we can attach nsw on Add and Mul.
6570 bool NonMonotonic = false;
6571 assert(TableSize >= 2 && "Should be a SingleValue table.");
6572 // Check if there is the same distance between two consecutive values.
6573 for (uint64_t I = 0; I < TableSize; ++I) {
6574 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6575
6576 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6577 // This is a poison value, so it's (probably) a lookup table hole.
6578 // To prevent any regressions from before we switched to using poison as
6579 // the default value, holes will fall back to using the first value.
6580 // This can be removed once we add proper handling for poisons in lookup
6581 // tables.
6582 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6583 }
6584
6585 if (!ConstVal) {
6586 // This is not a ConstantInt (e.g. an undef). We could deal with it, but
6587 // undefs in lookup tables are very seldom. It's probably not worth the
 // additional complexity.
6588 LinearMappingPossible = false;
6589 break;
6590 }
6591 const APInt &Val = ConstVal->getValue();
6592 if (I != 0) {
6593 APInt Dist = Val - PrevVal;
6594 if (I == 1) {
6595 DistToPrev = Dist;
6596 } else if (Dist != DistToPrev) {
6597 LinearMappingPossible = false;
6598 break;
6599 }
 // A positive step must strictly increase the signed value and a
 // non-positive step must not increase it; otherwise the sequence
 // wrapped around.
6600 NonMonotonic |=
6601 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6602 }
6603 PrevVal = Val;
6604 }
6605 if (LinearMappingPossible) {
 // NOTE(review): the loop above substitutes Values[0].second for poison
 // slots without rewriting TableContents, so this cast relies on slot 0
 // itself being a ConstantInt -- confirm slot 0 can never be a poison
 // hole.
6606 LinearOffset = cast<ConstantInt>(TableContents[0]);
6607 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
 // Note: this local APInt M shadows the Module parameter M for the rest
 // of this scope.
6608 APInt M = LinearMultiplier->getValue();
 // MayWrap stays true unless TableSize - 1 fits in M's bit width, in
 // which case smul_ov reports whether M * (TableSize - 1) overflows.
6609 bool MayWrap = true;
6610 if (isIntN(M.getBitWidth(), TableSize - 1))
6611 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6612 LinearMapValWrapped = NonMonotonic || MayWrap;
6613 Kind = LinearMapKind;
6614 ++NumLinearMaps;
6615 return;
6616 }
6617 }
6618
6619 // If the type is integer and the table fits in a register, build a bitmap.
6620 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6621 IntegerType *IT = cast<IntegerType>(ValueType);
6622 APInt TableInt(TableSize * IT->getBitWidth(), 0);
 // Walk the slots from last to first so that slot 0 ends up in the least
 // significant bits.
6623 for (uint64_t I = TableSize; I > 0; --I) {
6624 TableInt <<= IT->getBitWidth();
6625 // Insert values into the bitmap. Undef values are set to zero.
6626 if (!isa<UndefValue>(TableContents[I - 1])) {
6627 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6628 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6629 }
6630 }
6631 BitMap = ConstantInt::get(M.getContext(), TableInt);
6632 BitMapElementTy = IT;
6633 Kind = BitMapKind;
6634 ++NumBitMaps;
6635 return;
6636 }
6637
6638 // Store the table in an array.
6639 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
6640 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
6641
6642 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6643 GlobalVariable::PrivateLinkage, Initializer,
6644 "switch.table." + FuncName);
6645 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6646 // Set the alignment to that of an array items. We will be only loading one
6647 // value out of it.
6648 Array->setAlignment(DL.getPrefTypeAlign(ValueType));
6649 Kind = ArrayKind;
6650}
6651
6652Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder) {
6653 switch (Kind) {
6654 case SingleValueKind:
6655 return SingleValue;
6656 case LinearMapKind: {
6657 // Derive the result value from the input value.
6658 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6659 false, "switch.idx.cast");
6660 if (!LinearMultiplier->isOne())
6661 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6662 /*HasNUW = */ false,
6663 /*HasNSW = */ !LinearMapValWrapped);
6664
6665 if (!LinearOffset->isZero())
6666 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6667 /*HasNUW = */ false,
6668 /*HasNSW = */ !LinearMapValWrapped);
6669 return Result;
6670 }
6671 case BitMapKind: {
6672 // Type of the bitmap (e.g. i59).
6673 IntegerType *MapTy = BitMap->getIntegerType();
6674
6675 // Cast Index to the same type as the bitmap.
6676 // Note: The Index is <= the number of elements in the table, so
6677 // truncating it to the width of the bitmask is safe.
6678 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6679
6680 // Multiply the shift amount by the element width. NUW/NSW can always be
6681 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
6682 // BitMap's bit width.
6683 ShiftAmt = Builder.CreateMul(
6684 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6685 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6686
6687 // Shift down.
6688 Value *DownShifted =
6689 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
6690 // Mask off.
6691 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
6692 }
6693 case ArrayKind: {
6694 // Make sure the table index will not overflow when treated as signed.
6695 IntegerType *IT = cast<IntegerType>(Index->getType());
6696 uint64_t TableSize =
6697 Array->getInitializer()->getType()->getArrayNumElements();
6698 if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
6699 Index = Builder.CreateZExt(
6700 Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
6701 "switch.tableidx.zext");
6702
6703 Value *GEPIndices[] = {Builder.getInt32(0), Index};
6704 Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
6705 GEPIndices, "switch.gep");
6706 return Builder.CreateLoad(
6707 cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
6708 "switch.load");
6709 }
6710 }
6711 llvm_unreachable("Unknown lookup table kind!");
6712}
6713
6714bool SwitchLookupTable::wouldFitInRegister(const DataLayout &DL,
6715 uint64_t TableSize,
6716 Type *ElementType) {
6717 auto *IT = dyn_cast<IntegerType>(ElementType);
6718 if (!IT)
6719 return false;
6720 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6721 // are <= 15, we could try to narrow the type.
6722
6723 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6724 if (TableSize >= UINT_MAX / IT->getBitWidth())
6725 return false;
6726 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6727}
6728
// Returns true if Ty is acceptable as a lookup-table element type: either the
// target reports it as legal, or it is an integer type of power-of-two width
// of at least 8 bits that fits in a legal integer according to DL.
// NOTE(review): the first signature line (listing line 6729) is absent from
// this listing; Ty and TTI are presumably declared there -- confirm upstream.
6730 const DataLayout &DL) {
6731 // Allow any legal type.
6732 if (TTI.isTypeLegal(Ty))
6733 return true;
6734
6735 auto *IT = dyn_cast<IntegerType>(Ty);
6736 if (!IT)
6737 return false;
6738
6739 // Also allow power of 2 integer types that have at least 8 bits and fit in
6740 // a register. These types are common in frontend languages and targets
6741 // usually support loads of these types.
6742 // TODO: We could relax this to any integer that fits in a register and rely
6743 // on ABI alignment and padding in the table to allow the load to be widened.
6744 // Or we could widen the constants and truncate the load.
6745 unsigned BitWidth = IT->getBitWidth();
6746 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6747 DL.fitsInLegalInteger(IT->getBitWidth());
6748}
6749
/// Returns true if NumCases cases spread over CaseRange consecutive values
/// reach the minimum density of 40%. Ranges of UINT64_MAX / 100 or more are
/// always reported as not dense.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t MinDensityPercent = 40;

  // Keeping the range below this bound avoids overflow in the
  // multiplications of the density comparison.
  constexpr uint64_t RangeLimit = UINT64_MAX / 100;
  const bool RangeIsSafe = CaseRange < RangeLimit;

  return RangeIsSafe && NumCases * 100 >= CaseRange * MinDensityPercent;
}
6761
// Density check on a list of case values: the range is taken as
// back() - front() + 1 and handed to isSwitchDense(count, range) together with
// the number of values.
// NOTE(review): the signature line (listing line 6762) is absent from this
// listing. The range computation presumably expects Values to be sorted in
// ascending order and non-empty -- confirm against the callers.
6763 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6764 uint64_t Range = Diff + 1;
 // Diff + 1 wraps to 0 when Diff is UINT64_MAX; treat that as not dense.
6765 if (Range < Diff)
6766 return false; // Overflow.
6767
6768 return isSwitchDense(Values.size(), Range);
6769}
6770
6771/// Determine whether a lookup table should be built for this switch, based on
6772/// the number of cases, size of the table, and the types of the results.
///
/// The decision is: build if every result table would fit in a register;
/// otherwise refuse if any result type is not legal for a lookup table;
/// otherwise build only if the switch is dense enough.
6773// TODO: We could support larger than legal types by limiting based on the
6774// number of loads required and/or table size. If the constants are small we
6775// could use smaller table entries and extend after the load.
// NOTE(review): the first signature line (listing line 6777) is absent from
// this listing; SI and TableSize are presumably declared there -- confirm
// upstream.
6776static bool
6778 const TargetTransformInfo &TTI, const DataLayout &DL,
6779 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6780 if (SI->getNumCases() > TableSize)
6781 return false; // TableSize overflowed.
6782
6783 bool AllTablesFitInRegister = true;
6784 bool HasIllegalType = false;
6785 for (const auto &I : ResultTypes) {
6786 Type *Ty = I.second;
6787
6788 // Saturate this flag to true.
6789 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6790
6791 // Saturate this flag to false.
6792 AllTablesFitInRegister =
6793 AllTablesFitInRegister &&
6794 SwitchLookupTable::wouldFitInRegister(DL, TableSize, Ty);
6795
6796 // If both flags saturate, we're done. NOTE: This *only* works with
6797 // saturating flags, and all flags have to saturate first due to the
6798 // non-deterministic behavior of iterating over a dense map.
6799 if (HasIllegalType && !AllTablesFitInRegister)
6800 break;
6801 }
6802
6803 // If each table would fit in a register, we should build it anyway.
6804 if (AllTablesFitInRegister)
6805 return true;
6806
6807 // Don't build a table that doesn't fit in-register if it has illegal types.
6808 if (HasIllegalType)
6809 return false;
6810
6811 return isSwitchDense(SI->getNumCases(), TableSize);
6812}
6813
// Returns true when the smallest case value is zero. Otherwise returns false
// if the smallest case is negative, the largest case saturates uint64_t, or
// there are no default results; failing all of those, returns true only if a
// table of MaxCaseVal + 1 entries would fit in a register for every result
// type.
// NOTE(review): the first signature line (listing line 6814) is absent from
// this listing, so the function name and return type are not visible here --
// confirm upstream.
6815 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6816 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6817 const DataLayout &DL, const TargetTransformInfo &TTI) {
6818 if (MinCaseVal.isNullValue())
6819 return true;
6820 if (MinCaseVal.isNegative() ||
6821 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6822 !HasDefaultResults)
6823 return false;
 // The max() check above guarantees that MaxCaseVal + 1 below cannot wrap.
6824 return all_of(ResultTypes, [&](const auto &KV) {
6825 return SwitchLookupTable::wouldFitInRegister(
6826 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6827 KV.second /* ResultType */);
6828 });
6829}
6830
6831/// Try to reuse the switch table index compare. Following pattern:
6832/// \code
6833/// if (idx < tablesize)
6834/// r = table[idx]; // table does not contain default_value
6835/// else
6836/// r = default_value;
6837/// if (r != default_value)
6838/// ...
6839/// \endcode
6840/// Is optimized to:
6841/// \code
6842/// cond = idx < tablesize;
6843/// if (cond)
6844/// r = table[idx];
6845/// else
6846/// r = default_value;
6847/// if (cond)
6848/// ...
6849/// \endcode
6850/// Jump threading will then eliminate the second if(cond).
///
/// Nothing is changed unless PhiUser is an integer compare against a constant
/// located in PhiBlock, the compare folds to one boolean for DefaultValue and
/// to the opposite boolean for every table value, and the range-check branch
/// dominates PhiBlock in the simple sense checked below.
// NOTE(review): the first signature line (listing line 6851) is absent from
// this listing, so the function name and return type are not visible here --
// confirm upstream.
6852 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6853 Constant *DefaultValue,
6854 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6855 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6856 if (!CmpInst)
6857 return;
6858
6859 // We require that the compare is in the same block as the phi so that jump
6860 // threading can do its work afterwards.
6861 if (CmpInst->getParent() != PhiBlock)
6862 return;
6863
6864 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6865 if (!CmpOp1)
6866 return;
6867
6868 Value *RangeCmp = RangeCheckBranch->getCondition();
6869 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6870 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6871
6872 // Check if the compare with the default value is constant true or false.
6873 const DataLayout &DL = PhiBlock->getDataLayout();
 // NOTE(review): listing line 6874 is absent here; it presumably declares
 // DefaultConst as the result of a constant-folding call whose arguments
 // follow -- confirm upstream.
6875 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6876 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6877 return;
6878
6879 // Check if the compare with the case values is distinct from the default
6880 // compare result.
6881 for (auto ValuePair : Values) {
 // NOTE(review): listing line 6882 is absent here; it presumably declares
 // CaseConst in the same way as DefaultConst above -- confirm upstream.
6883 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
6884 if (!CaseConst || CaseConst == DefaultConst ||
6885 (CaseConst != TrueConst && CaseConst != FalseConst))
6886 return;
6887 }
6888
6889 // Check if the branch instruction dominates the phi node. It's a simple
6890 // dominance check, but sufficient for our needs.
6891 // Although this check is invariant in the calling loops, it's better to do it
6892 // at this late stage. Practically we do it at most once for a switch.
6893 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6894 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6895 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6896 return;
6897 }
6898
6899 if (DefaultConst == FalseConst) {
6900 // The compare yields the same result. We can replace it.
6901 CmpInst->replaceAllUsesWith(RangeCmp);
6902 ++NumTableCmpReuses;
6903 } else {
6904 // The compare yields the same result, just inverted. We can replace it.
 // The inversion (xor with 1) is inserted right before the range-check
 // branch.
6905 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6906 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6907 RangeCheckBranch->getIterator());
6908 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6909 ++NumTableCmpReuses;
6910 }
6911}
6912
6913/// If the switch is only used to initialize one or more phi nodes in a common
6914/// successor block with different constant values, replace the switch with
6915/// lookup tables.
6917 DomTreeUpdater *DTU, const DataLayout &DL,
6918 const TargetTransformInfo &TTI) {
6919 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6920
6921 BasicBlock *BB = SI->getParent();
6922 Function *Fn = BB->getParent();
6923 // Only build lookup table when we have a target that supports it or the
6924 // attribute is not set.
6926 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6927 return false;
6928
6929 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6930 // split off a dense part and build a lookup table for that.
6931
6932 // FIXME: This creates arrays of GEPs to constant strings, which means each
6933 // GEP needs a runtime relocation in PIC code. We should just build one big
6934 // string and lookup indices into that.
6935
6936 // Ignore switches with less than three cases. Lookup tables will not make
6937 // them faster, so we don't analyze them.
6938 if (SI->getNumCases() < 3)
6939 return false;
6940
6941 // Figure out the corresponding result for each case value and phi node in the
6942 // common destination, as well as the min and max case values.
6943 assert(!SI->cases().empty());
6944 SwitchInst::CaseIt CI = SI->case_begin();
6945 ConstantInt *MinCaseVal = CI->getCaseValue();
6946 ConstantInt *MaxCaseVal = CI->getCaseValue();
6947
6948 BasicBlock *CommonDest = nullptr;
6949
6950 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6952
6956
6957 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6958 ConstantInt *CaseVal = CI->getCaseValue();
6959 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6960 MinCaseVal = CaseVal;
6961 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6962 MaxCaseVal = CaseVal;
6963
6964 // Resulting value at phi nodes for this case value.
6966 ResultsTy Results;
6967 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6968 Results, DL, TTI))
6969 return false;
6970
6971 // Append the result from this case to the list for each phi.
6972 for (const auto &I : Results) {
6973 PHINode *PHI = I.first;
6974 Constant *Value = I.second;
6975 if (!ResultLists.count(PHI))
6976 PHIs.push_back(PHI);
6977 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
6978 }
6979 }
6980
6981 // Keep track of the result types.
6982 for (PHINode *PHI : PHIs) {
6983 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6984 }
6985
6986 uint64_t NumResults = ResultLists[PHIs[0]].size();
6987
6988 // If the table has holes, we need a constant result for the default case
6989 // or a bitmask that fits in a register.
6990 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6991 bool HasDefaultResults =
6992 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6993 DefaultResultsList, DL, TTI);
6994
6995 for (const auto &I : DefaultResultsList) {
6996 PHINode *PHI = I.first;
6997 Constant *Result = I.second;
6998 DefaultResults[PHI] = Result;
6999 }
7000
7001 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7002 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7003 uint64_t TableSize;
7004 if (UseSwitchConditionAsTableIndex)
7005 TableSize = MaxCaseVal->getLimitedValue() + 1;
7006 else
7007 TableSize =
7008 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7009
7010 // If the default destination is unreachable, or if the lookup table covers
7011 // all values of the conditional variable, branch directly to the lookup table
7012 // BB. Otherwise, check that the condition is within the case range.
7013 bool DefaultIsReachable = !SI->defaultDestUndefined();
7014
7015 bool TableHasHoles = (NumResults < TableSize);
7016
7017 // If the table has holes but the default destination doesn't produce any
7018 // constant results, the lookup table entries corresponding to the holes will
7019 // contain poison.
7020 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7021
7022 // If the default destination doesn't produce a constant result but is still
7023 // reachable, and the lookup table has holes, we need to use a mask to
7024 // determine if the current index should load from the lookup table or jump
7025 // to the default case.
7026 // The mask is unnecessary if the table has holes but the default destination
7027 // is unreachable, as in that case the holes must also be unreachable.
7028 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7029 if (NeedMask) {
7030 // As an extra penalty for the validity test we require more cases.
7031 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7032 return false;
7033 if (!DL.fitsInLegalInteger(TableSize))
7034 return false;
7035 }
7036
7037 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7038 return false;
7039
7040 std::vector<DominatorTree::UpdateType> Updates;
7041
7042 // Compute the maximum table size representable by the integer type we are
7043 // switching upon.
7044 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7045 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7046 assert(MaxTableSize >= TableSize &&
7047 "It is impossible for a switch to have more entries than the max "
7048 "representable value of its input integer type's size.");
7049
7050 // Create the BB that does the lookups.
7051 Module &Mod = *CommonDest->getParent()->getParent();
7052 BasicBlock *LookupBB = BasicBlock::Create(
7053 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7054
7055 // Compute the table index value.
7056 Builder.SetInsertPoint(SI);
7057 Value *TableIndex;
7058 ConstantInt *TableIndexOffset;
7059 if (UseSwitchConditionAsTableIndex) {
7060 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7061 TableIndex = SI->getCondition();
7062 } else {
7063 TableIndexOffset = MinCaseVal;
7064 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7065 // we can try to attach nsw.
7066 bool MayWrap = true;
7067 if (!DefaultIsReachable) {
7068 APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7069 (void)Res;
7070 }
7071
7072 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7073 "switch.tableidx", /*HasNUW =*/false,
7074 /*HasNSW =*/!MayWrap);
7075 }
7076
7077 BranchInst *RangeCheckBranch = nullptr;
7078
7079 // Grow the table to cover all possible index values to avoid the range check.
7080 // It will use the default result to fill in the table hole later, so make
7081 // sure it exist.
7082 if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
7083 ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
7084 // Grow the table shouldn't have any size impact by checking
7085 // wouldFitInRegister.
7086 // TODO: Consider growing the table also when it doesn't fit in a register
7087 // if no optsize is specified.
7088 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7089 if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
7090 return SwitchLookupTable::wouldFitInRegister(
7091 DL, UpperBound, KV.second /* ResultType */);
7092 })) {
7093 // There may be some case index larger than the UpperBound (unreachable
7094 // case), so make sure the table size does not get smaller.
7095 TableSize = std::max(UpperBound, TableSize);
7096 // The default branch is unreachable after we enlarge the lookup table.
7097 // Adjust DefaultIsReachable to reuse code path.
7098 DefaultIsReachable = false;
7099 }
7100 }
7101
7102 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7103 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7104 Builder.CreateBr(LookupBB);
7105 if (DTU)
7106 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7107 // Note: We call removeProdecessor later since we need to be able to get the
7108 // PHI value for the default case in case we're using a bit mask.
7109 } else {
7110 Value *Cmp = Builder.CreateICmpULT(
7111 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7112 RangeCheckBranch =
7113 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7114 if (DTU)
7115 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7116 }
7117
7118 // Populate the BB that does the lookups.
7119 Builder.SetInsertPoint(LookupBB);
7120
7121 if (NeedMask) {
7122 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7123 // re-purposed to do the hole check, and we create a new LookupBB.
7124 BasicBlock *MaskBB = LookupBB;
7125 MaskBB->setName("switch.hole_check");
7126 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7127 CommonDest->getParent(), CommonDest);
7128
7129 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7130 // unnecessary illegal types.
7131 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7132 APInt MaskInt(TableSizePowOf2, 0);
7133 APInt One(TableSizePowOf2, 1);
7134 // Build bitmask; fill in a 1 bit for every case.
7135 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7136 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
7137 uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
7138 .getLimitedValue();
7139 MaskInt |= One << Idx;
7140 }
7141 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7142
7143 // Get the TableIndex'th bit of the bitmask.
7144 // If this bit is 0 (meaning hole) jump to the default destination,
7145 // else continue with table lookup.
7146 IntegerType *MapTy = TableMask->getIntegerType();
7147 Value *MaskIndex =
7148 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7149 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7150 Value *LoBit = Builder.CreateTrunc(
7151 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7152 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7153 if (DTU) {
7154 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7155 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7156 }
7157 Builder.SetInsertPoint(LookupBB);
7158 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7159 }
7160
7161 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7162 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7163 // do not delete PHINodes here.
7164 SI->getDefaultDest()->removePredecessor(BB,
7165 /*KeepOneInputPHIs=*/true);
7166 if (DTU)
7167 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7168 }
7169
7170 for (PHINode *PHI : PHIs) {
7171 const ResultListTy &ResultList = ResultLists[PHI];
7172
7173 Type *ResultType = ResultList.begin()->second->getType();
7174
7175 // Use any value to fill the lookup table holes.
7176 Constant *DV =
7177 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7178 StringRef FuncName = Fn->getName();
7179 SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
7180 DL, FuncName);
7181
7182 Value *Result = Table.buildLookup(TableIndex, Builder);
7183
7184 // Do a small peephole optimization: re-use the switch table compare if
7185 // possible.
7186 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7187 BasicBlock *PhiBlock = PHI->getParent();
7188 // Search for compare instructions which use the phi.
7189 for (auto *User : PHI->users()) {
7190 reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
7191 }
7192 }
7193
7194 PHI->addIncoming(Result, LookupBB);
7195 }
7196
7197 Builder.CreateBr(CommonDest);
7198 if (DTU)
7199 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7200
7201 // Remove the switch.
7202 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7203 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7204 BasicBlock *Succ = SI->getSuccessor(i);
7205
7206 if (Succ == SI->getDefaultDest())
7207 continue;
7208 Succ->removePredecessor(BB);
7209 if (DTU && RemovedSuccessors.insert(Succ).second)
7210 Updates.push_back({DominatorTree::Delete, BB, Succ});
7211 }
7212 SI->eraseFromParent();
7213
7214 if (DTU)
7215 DTU->applyUpdates(Updates);
7216
7217 ++NumLookupTables;
7218 if (NeedMask)
7219 ++NumLookupTablesHoles;
7220 return true;
7221}
7222
7223/// Try to transform a switch that has "holes" in it to a contiguous sequence
7224/// of cases.
7225///
7226/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7227/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7228///
7229/// This converts a sparse switch into a dense switch which allows better
7230/// lowering and could also allow transforming into a lookup table.
7231static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
7232 const DataLayout &DL,
7233 const TargetTransformInfo &TTI) {
7234 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7235 if (CondTy->getIntegerBitWidth() > 64 ||
7236 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7237 return false;
7238 // Only bother with this optimization if there are more than 3 switch cases;
7239 // SDAG will only bother creating jump tables for 4 or more cases.
7240 if (SI->getNumCases() < 4)
7241 return false;
7242
7243 // This transform is agnostic to the signedness of the input or case values. We
7244 // can treat the case values as signed or unsigned. We can optimize more common
7245 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7246 // as signed.
7248 for (const auto &C : SI->cases())
7249 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7250 llvm::sort(Values);
7251
7252 // If the switch is already dense, there's nothing useful to do here.
7253 if (isSwitchDense(Values))
7254 return false;
7255
7256 // First, transform the values such that they start at zero and ascend.
7257 int64_t Base = Values[0];
7258 for (auto &V : Values)
7259 V -= (uint64_t)(Base);
7260
7261 // Now we have signed numbers that have been shifted so that, given enough
7262 // precision, there are no negative values. Since the rest of the transform
7263 // is bitwise only, we switch now to an unsigned representation.
7264
7265 // This transform can be done speculatively because it is so cheap - it
7266 // results in a single rotate operation being inserted.
7267
7268 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7269 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7270 // less than 64.
7271 unsigned Shift = 64;
7272 for (auto &V : Values)
7273 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7274 assert(Shift < 64);
7275 if (Shift > 0)
7276 for (auto &V : Values)
7277 V = (int64_t)((uint64_t)V >> Shift);
7278
7279 if (!isSwitchDense(Values))
7280 // Transform didn't create a dense switch.
7281 return false;
7282
7283 // The obvious transform is to shift the switch condition right and emit a
7284 // check that the condition actually cleanly divided by GCD, i.e.
7285 // C & (1 << Shift - 1) == 0
7286 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7287 //
7288 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7289 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7290 // are nonzero then the switch condition will be very large and will hit the
7291 // default case.
7292
7293 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7294 Builder.SetInsertPoint(SI);
7295 Value *Sub =
7296 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
7297 Value *Rot = Builder.CreateIntrinsic(
7298 Ty, Intrinsic::fshl,
7299 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7300 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7301
7302 for (auto Case : SI->cases()) {
7303 auto *Orig = Case.getCaseValue();
7304 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7305 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7306 }
7307 return true;
7308}
7309
7310/// Tries to transform switch of powers of two to reduce switch range.
7311/// For example, switch like:
7312/// switch (C) { case 1: case 2: case 64: case 128: }
7313/// will be transformed to:
7314/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7315///
7316/// This transformation allows better lowering and could allow transforming into
7317/// a lookup table.
7319 const DataLayout &DL,
7320 const TargetTransformInfo &TTI) {
7321 Value *Condition = SI->getCondition();
7322 LLVMContext &Context = SI->getContext();
7323 auto *CondTy = cast<IntegerType>(Condition->getType());
7324
7325 if (CondTy->getIntegerBitWidth() > 64 ||
7326 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7327 return false;
7328
7329 const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7330 IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7331 {Condition, ConstantInt::getTrue(Context)}),
7333
7334 if (CttzIntrinsicCost > TTI::TCC_Basic)
7335 // Inserting intrinsic is too expensive.
7336 return false;
7337
7338 // Only bother with this optimization if there are more than 3 switch cases.
7339 // SDAG will only bother creating jump tables for 4 or more cases.
7340 if (SI->getNumCases() < 4)
7341 return false;
7342
7343 // We perform this optimization only for switches with
7344 // unreachable default case.
7345 // This assumtion will save us from checking if `Condition` is a power of two.
7346 if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7347 return false;
7348
7349 // Check that switch cases are powers of two.
7351 for (const auto &Case : SI->cases()) {
7352 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7353 if (llvm::has_single_bit(CaseValue))
7354 Values.push_back(CaseValue);
7355 else
7356 return false;
7357 }
7358
7359 // isSwichDense requires case values to be sorted.
7360 llvm::sort(Values);
7361 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7362 llvm::countr_zero(Values.front()) + 1))
7363 // Transform is unable to generate dense switch.
7364 return false;
7365
7366 Builder.SetInsertPoint(SI);
7367
7368 // Replace each case with its trailing zeros number.
7369 for (auto &Case : SI->cases()) {
7370 auto *OrigValue = Case.getCaseValue();
7371 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7372 OrigValue->getValue().countr_zero()));
7373 }
7374
7375 // Replace condition with its trailing zeros number.
7376 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7377 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7378
7379 SI->setCondition(ConditionTrailingZeros);
7380
7381 return true;
7382}
7383
7384/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7385/// the same destination.
7387 DomTreeUpdater *DTU) {
7388 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7389 if (!Cmp || !Cmp->hasOneUse())
7390 return false;
7391
7393 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7394 if (!HasWeights)
7395 Weights.resize(4); // Avoid checking HasWeights everywhere.
7396
7397 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7398 int64_t Res;
7399 BasicBlock *Succ, *OtherSucc;
7400 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7401 BasicBlock *Unreachable = nullptr;
7402
7403 if (SI->getNumCases() == 2) {
7404 // Find which of 1, 0 or -1 is missing (handled by default dest).
7405 SmallSet<int64_t, 3> Missing;
7406 Missing.insert(1);
7407 Missing.insert(0);
7408 Missing.insert(-1);
7409
7410 Succ = SI->getDefaultDest();
7411 SuccWeight = Weights[0];
7412 OtherSucc = nullptr;
7413 for (auto &Case : SI->cases()) {
7414 std::optional<int64_t> Val =
7415 Case.getCaseValue()->getValue().trySExtValue();
7416 if (!Val)
7417 return false;
7418 if (!Missing.erase(*Val))
7419 return false;
7420 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7421 return false;
7422 OtherSucc = Case.getCaseSuccessor();
7423 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7424 }
7425
7426 assert(Missing.size() == 1 && "Should have one case left");
7427 Res = *Missing.begin();
7428 } else if (SI->getNumCases() == 3 && SI->defaultDestUndefined()) {
7429 // Normalize so that Succ is taken once and OtherSucc twice.
7430 Unreachable = SI->getDefaultDest();
7431 Succ = OtherSucc = nullptr;
7432 for (auto &Case : SI->cases()) {
7433 BasicBlock *NewSucc = Case.getCaseSuccessor();
7434 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7435 if (!OtherSucc || OtherSucc == NewSucc) {
7436 OtherSucc = NewSucc;
7437 OtherSuccWeight += Weight;
7438 } else if (!Succ) {
7439 Succ = NewSucc;
7440 SuccWeight = Weight;
7441 } else if (Succ == NewSucc) {
7442 std::swap(Succ, OtherSucc);
7443 std::swap(SuccWeight, OtherSuccWeight);
7444 } else
7445 return false;
7446 }
7447 for (auto &Case : SI->cases()) {
7448 std::optional<int64_t> Val =
7449 Case.getCaseValue()->getValue().trySExtValue();
7450 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7451 return false;
7452 if (Case.getCaseSuccessor() == Succ) {
7453 Res = *Val;
7454 break;
7455 }
7456 }
7457 } else {
7458 return false;
7459 }
7460
7461 // Determine predicate for the missing case.
7463 switch (Res) {
7464 case 1:
7465 Pred = ICmpInst::ICMP_UGT;
7466 break;
7467 case 0:
7468 Pred = ICmpInst::ICMP_EQ;
7469 break;
7470 case -1:
7471 Pred = ICmpInst::ICMP_ULT;
7472 break;
7473 }
7474 if (Cmp->isSigned())
7475 Pred = ICmpInst::getSignedPredicate(Pred);
7476
7477 MDNode *NewWeights = nullptr;
7478 if (HasWeights)
7479 NewWeights = MDBuilder(SI->getContext())
7480 .createBranchWeights(SuccWeight, OtherSuccWeight);
7481
7482 BasicBlock *BB = SI->getParent();
7483 Builder.SetInsertPoint(SI->getIterator());
7484 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7485 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7486 SI->getMetadata(LLVMContext::MD_unpredictable));
7487 OtherSucc->removePredecessor(BB);
7488 if (Unreachable)
7489 Unreachable->removePredecessor(BB);
7490 SI->eraseFromParent();
7491 Cmp->eraseFromParent();
7492 if (DTU && Unreachable)
7493 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7494 return true;
7495}
7496
7497/// Checking whether two cases of SI are equal depends on the contents of the
7498/// BasicBlock and the incoming values of their successor PHINodes.
7499/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7500/// calling this function on each BasicBlock every time isEqual is called,
7501/// especially since the same BasicBlock may be passed as an argument multiple
7502/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7503/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7504/// of the incoming values.
7508};
7509
7510namespace llvm {
7511template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
7513 return static_cast<SwitchSuccWrapper *>(
7515 }
7517 return static_cast<SwitchSuccWrapper *>(
7519 }
7520 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7521 BasicBlock *Succ = SSW->Dest;
7522 BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
7523 assert(BI->isUnconditional() &&
7524 "Only supporting unconditional branches for now");
7525 assert(BI->getNumSuccessors() == 1 &&
7526 "Expected unconditional branches to have one successor");
7527 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7528
7529 // Since we assume the BB is just a single BranchInst with a single
7530 // successor, we hash as the BB and the incoming Values of its successor
7531 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7532 // including the incoming PHI values leads to better performance.
7533 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7534 // time and passing it in SwitchSuccWrapper, but this slowed down the
7535 // average compile time without having any impact on the worst case compile
7536 // time.
7537 BasicBlock *BB = BI->getSuccessor(0);
7538 SmallVector<Value *> PhiValsForBB;
7539 for (PHINode &Phi : BB->phis())
7540 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7541
7542 return hash_combine(
7543 BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end()));
7544 }
7545 static bool isEqual(const SwitchSuccWrapper *LHS,
7546 const SwitchSuccWrapper *RHS) {
7549 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7550 return LHS == RHS;
7551
7552 BasicBlock *A = LHS->Dest;
7553 BasicBlock *B = RHS->Dest;
7554
7555 // FIXME: we checked that the size of A and B are both 1 in
7556 // simplifyDuplicateSwitchArms to make the Case list smaller to
7557 // improve performance. If we decide to support BasicBlocks with more
7558 // than just a single instruction, we need to check that A.size() ==
7559 // B.size() here, and we need to check more than just the BranchInsts
7560 // for equality.
7561
7562 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7563 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7564 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7565 "Only supporting unconditional branches for now");
7566 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7567 return false;
7568
7569 // Need to check that PHIs in successor have matching values
7570 BasicBlock *Succ = ABI->getSuccessor(0);
7571 for (PHINode &Phi : Succ->phis()) {
7572 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7573 if (PredIVs[A] != PredIVs[B])
7574 return false;
7575 }
7576
7577 return true;
7578 }
7579};
7580} // namespace llvm
7581
7582bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7583 DomTreeUpdater *DTU) {
7584 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7585 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7586 // an entire PHI at once after the loop, opposed to calling
7587 // getIncomingValueForBlock inside this loop, since each call to
7588 // getIncomingValueForBlock is O(|Preds|).
7594 Cases.reserve(SI->getNumSuccessors());
7595
7596 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7597 BasicBlock *BB = SI->getSuccessor(I);
7598
7599 // FIXME: Support more than just a single BranchInst. One way we could do
7600 // this is by taking a hashing approach of all insts in BB.
7601 if (BB->size() != 1)
7602 continue;
7603
7604 // FIXME: This case needs some extra care because the terminators other than
7605 // SI need to be updated. For now, consider only backedges to the SI.
7606 if (BB->hasNPredecessorsOrMore(4) ||
7607 BB->getUniquePredecessor() != SI->getParent())
7608 continue;
7609
7610 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7611 // on other kinds of terminators. We decide to only support unconditional
7612 // branches for now for compile time reasons.
7613 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
7614 if (!BI || BI->isConditional())
7615 continue;
7616
7617 if (Seen.insert(BB).second) {
7618 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7619 for (BasicBlock *Succ : BI->successors())
7620 for (PHINode &Phi : Succ->phis())
7621 Phis.insert(&Phi);
7622 // Add the successor only if not previously visited.
7623 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7624 }
7625
7626 BBToSuccessorIndexes[BB].emplace_back(I);
7627 }
7628
7629 // Precompute a data structure to improve performance of isEqual for
7630 // SwitchSuccWrapper.
7631 PhiPredIVs.reserve(Phis.size());
7632 for (PHINode *Phi : Phis) {
7633 PhiPredIVs[Phi] =
7634 SmallDenseMap<BasicBlock *, Value *, 8>(Phi->getNumIncomingValues());
7635 for (auto &IV : Phi->incoming_values())
7636 PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()});
7637 }
7638
7639 // Build a set such that if the SwitchSuccWrapper exists in the set and
7640 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7641 // which is not in the set should be replaced with the one in the set. If the
7642 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7643 // other SwitchSuccWrappers can check against it in the same manner. We use
7644 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7645 // around information to isEquality, getHashValue, and when doing the
7646 // replacement with better performance.
7648 ReplaceWith.reserve(Cases.size());
7649
7651 Updates.reserve(ReplaceWith.size());
7652 bool MadeChange = false;
7653 for (auto &SSW : Cases) {
7654 // SSW is a candidate for simplification. If we find a duplicate BB,
7655 // replace it.
7656 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7657 if (!Inserted) {
7658 // We know that SI's parent BB no longer dominates the old case successor
7659 // since we are making it dead.
7660 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7661 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7662 for (unsigned Idx : Successors)
7663 SI->setSuccessor(Idx, (*It)->Dest);
7664 MadeChange = true;
7665 }
7666 }
7667
7668 if (DTU)
7669 DTU->applyUpdates(Updates);
7670
7671 return MadeChange;
7672}
7673
7674bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7675 BasicBlock *BB = SI->getParent();
7676
7677 if (isValueEqualityComparison(SI)) {
7678 // If we only have one predecessor, and if it is a branch on this value,
7679 // see if that predecessor totally determines the outcome of this switch.
7680 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7681 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7682 return requestResimplify();
7683
7684 Value *Cond = SI->getCondition();
7685 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7686 if (simplifySwitchOnSelect(SI, Select))
7687 return requestResimplify();
7688
7689 // If the block only contains the switch, see if we can fold the block
7690 // away into any preds.
7691 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7692 if (foldValueComparisonIntoPredecessors(SI, Builder))
7693 return requestResimplify();
7694 }
7695
7696 // Try to transform the switch into an icmp and a branch.
7697 // The conversion from switch to comparison may lose information on
7698 // impossible switch values, so disable it early in the pipeline.
7699 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7700 return requestResimplify();
7701
7702 // Remove unreachable cases.
7703 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7704 return requestResimplify();
7705
7706 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
7707 return requestResimplify();
7708
7709 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7710 return requestResimplify();
7711
7712 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
7713 return requestResimplify();
7714
7715 // The conversion from switch to lookup tables results in difficult-to-analyze
7716 // code and makes pruning branches much harder. This is a problem if the
7717 // switch expression itself can still be restricted as a result of inlining or
7718 // CVP. Therefore, only apply this transformation during late stages of the
7719 // optimisation pipeline.
7720 if (Options.ConvertSwitchToLookupTable &&
7721 switchToLookupTable(SI, Builder, DTU, DL, TTI))
7722 return requestResimplify();
7723
7724 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7725 return requestResimplify();
7726
7727 if (reduceSwitchRange(SI, Builder, DL, TTI))
7728 return requestResimplify();
7729
7730 if (HoistCommon &&
7731 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
7732 return requestResimplify();
7733
7734 if (simplifyDuplicateSwitchArms(SI, DTU))
7735 return requestResimplify();
7736
7737 return false;
7738}
7739
// Simplify an indirectbr terminator. Destinations whose block never has its
// address taken, and repeated destinations, are removed; afterwards the
// zero-destination and one-destination cases are special-cased, and an
// indirectbr whose address is a select is handed to
// simplifyIndirectBrOnSelect(). Returns true from the degenerate cases, the
// result of requestResimplify() after a select fold, and otherwise whether any
// destination was removed.
//
// NOTE(review): this listing skips embedded lines 7745-7746, 7771 and
// 7777-7778. The declarations of Succs and RemovedSuccs, and the statements
// that finish the zero/one-destination rewrites, are presumably on those
// lines -- confirm against the complete file before editing the code.
7740 bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7741 BasicBlock *BB = IBI->getParent();
7742 bool Changed = false;
7743
7744 // Eliminate redundant destinations.
7747 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7748 BasicBlock *Dest = IBI->getDestination(i);
// Drop Dest when its address is never taken, or when inserting it into Succs
// reports that it was already recorded (presumably Succs is a set -- its
// declaration is not visible here).
7749 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
// Only destinations without a taken address are remembered for the dominator
// tree update below; a merely duplicated destination is not recorded.
7750 if (!Dest->hasAddressTaken())
7751 RemovedSuccs.insert(Dest);
7752 Dest->removePredecessor(BB);
7753 IBI->removeDestination(i);
// One destination was removed: step the index back so the same slot is
// examined again, and shrink the loop bound to match.
7754 --i;
7755 --e;
7756 Changed = true;
7757 }
7758 }
7759
// Report each recorded BB -> RemovedSucc edge as deleted.
7760 if (DTU) {
7761 std::vector<DominatorTree::UpdateType> Updates;
7762 Updates.reserve(RemovedSuccs.size());
7763 for (auto *RemovedSucc : RemovedSuccs)
7764 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7765 DTU->applyUpdates(Updates);
7766 }
7767
7768 if (IBI->getNumDestinations() == 0) {
7769 // If the indirectbr has no successors, change it to unreachable.
7770 new UnreachableInst(IBI->getContext(), IBI->getIterator());
// NOTE(review): embedded line 7771 is missing from this listing.
7772 return true;
7773 }
7774
7775 if (IBI->getNumDestinations() == 1) {
7776 // If the indirectbr has one successor, change it to a direct branch.
// NOTE(review): embedded lines 7777-7778 are missing from this listing.
7779 return true;
7780 }
7781
7782 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7783 if (simplifyIndirectBrOnSelect(IBI, SI))
7784 return requestResimplify();
7785 }
7786 return Changed;
7787 }
7788
7789 /// Given a block with only a single landing pad and an unconditional branch
7790/// try to find another basic block which this one can be merged with. This
7791/// handles cases where we have multiple invokes with unique landing pads, but
7792/// a shared handler.
7793///
7794/// We specifically choose to not worry about merging non-empty blocks
7795/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7796/// practice, the optimizer produces empty landing pad blocks quite frequently
7797/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7798/// sinking in this file)
7799///
7800/// This is primarily a code size optimization. We need to avoid performing
7801/// any transform which might inhibit optimization (such as our ability to
7802/// specialize a particular handler via tail commoning). We do this by not
7803/// merging any blocks which require us to introduce a phi. Since the same
7804/// values are flowing through both blocks, we don't lose any ability to
7805/// specialize. If anything, we make such specialization more likely.
7806///
7807/// TODO - This transformation could remove entries from a phi in the target
7808/// block when the inputs in the phi are the same for the two blocks being
7809/// merged. In some cases, this could result in removal of the PHI entirely.
7811 BasicBlock *BB, DomTreeUpdater *DTU) {
7812 auto Succ = BB->getUniqueSuccessor();
7813 assert(Succ);
7814 // If there's a phi in the successor block, we'd likely have to introduce
7815 // a phi into the merged landing pad block.
7816 if (isa<PHINode>(*Succ->begin()))
7817 return false;
7818
7819 for (BasicBlock *OtherPred : predecessors(Succ)) {
7820 if (BB == OtherPred)
7821 continue;
7822 BasicBlock::iterator I = OtherPred->begin();
7823 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7824 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7825 continue;
7826 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7827 ;
7828 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7829 if (!BI2 || !BI2->isIdenticalTo(BI))
7830 continue;
7831
7832 std::vector<DominatorTree::UpdateType> Updates;
7833
7834 // We've found an identical block. Update our predecessors to take that
7835 // path instead and make ourselves dead.
7837 for (BasicBlock *Pred : UniquePreds) {
7838 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7839 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7840 "unexpected successor");
7841 II->setUnwindDest(OtherPred);
7842 if (DTU) {
7843 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7844 Updates.push_back({DominatorTree::Delete, Pred, BB});
7845 }
7846 }
7847
7848 // The debug info in OtherPred doesn't cover the merged control flow that
7849 // used to go through BB. We need to delete it or update it.
7850 for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7851 if (isa<DbgInfoIntrinsic>(Inst))
7852 Inst.eraseFromParent();
7853
7855 for (BasicBlock *Succ : UniqueSuccs) {
7856 Succ->removePredecessor(BB);
7857 if (DTU)
7858 Updates.push_back({DominatorTree::Delete, BB, Succ});
7859 }
7860
7861 IRBuilder<> Builder(BI);
7862 Builder.CreateUnreachable();
7863 BI->eraseFromParent();
7864 if (DTU)
7865 DTU->applyUpdates(Updates);
7866 return true;
7867 }
7868 return false;
7869}
7870
7871bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7872 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7873 : simplifyCondBranch(Branch, Builder);
7874}
7875
// Try the simplifications that apply to an unconditional branch BI, in order:
// removing a block that holds nothing but the branch, folding a lone equality
// icmp against a constant, merging an empty landing-pad block with an
// equivalent one, and folding the branch into a predecessor that shares a
// destination. Returns true (or the result of requestResimplify()) when one of
// them succeeds and false otherwise.
//
// NOTE(review): embedded line 7892 is missing from this listing. The
// declaration of the iterator `I` used below is presumably on that line --
// confirm against the complete file.
7876 bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7877 IRBuilder<> &Builder) {
7878 BasicBlock *BB = BI->getParent();
7879 BasicBlock *Succ = BI->getSuccessor(0);
7880
7881 // If the Terminator is the only non-phi instruction, simplify the block.
7882 // If LoopHeader is provided, check if the block or its successor is a loop
7883 // header. (This is for early invocations before loop simplify and
7884 // vectorization to keep canonical loop forms for nested loops. These blocks
7885 // can be eliminated when the pass is invoked later in the back-end.)
7886 // Note that if BB has only one predecessor then we do not introduce new
7887 // backedge, so we can eliminate BB.
7888 bool NeedCanonicalLoop =
7889 Options.NeedCanonicalLoop &&
7890 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7891 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
// The entry block is never removed here, and nothing is removed while
// NeedCanonicalLoop is set.
7893 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7894 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7895 return true;
7896
7897 // If the only instruction in the block is a seteq/setne comparison against a
7898 // constant, try to simplify the block.
7899 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7900 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
// Skip debug intrinsics that follow the compare.
7901 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7902 ;
7903 if (I->isTerminator() &&
7904 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7905 return true;
7906 }
7907
7908 // See if we can merge an empty landing pad block with another which is
7909 // equivalent.
7910 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
// Skip debug intrinsics that follow the landing pad.
7911 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7912 ;
7913 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
7914 return true;
7915 }
7916
7917 // If this basic block is ONLY a compare and a branch, and if a predecessor
7918 // branches to us and our successor, fold the comparison into the
7919 // predecessor and use logical operations to update the incoming value
7920 // for PHI nodes in common successor.
7921 if (Options.SpeculateBlocks &&
7922 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7923 Options.BonusInstThreshold))
7924 return requestResimplify();
7925 return false;
7926 }
7927
7929 BasicBlock *PredPred = nullptr;
7930 for (auto *P : predecessors(BB)) {
7931 BasicBlock *PPred = P->getSinglePredecessor();
7932 if (!PPred || (PredPred && PredPred != PPred))
7933 return nullptr;
7934 PredPred = PPred;
7935 }
7936 return PredPred;
7937}
7938
7939/// Fold the following pattern:
7940/// bb0:
7941/// br i1 %cond1, label %bb1, label %bb2
7942/// bb1:
7943/// br i1 %cond2, label %bb3, label %bb4
7944/// bb2:
7945/// br i1 %cond2, label %bb4, label %bb3
7946/// bb3:
7947/// ...
7948/// bb4:
7949/// ...
7950/// into
7951/// bb0:
7952/// %cond = xor i1 %cond1, %cond2
7953/// br i1 %cond, label %bb4, label %bb3
7954/// bb3:
7955/// ...
7956/// bb4:
7957/// ...
7958/// NOTE: %cond2 always dominates the terminator of bb0.
7960 BasicBlock *BB = BI->getParent();
7961 BasicBlock *BB1 = BI->getSuccessor(0);
7962 BasicBlock *BB2 = BI->getSuccessor(1);
7963 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
7964 if (Succ == BB)
7965 return false;
7966 if (&Succ->front() != Succ->getTerminator())
7967 return false;
7968 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
7969 if (!SuccBI || !SuccBI->isConditional())
7970 return false;
7971 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
7972 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
7973 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
7974 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
7975 };
7976 BranchInst *BB1BI, *BB2BI;
7977 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
7978 return false;
7979
7980 if (BB1BI->getCondition() != BB2BI->getCondition() ||
7981 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
7982 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
7983 return false;
7984
7985 BasicBlock *BB3 = BB1BI->getSuccessor(0);
7986 BasicBlock *BB4 = BB1BI->getSuccessor(1);
7987 IRBuilder<> Builder(BI);
7988 BI->setCondition(
7989 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
7990 BB1->removePredecessor(BB);
7991 BI->setSuccessor(0, BB4);
7992 BB2->removePredecessor(BB);
7993 BI->setSuccessor(1, BB3);
7994 if (DTU) {
7996 Updates.push_back({DominatorTree::Delete, BB, BB1});
7997 Updates.push_back({DominatorTree::Insert, BB, BB4});
7998 Updates.push_back({DominatorTree::Delete, BB, BB2});
7999 Updates.push_back({DominatorTree::Insert, BB, BB3});
8000
8001 DTU->applyUpdates(Updates);
8002 }
8003 bool HasWeight = false;
8004 uint64_t BBTWeight, BBFWeight;
8005 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8006 HasWeight = true;
8007 else
8008 BBTWeight = BBFWeight = 1;
8009 uint64_t BB1TWeight, BB1FWeight;
8010 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8011 HasWeight = true;
8012 else
8013 BB1TWeight = BB1FWeight = 1;
8014 uint64_t BB2TWeight, BB2FWeight;
8015 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8016 HasWeight = true;
8017 else
8018 BB2TWeight = BB2FWeight = 1;
8019 if (HasWeight) {
8020 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8021 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8022 fitWeights(Weights);
8023 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
8024 }
8025 return true;
8026}
8027
// Try the simplifications that apply to a conditional branch BI. Does nothing
// (returns false) when Options.SimplifyCondBranch is off or the function has
// the OptForFuzzing attribute. Otherwise the transforms below are attempted in
// order and the first success ends the call, usually through
// requestResimplify().
//
// NOTE(review): embedded lines 8074, 8096, 8109, 8146 and 8158 are missing
// from this listing, so a few statements and conditions below are visibly
// incomplete -- confirm against the complete file before editing the code.
8028 bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8029 assert(
8030 !isa<ConstantInt>(BI->getCondition()) &&
8031 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8032 "Tautological conditional branch should have been eliminated already.");
8033
8034 BasicBlock *BB = BI->getParent();
8035 if (!Options.SimplifyCondBranch ||
8036 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8037 return false;
8038
8039 // Conditional branch
8040 if (isValueEqualityComparison(BI)) {
8041 // If we only have one predecessor, and if it is a branch on this value,
8042 // see if that predecessor totally determines the outcome of this
8043 // switch.
8044 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8045 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8046 return requestResimplify();
8047
8048 // This block must be empty, except for the setcond inst, if it exists.
8049 // Ignore dbg and pseudo intrinsics.
8050 auto I = BB->instructionsWithoutDebug(true).begin();
8051 if (&*I == BI) {
8052 if (foldValueComparisonIntoPredecessors(BI, Builder))
8053 return requestResimplify();
8054 } else if (&*I == cast<Instruction>(BI->getCondition())) {
// The block starts with the condition itself; the branch must come right
// after it for the fold to be attempted.
8055 ++I;
8056 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8057 return requestResimplify();
8058 }
8059 }
8060
8061 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8062 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8063 return true;
8064
8065 // If this basic block has dominating predecessor blocks and the dominating
8066 // blocks' conditions imply BI's condition, we know the direction of BI.
8067 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8068 if (Imp) {
8069 // Turn this into a branch on constant.
8070 auto *OldCond = BI->getCondition();
8071 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8072 : ConstantInt::getFalse(BB->getContext());
8073 BI->setCondition(TorF);
// NOTE(review): embedded line 8074 is missing; OldCond is presumably consumed
// there.
8075 return requestResimplify();
8076 }
8077
8078 // If this basic block is ONLY a compare and a branch, and if a predecessor
8079 // branches to us and one of our successors, fold the comparison into the
8080 // predecessor and use logical operations to pick the right destination.
8081 if (Options.SpeculateBlocks &&
8082 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8083 Options.BonusInstThreshold))
8084 return requestResimplify();
8085
8086 // We have a conditional branch to two blocks that are only reachable
8087 // from BI. We know that the condbr dominates the two blocks, so see if
8088 // there is any identical code in the "then" and "else" blocks. If so, we
8089 // can hoist it up to the branching block.
8090 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8091 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8092 if (HoistCommon &&
8093 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8094 return requestResimplify();
8095
// NOTE(review): embedded line 8096 (the start of this `if` condition) is
// missing from this listing.
8097 Options.HoistLoadsStoresWithCondFaulting &&
8098 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8099 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
// Collects every non-terminator instruction of both successors into
// SpeculatedConditionalLoadsStores. Fails if a successor's terminator has more
// than one successor or an instruction is rejected by isSafeCheapLoadStore();
// succeeds only if at least one instruction was collected.
8100 auto CanSpeculateConditionalLoadsStores = [&]() {
8101 for (auto *Succ : successors(BB)) {
8102 for (Instruction &I : *Succ) {
8103 if (I.isTerminator()) {
8104 if (I.getNumSuccessors() > 1)
8105 return false;
8106 continue;
8107 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8108 SpeculatedConditionalLoadsStores.size() ==
// NOTE(review): embedded line 8109 (the right-hand side of this size
// comparison) is missing from this listing.
8110 return false;
8111 }
8112 SpeculatedConditionalLoadsStores.push_back(&I);
8113 }
8114 }
8115 return !SpeculatedConditionalLoadsStores.empty();
8116 };
8117
8118 if (CanSpeculateConditionalLoadsStores()) {
8119 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8120 std::nullopt);
8121 return requestResimplify();
8122 }
8123 }
8124 } else {
8125 // If Successor #1 has multiple preds, we may be able to conditionally
8126 // execute Successor #0 if it branches to Successor #1.
8127 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8128 if (Succ0TI->getNumSuccessors() == 1 &&
8129 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8130 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8131 return requestResimplify();
8132 }
8133 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8134 // If Successor #0 has multiple preds, we may be able to conditionally
8135 // execute Successor #1 if it branches to Successor #0.
8136 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8137 if (Succ1TI->getNumSuccessors() == 1 &&
8138 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8139 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8140 return requestResimplify();
8141 }
8142
8143 // If this is a branch on something for which we know the constant value in
8144 // predecessors (e.g. a phi node in the current block), thread control
8145 // through this block.
// NOTE(review): embedded line 8146 (the condition guarding this return) is
// missing from this listing.
8147 return requestResimplify();
8148
8149 // Scan predecessor blocks for conditional branches.
8150 for (BasicBlock *Pred : predecessors(BB))
8151 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8152 if (PBI != BI && PBI->isConditional())
8153 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8154 return requestResimplify();
8155
8156 // Look for diamond patterns.
8157 if (MergeCondStores)
// NOTE(review): embedded line 8158 (where PrevBB is introduced) is missing
// from this listing.
8159 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8160 if (PBI != BI && PBI->isConditional())
8161 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8162 return requestResimplify();
8163
8164 // Look for nested conditional branches.
8165 if (mergeNestedCondBranch(BI, DTU))
8166 return requestResimplify();
8167
8168 return false;
8169 }
8170
8171 /// Check if passing a value to an instruction will cause undefined behavior.
///
/// \p V is the value being passed and \p I the instruction that receives it.
/// Only constant \p V that is null or undef is examined, and only the first
/// user of \p I with a handled opcode is inspected; that user must sit after
/// \p I in the same block. \p PtrValueMayBeModified is set while looking
/// through a GEP that may move the pointer away from null, and it suppresses
/// the nonnull-based conclusions for returns and call arguments.
/// Returns true only when one of the cases below proves undefined behavior.
///
/// NOTE(review): embedded line 8221 (the body of the lambda passed to any_of)
/// is missing from this listing -- confirm against the complete file.
8172 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8173 Constant *C = dyn_cast<Constant>(V);
8174 if (!C)
8175 return false;
8176
8177 if (I->use_empty())
8178 return false;
8179
8180 if (C->isNullValue() || isa<UndefValue>(C)) {
8181 // Only look at the first use we can handle, avoid hurting compile time with
8182 // long uselists
8183 auto FindUse = llvm::find_if(I->users(), [](auto *U) {
8184 auto *Use = cast<Instruction>(U);
8185 // Change this list when we want to add new instructions.
8186 switch (Use->getOpcode()) {
8187 default:
8188 return false;
8189 case Instruction::GetElementPtr:
8190 case Instruction::Ret:
8191 case Instruction::BitCast:
8192 case Instruction::Load:
8193 case Instruction::Store:
8194 case Instruction::Call:
8195 case Instruction::CallBr:
8196 case Instruction::Invoke:
8197 case Instruction::UDiv:
8198 case Instruction::URem:
8199 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8200 // implemented to avoid code complexity as it is unclear how useful such
8201 // logic is.
8202 case Instruction::SDiv:
8203 case Instruction::SRem:
8204 return true;
8205 }
8206 });
8207 if (FindUse == I->user_end())
8208 return false;
8209 auto *Use = cast<Instruction>(*FindUse);
8210 // Bail out if Use is not in the same BB as I or Use == I or Use comes
8211 // before I in the block. The latter two can be the case if Use is a
8212 // PHI node.
8213 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
8214 return false;
8215
8216 // Now make sure that there are no instructions in between that can alter
8217 // control flow (eg. calls)
8218 auto InstrRange =
8219 make_range(std::next(I->getIterator()), Use->getIterator());
8220 if (any_of(InstrRange, [](Instruction &I) {
// NOTE(review): the lambda body (embedded line 8221) is missing here.
8222 }))
8223 return false;
8224
8225 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8226 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
8227 if (GEP->getPointerOperand() == I) {
8228 // The current base address is null, there are four cases to consider:
8229 // getelementptr (TY, null, 0) -> null
8230 // getelementptr (TY, null, not zero) -> may be modified
8231 // getelementptr inbounds (TY, null, 0) -> null
8232 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8233 // undefined?
8234 if (!GEP->hasAllZeroIndices() &&
8235 (!GEP->isInBounds() ||
8236 NullPointerIsDefined(GEP->getFunction(),
8237 GEP->getPointerAddressSpace())))
8238 PtrValueMayBeModified = true;
// Recurse with the GEP as the receiving instruction, still passing the
// original value V.
8239 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8240 }
8241
8242 // Look through return.
8243 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Use)) {
8244 bool HasNoUndefAttr =
8245 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8246 // Return undefined to a noundef return value is undefined.
8247 if (isa<UndefValue>(C) && HasNoUndefAttr)
8248 return true;
8249 // Return null to a nonnull+noundef return value is undefined.
8250 if (C->isNullValue() && HasNoUndefAttr &&
8251 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8252 return !PtrValueMayBeModified;
8253 }
8254 }
8255
8256 // Load from null is undefined.
8257 if (LoadInst *LI = dyn_cast<LoadInst>(Use))
8258 if (!LI->isVolatile())
8259 return !NullPointerIsDefined(LI->getFunction(),
8260 LI->getPointerAddressSpace());
8261
8262 // Store to null is undefined.
// Only counts when I is the store's pointer operand rather than the value
// being stored.
8263 if (StoreInst *SI = dyn_cast<StoreInst>(Use))
8264 if (!SI->isVolatile())
8265 return (!NullPointerIsDefined(SI->getFunction(),
8266 SI->getPointerAddressSpace())) &&
8267 SI->getPointerOperand() == I;
8268
8269 // llvm.assume(false/undef) always triggers immediate UB.
8270 if (auto *Assume = dyn_cast<AssumeInst>(Use)) {
8271 // Ignore assume operand bundles.
8272 if (I == Assume->getArgOperand(0))
8273 return true;
8274 }
8275
8276 if (auto *CB = dyn_cast<CallBase>(Use)) {
// Nothing can be concluded for a null constant when the calling function
// treats the null pointer as defined.
8277 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8278 return false;
8279 // A call to null is undefined.
8280 if (CB->getCalledOperand() == I)
8281 return true;
8282
8283 if (C->isNullValue()) {
8284 for (const llvm::Use &Arg : CB->args())
8285 if (Arg == I) {
8286 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
8287 if (CB->isPassingUndefUB(ArgIdx) &&
8288 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
8289 // Passing null to a nonnull+noundef argument is undefined.
8290 return !PtrValueMayBeModified;
8291 }
8292 }
8293 } else if (isa<UndefValue>(C)) {
8294 // Passing undef to a noundef argument is undefined.
8295 for (const llvm::Use &Arg : CB->args())
8296 if (Arg == I) {
8297 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
8298 if (CB->isPassingUndefUB(ArgIdx)) {
8299 // Passing undef to a noundef argument is undefined.
8300 return true;
8301 }
8302 }
8303 }
8304 }
8305 // Div/Rem by zero is immediate UB
// The match requires I to be the second operand, i.e. the divisor.
8306 if (match(Use, m_BinOp(m_Value(), m_Specific(I))) && Use->isIntDivRem())
8307 return true;
8308 }
8309 return false;
8310 }
8311
8312/// If BB has an incoming value that will always trigger undefined behavior
8313/// (eg. null pointer dereference), remove the branch leading here.
8315 DomTreeUpdater *DTU,
8316 AssumptionCache *AC) {
8317 for (PHINode &PHI : BB->phis())
8318 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8319 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8320 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8321 Instruction *T = Predecessor->getTerminator();
8322 IRBuilder<> Builder(T);
8323 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8324 BB->removePredecessor(Predecessor);
8325 // Turn unconditional branches into unreachables and remove the dead
8326 // destination from conditional branches.
8327 if (BI->isUnconditional())
8328 Builder.CreateUnreachable();
8329 else {
8330 // Preserve guarding condition in assume, because it might not be
8331 // inferrable from any dominating condition.
8332 Value *Cond = BI->getCondition();
8333 CallInst *Assumption;
8334 if (BI->getSuccessor(0) == BB)
8335 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8336 else
8337 Assumption = Builder.CreateAssumption(Cond);
8338 if (AC)
8339 AC->registerAssumption(cast<AssumeInst>(Assumption));
8340 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8341 : BI->getSuccessor(0));
8342 }
8343 BI->eraseFromParent();
8344 if (DTU)
8345 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8346 return true;
8347 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8348 // Redirect all branches leading to UB into
8349 // a newly created unreachable block.
8350 BasicBlock *Unreachable = BasicBlock::Create(
8351 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8352 Builder.SetInsertPoint(Unreachable);
8353 // The new block contains only one instruction: Unreachable
8354 Builder.CreateUnreachable();
8355 for (const auto &Case : SI->cases())
8356 if (Case.getCaseSuccessor() == BB) {
8357 BB->removePredecessor(Predecessor);
8358 Case.setSuccessor(Unreachable);
8359 }
8360 if (SI->getDefaultDest() == BB) {
8361 BB->removePredecessor(Predecessor);
8362 SI->setDefaultDest(Unreachable);
8363 }
8364
8365 if (DTU)
8366 DTU->applyUpdates(
8367 { { DominatorTree::Insert, Predecessor, Unreachable },
8368 { DominatorTree::Delete, Predecessor, BB } });
8369 return true;
8370 }
8371 }
8372
8373 return false;
8374}
8375
// Perform a single round of simplification on BB: delete the block if it is
// unreachable, fold its terminator and duplicate PHIs, try the block-level
// merges/sinks/PHI folds, and finally dispatch on the terminator's opcode to
// the matching simplify* routine. Returns true when something changed; several
// steps return immediately after a successful transform.
//
// NOTE(review): embedded lines 8400 and 8433 are missing from this listing.
// The condition guarding the first requestResimplify() and the declaration of
// `Terminator` are presumably on those lines -- confirm against the complete
// file.
8376 bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8377 bool Changed = false;
8378
8379 assert(BB && BB->getParent() && "Block not embedded in function!");
8380 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8381
8382 // Remove basic blocks that have no predecessors (except the entry block)...
8383 // or that just have themself as a predecessor. These are unreachable.
8384 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8385 BB->getSinglePredecessor() == BB) {
8386 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8387 DeleteDeadBlock(BB, DTU);
8388 return true;
8389 }
8390
8391 // Check to see if we can constant propagate this terminator instruction
8392 // away...
8393 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8394 /*TLI=*/nullptr, DTU);
8395
8396 // Check for and eliminate duplicate PHI nodes in this block.
8397 Changed |= EliminateDuplicatePHINodes(BB);
8398
8399 // Check for and remove branches that will always cause undefined behavior.
// NOTE(review): embedded line 8400 (the condition guarding this return) is
// missing from this listing.
8401 return requestResimplify();
8402
8403 // Merge basic blocks into their predecessor if there is only one distinct
8404 // pred, and if there is only one distinct successor of the predecessor, and
8405 // if there are no PHI nodes.
8406 if (MergeBlockIntoPredecessor(BB, DTU))
8407 return true;
8408
8409 if (SinkCommon && Options.SinkCommonInsts)
8410 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8411 mergeCompatibleInvokes(BB, DTU)) {
8412 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8413 // so we may now have duplicate PHI's.
8414 // Let's rerun EliminateDuplicatePHINodes() first,
8415 // before foldTwoEntryPHINode() potentially converts them into select's,
8416 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8417 return true;
8418 }
8419
8420 IRBuilder<> Builder(BB);
8421
8422 if (Options.SpeculateBlocks &&
8423 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8424 // If there is a trivial two-entry PHI node in this basic block, and we can
8425 // eliminate it, do so now.
8426 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8427 if (PN->getNumIncomingValues() == 2)
8428 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8429 Options.SpeculateUnpredictables))
8430 return true;
8431 }
8432
// NOTE(review): embedded line 8433 (where `Terminator` is introduced) is
// missing from this listing.
8434 Builder.SetInsertPoint(Terminator);
// Dispatch on the terminator kind; opcodes not listed here get no
// terminator-specific simplification.
8435 switch (Terminator->getOpcode()) {
8436 case Instruction::Br:
8437 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8438 break;
8439 case Instruction::Resume:
8440 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8441 break;
8442 case Instruction::CleanupRet:
8443 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8444 break;
8445 case Instruction::Switch:
8446 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8447 break;
8448 case Instruction::Unreachable:
8449 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8450 break;
8451 case Instruction::IndirectBr:
8452 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8453 break;
8454 }
8455
8456 return Changed;
8457 }
8458
8459bool SimplifyCFGOpt::run(BasicBlock *BB) {
8460 bool Changed = false;
8461
8462 // Repeated simplify BB as long as resimplification is requested.
8463 do {
8464 Resimplify = false;
8465
8466 // Perform one round of simplifcation. Resimplify flag will be set if
8467 // another iteration is requested.
8468 Changed |= simplifyOnce(BB);
8469 } while (Resimplify);
8470
8471 return Changed;
8472}
8473
8476 ArrayRef<WeakVH> LoopHeaders) {
8477 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8478 Options)
8479 .run(BB);
8480}
#define Fail
#define Success
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
hexagon gen pred
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< bool > HoistLoadsStoresWithCondFaulting("simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads/stores if the target supports " "conditional faulting"))
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return true if an instruction's type or any of its operands' types is a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch, try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights, bool IsExpected)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist or sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert)
If the target supports conditional faulting, we look for the following pattern:
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditonal load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool isLifeTimeMarker(const Instruction *I)
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition: APInt.h:1554
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1915
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:177
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:171
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:378
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:250
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:658
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:367
const Instruction & front() const
Definition: BasicBlock.h:471
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:481
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:497
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:331
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:467
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:489
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:717
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:386
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:67
size_t size() const
Definition: BasicBlock.h:469
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:677
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:485
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:631
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:516
The address of a basic block.
Definition: Constants.h:893
BasicBlock * getBasicBlock() const
Definition: Constants.h:924
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
Definition: InstrTypes.h:1576
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2625
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isNegative() const
Definition: Constants.h:203
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:258
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:187
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:866
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:873
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:151
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
iterator end()
Definition: DenseMap.h:84
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:202
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition: DenseMap.h:103
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
iterator begin()
Definition: Function.h:853
size_t size() const
Definition: Function.h:858
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
bool hasPostDomTree() const
Returns true if it holds a PostDomTreeT.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2289
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2066
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1286
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:521
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1048
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2566
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1460
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:308
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:217
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1897
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:230
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1772
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1167
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2273
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1367
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2155
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1144
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1813
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2048
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1498
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1826
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1350
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2145
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1689
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1138
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1699
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2227
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1542
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2383
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1705
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1384
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
Return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:104
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:68
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:390
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:277
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
Definition: Instruction.h:906
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1750
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:949
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:42
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:176
static unsigned getPointerOperandIndex()
Definition: Instructions.h:257
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1069
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:704
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
Align getAlign() const
Definition: Instructions.h:333
bool isSimple() const
Definition: Instructions.h:370
Value * getValueOperand()
Definition: Instructions.h:378
bool isUnordered() const
Definition: Instructions.h:372
static unsigned getPointerOperandIndex()
Definition: Instructions.h:383
Value * getPointerOperand()
Definition: Instructions.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove the corresponding branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:234
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void set(Value *Val)
Definition: Value.h:886
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
op_range operands()
Definition: User.h:288
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:241
void setOperand(unsigned i, Value *Val)
Definition: User.h:233
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
static constexpr uint64_t MaximumAlignment
Definition: Value.h:811
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition: DenseSet.h:90
size_type size() const
Definition: DenseSet.h:81
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:507
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:864
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use ID as an operand.
Definition: DebugInfo.cpp:1866
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:240
void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
Definition: DebugInfo.cpp:1880
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:47
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
Zip iterator for two or more iterable types.
Definition: STLExtras.h:854
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1732
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:546
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:136
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of an if-region.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
Definition: ValueMapper.h:272
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B. Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:58
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2204
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2055
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1785
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2107
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1156
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1187
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1439
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:3236
auto succ_size(const MachineBasicBlock *BB)
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3426
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3707
@ And
Bitwise or logical AND of integers.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4210
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2014
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:235
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1624
bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:590
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2067
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1524
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:468
bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition: ValueMapper.h:281
void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
BasicBlock * Dest
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254